From 3ec6d461eecea12dc466f1d976e9a97d11f56d8c Mon Sep 17 00:00:00 2001
From: estelle <estelle.scifo@neo4j.com>
Date: Wed, 20 Aug 2025 17:34:41 +0200
Subject: [PATCH 1/8] Create Document node even from text input

---
 CHANGELOG.md                                  |  6 ++++
 .../build_graph/simple_kg_builder_from_pdf.py |  7 ++++-
 .../simple_kg_builder_from_text.py            | 10 +++++-
 .../template_pipeline/simple_kg_builder.py    | 31 ++++++++++---------
 .../experimental/pipeline/kg_builder.py       | 17 ++++++++--
 .../test_simple_kg_builder.py                 | 10 +++---
 .../experimental/pipeline/test_kg_builder.py  | 27 +++++++++++-----
 7 files changed, 78 insertions(+), 30 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f36908738..35a2154fd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,12 @@
 - Fixed an edge case where the LLM can output a property with type 'map', which was causing errors during import as it is not a valid property type in Neo4j.
 
 
+### Added
+
+- Document node is now always created when running SimpleKGPipeline, even if `from_pdf=False`.
+- Document metadata can now be passed to the SimpleKGPipeline `run_async` method through the new `document_metadata` parameter.
+
+
 ## 1.9.1
 
 ### Fixed
diff --git a/examples/build_graph/simple_kg_builder_from_pdf.py b/examples/build_graph/simple_kg_builder_from_pdf.py
index 2cfc85134..0f98e0e66 100644
--- a/examples/build_graph/simple_kg_builder_from_pdf.py
+++ b/examples/build_graph/simple_kg_builder_from_pdf.py
@@ -54,7 +54,12 @@ async def define_and_run_pipeline(
         },
         neo4j_database=DATABASE,
     )
-    return await kg_builder.run_async(file_path=str(file_path))
+    return await kg_builder.run_async(
+        file_path=str(file_path),
+        # optional, add document metadata, each item will
+        # be saved as a property of the Document node
+        document_metadata={"author": "J. K. Rowling"},
+    )
 
 
 async def main() -> PipelineResult:
diff --git a/examples/build_graph/simple_kg_builder_from_text.py b/examples/build_graph/simple_kg_builder_from_text.py
index 548cbd9eb..330a3a8bd 100644
--- a/examples/build_graph/simple_kg_builder_from_text.py
+++ b/examples/build_graph/simple_kg_builder_from_text.py
@@ -79,7 +79,15 @@ async def define_and_run_pipeline(
         from_pdf=False,
         neo4j_database=DATABASE,
     )
-    return await kg_builder.run_async(text=TEXT)
+    return await kg_builder.run_async(
+        text=TEXT,
+        # optional, specify document path for the Document node
+        # if not, a random name will be generated
+        # document_path="my_document.txt"
+        # optional, add document metadata, each item will
+        # be saved as a property of the Document node
+        # document_metadata={"author": "Frank Herbert"},
+    )
 
 
 async def main() -> PipelineResult:
diff --git a/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py b/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py
index dc875d7c2..76618efed 100644
--- a/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py
+++ b/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py
@@ -14,6 +14,7 @@
 #  limitations under the License.
 from __future__ import annotations
 
+from collections import defaultdict
 from typing import (
     Any,
     ClassVar,
@@ -336,17 +337,6 @@ def _get_connections(self) -> list[ConnectionDefinition]:
         return connections
 
     def get_run_params(self, user_input: dict[str, Any]) -> dict[str, Any]:
-        run_params = {}
-        if self.lexical_graph_config:
-            run_params["extractor"] = {
-                "lexical_graph_config": self.lexical_graph_config,
-            }
-            run_params["writer"] = {
-                "lexical_graph_config": self.lexical_graph_config,
-            }
-            run_params["pruner"] = {
-                "lexical_graph_config": self.lexical_graph_config,
-            }
         text = user_input.get("text")
         file_path = user_input.get("file_path")
         if not ((text is None) ^ (file_path is None)):
@@ -354,19 +344,32 @@ def get_run_params(self, user_input: dict[str, Any]) -> dict[str, Any]:
             raise PipelineDefinitionError(
                 "Use either 'text' (when from_pdf=False) or 'file_path' (when from_pdf=True) argument."
             )
+        run_params: dict[str, dict[str, Any]] = defaultdict(dict)
+        if self.lexical_graph_config:
+            run_params["extractor"]["lexical_graph_config"] = self.lexical_graph_config
+            run_params["writer"]["lexical_graph_config"] = self.lexical_graph_config
+            run_params["pruner"]["lexical_graph_config"] = self.lexical_graph_config
         if self.from_pdf:
             if not file_path:
                 raise PipelineDefinitionError(
                     "Expected 'file_path' argument when 'from_pdf' is True."
                 )
-            run_params["pdf_loader"] = {"filepath": file_path}
+            run_params["pdf_loader"]["filepath"] = file_path
+            run_params["pdf_loader"]["metadata"] = user_input.get("document_metadata")
         else:
             if not text:
                 raise PipelineDefinitionError(
                     "Expected 'text' argument when 'from_pdf' is False."
                 )
-            run_params["splitter"] = {"text": text}
+            run_params["splitter"]["text"] = text
             # Add full text to schema component for automatic schema extraction
             if not self.has_user_provided_schema():
-                run_params["schema"] = {"text": text}
+                run_params["schema"]["text"] = text
+            run_params["extractor"]["document_info"] = dict(
+                path=user_input.get(
+                    "document_path",
+                )
+                or "document.txt",
+                metadata=user_input.get("document_metadata"),
+            )
         return run_params
diff --git a/src/neo4j_graphrag/experimental/pipeline/kg_builder.py b/src/neo4j_graphrag/experimental/pipeline/kg_builder.py
index 68f579c8b..531331ff4 100644
--- a/src/neo4j_graphrag/experimental/pipeline/kg_builder.py
+++ b/src/neo4j_graphrag/experimental/pipeline/kg_builder.py
@@ -145,7 +145,11 @@ def __init__(
         self.runner = PipelineRunner.from_config(config)
 
     async def run_async(
-        self, file_path: Optional[str] = None, text: Optional[str] = None
+        self,
+        file_path: Optional[str] = None,
+        text: Optional[str] = None,
+        document_path: Optional[str] = None,
+        document_metadata: Optional[dict[str, Any]] = None,
     ) -> PipelineResult:
         """
         Asynchronously runs the knowledge graph building process.
@@ -153,8 +157,17 @@ async def run_async(
         Args:
             file_path (Optional[str]): The path to the PDF file to process. Required if `from_pdf` is True.
             text (Optional[str]): The text content to process. Required if `from_pdf` is False.
+            document_path (Optional[str]): The path to the document to process. Required if `from_pdf` is True.
+            document_metadata (Optional[dict[str, Any]]): The metadata to attach to the document.
 
         Returns:
             PipelineResult: The result of the pipeline execution.
         """
-        return await self.runner.run({"file_path": file_path, "text": text})
+        return await self.runner.run(
+            {
+                "file_path": file_path,
+                "text": text,
+                "document_path": document_path,
+                "document_metadata": document_metadata,
+            }
+        )
diff --git a/tests/unit/experimental/pipeline/config/template_pipeline/test_simple_kg_builder.py b/tests/unit/experimental/pipeline/config/template_pipeline/test_simple_kg_builder.py
index 40f5dae34..d8fd01f8f 100644
--- a/tests/unit/experimental/pipeline/config/template_pipeline/test_simple_kg_builder.py
+++ b/tests/unit/experimental/pipeline/config/template_pipeline/test_simple_kg_builder.py
@@ -286,16 +286,16 @@ def test_simple_kg_pipeline_config_connections_with_er() -> None:
 def test_simple_kg_pipeline_config_run_params_from_pdf_file_path() -> None:
     config = SimpleKGPipelineConfig(from_pdf=True)
     assert config.get_run_params({"file_path": "my_file"}) == {
-        "pdf_loader": {"filepath": "my_file"}
+        "pdf_loader": {"filepath": "my_file", "metadata": None}
     }
 
 
 def test_simple_kg_pipeline_config_run_params_from_text_text() -> None:
     config = SimpleKGPipelineConfig(from_pdf=False)
-    assert config.get_run_params({"text": "my text"}) == {
-        "splitter": {"text": "my text"},
-        "schema": {"text": "my text"},
-    }
+    run_params = config.get_run_params({"text": "my text"})
+    assert run_params["splitter"] == {"text": "my text"}
+    assert run_params["schema"] == {"text": "my text"}
+    assert run_params["extractor"]["document_info"]["path"] == "document.txt"
 
 
 def test_simple_kg_pipeline_config_run_params_from_pdf_text() -> None:
diff --git a/tests/unit/experimental/pipeline/test_kg_builder.py b/tests/unit/experimental/pipeline/test_kg_builder.py
index 62abc1c41..ff8ab3117 100644
--- a/tests/unit/experimental/pipeline/test_kg_builder.py
+++ b/tests/unit/experimental/pipeline/test_kg_builder.py
@@ -18,7 +18,9 @@
 import neo4j
 import pytest
 from neo4j_graphrag.embeddings import Embedder
-from neo4j_graphrag.experimental.components.types import LexicalGraphConfig
+from neo4j_graphrag.experimental.components.types import (
+    LexicalGraphConfig,
+)
 from neo4j_graphrag.experimental.pipeline.exceptions import PipelineDefinitionError
 from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline
 from neo4j_graphrag.experimental.pipeline.pipeline import PipelineResult
@@ -49,11 +51,14 @@ async def test_knowledge_graph_builder_document_info_with_file(_: Mock) -> None:
         "run",
         return_value=PipelineResult(run_id="test_run", result=None),
     ) as mock_run:
-        await kg_builder.run_async(file_path=file_path)
+        await kg_builder.run_async(
+            file_path=file_path,
+            document_metadata={"source": "google drive"}
+        )
 
         pipe_inputs = mock_run.call_args[1]["data"]
         assert "pdf_loader" in pipe_inputs
-        assert pipe_inputs["pdf_loader"] == {"filepath": file_path}
+        assert pipe_inputs["pdf_loader"] == {"filepath": file_path, "metadata": {"source": "google drive"}}
         assert "extractor" not in pipe_inputs
 
 
@@ -81,11 +86,19 @@ async def test_knowledge_graph_builder_document_info_with_text(_: Mock) -> None:
         "run",
         return_value=PipelineResult(run_id="test_run", result=None),
     ) as mock_run:
-        await kg_builder.run_async(text=text_input)
+        await kg_builder.run_async(
+            text=text_input,
+            document_path="my_document.txt",
+            document_metadata={"source": "google drive"},
+        )
 
         pipe_inputs = mock_run.call_args[1]["data"]
         assert "splitter" in pipe_inputs
         assert pipe_inputs["splitter"] == {"text": text_input}
+        assert pipe_inputs["extractor"]["document_info"]["path"] == "my_document.txt"
+        assert pipe_inputs["extractor"]["document_info"]["metadata"] == {
+            "source": "google drive"
+        }
 
 
 @mock.patch(
@@ -175,6 +188,6 @@ async def test_knowledge_graph_builder_with_lexical_graph_config(_: Mock) -> Non
 
         pipe_inputs = mock_run.call_args[1]["data"]
         assert "extractor" in pipe_inputs
-        assert pipe_inputs["extractor"] == {
-            "lexical_graph_config": lexical_graph_config
-        }
+        assert pipe_inputs["extractor"]["lexical_graph_config"] == lexical_graph_config
+        assert pipe_inputs["extractor"]["document_info"] is not None
+        assert pipe_inputs["extractor"]["document_info"]["path"] == "document.txt"

From 9f2252588edebebae70623b0bd64f26064a6f83a Mon Sep 17 00:00:00 2001
From: estelle <estelle.scifo@neo4j.com>
Date: Thu, 21 Aug 2025 11:00:20 +0200
Subject: [PATCH 2/8] Update doc

---
 docs/source/user_guide_kg_builder.rst              | 10 ++++++++++
 examples/build_graph/simple_kg_builder_from_pdf.py |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/docs/source/user_guide_kg_builder.rst b/docs/source/user_guide_kg_builder.rst
index b574171bb..668ef72b1 100644
--- a/docs/source/user_guide_kg_builder.rst
+++ b/docs/source/user_guide_kg_builder.rst
@@ -219,6 +219,16 @@ chunk overlap in the text splitter component:
     )
 
 
+Run Parameters
+--------------
+
+SimpleKGPipeline also accepts addition runtime parameters:
+
+- ``document_path`` (str): only used when ``from_pdf=False``, this is the path property of the ``Document`` node.
+- ``document_metadata`` (dict): each item will be saved as a property attached to the ``Document`` node.
+
+
+
 Using a Config file
 ===================
 
diff --git a/examples/build_graph/simple_kg_builder_from_pdf.py b/examples/build_graph/simple_kg_builder_from_pdf.py
index 0f98e0e66..d3b2948f8 100644
--- a/examples/build_graph/simple_kg_builder_from_pdf.py
+++ b/examples/build_graph/simple_kg_builder_from_pdf.py
@@ -58,7 +58,7 @@ async def define_and_run_pipeline(
         file_path=str(file_path),
         # optional, add document metadata, each item will
         # be saved as a property of the Document node
-        document_metadata={"author": "J. K. Rowling"},
+        # document_metadata={"author": "J. K. Rowling"},
     )
 
 

From 1c5b0de4cb22a67969e0868b5e562b5b2f60c183 Mon Sep 17 00:00:00 2001
From: estelle <estelle.scifo@neo4j.com>
Date: Thu, 21 Aug 2025 11:00:55 +0200
Subject: [PATCH 3/8] Ruff

---
 tests/unit/experimental/pipeline/test_kg_builder.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/unit/experimental/pipeline/test_kg_builder.py b/tests/unit/experimental/pipeline/test_kg_builder.py
index ff8ab3117..d476d9d96 100644
--- a/tests/unit/experimental/pipeline/test_kg_builder.py
+++ b/tests/unit/experimental/pipeline/test_kg_builder.py
@@ -52,13 +52,15 @@ async def test_knowledge_graph_builder_document_info_with_file(_: Mock) -> None:
         return_value=PipelineResult(run_id="test_run", result=None),
     ) as mock_run:
         await kg_builder.run_async(
-            file_path=file_path,
-            document_metadata={"source": "google drive"}
+            file_path=file_path, document_metadata={"source": "google drive"}
         )
 
         pipe_inputs = mock_run.call_args[1]["data"]
         assert "pdf_loader" in pipe_inputs
-        assert pipe_inputs["pdf_loader"] == {"filepath": file_path, "metadata": {"source": "google drive"}}
+        assert pipe_inputs["pdf_loader"] == {
+            "filepath": file_path,
+            "metadata": {"source": "google drive"},
+        }
         assert "extractor" not in pipe_inputs
 
 

From 793ff8c2ff7bdc0c72198ec32ec330715075f1ea Mon Sep 17 00:00:00 2001
From: estelle <estelle.scifo@neo4j.com>
Date: Thu, 21 Aug 2025 18:35:10 +0200
Subject: [PATCH 4/8] Save document_type

---
 .../experimental/components/lexical_graph.py               | 1 +
 src/neo4j_graphrag/experimental/components/pdf_loader.py   | 1 +
 src/neo4j_graphrag/experimental/components/types.py        | 1 +
 .../pipeline/config/template_pipeline/simple_kg_builder.py | 1 +
 .../experimental/components/test_lexical_graph_builder.py  | 7 ++++++-
 5 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/neo4j_graphrag/experimental/components/lexical_graph.py b/src/neo4j_graphrag/experimental/components/lexical_graph.py
index bbe99b80b..fb5583c04 100644
--- a/src/neo4j_graphrag/experimental/components/lexical_graph.py
+++ b/src/neo4j_graphrag/experimental/components/lexical_graph.py
@@ -113,6 +113,7 @@ def create_document_node(self, document_info: DocumentInfo) -> Neo4jNode:
             properties={
                 "path": document_info.path,
                 "createdAt": datetime.datetime.now(datetime.timezone.utc).isoformat(),
+                "document_type": document_info.document_type,
                 **document_metadata,
             },
         )
diff --git a/src/neo4j_graphrag/experimental/components/pdf_loader.py b/src/neo4j_graphrag/experimental/components/pdf_loader.py
index 979cdb665..8bfe2502e 100644
--- a/src/neo4j_graphrag/experimental/components/pdf_loader.py
+++ b/src/neo4j_graphrag/experimental/components/pdf_loader.py
@@ -89,5 +89,6 @@ async def run(
             document_info=DocumentInfo(
                 path=filepath,
                 metadata=self.get_document_metadata(text, metadata),
+                document_type="pdf",
             ),
         )
diff --git a/src/neo4j_graphrag/experimental/components/types.py b/src/neo4j_graphrag/experimental/components/types.py
index 363767ef3..0062593a1 100644
--- a/src/neo4j_graphrag/experimental/components/types.py
+++ b/src/neo4j_graphrag/experimental/components/types.py
@@ -38,6 +38,7 @@ class DocumentInfo(DataModel):
     path: str
     metadata: Optional[Dict[str, str]] = None
     uid: str = Field(default_factory=lambda: str(uuid.uuid4()))
+    document_type: Optional[document_type] = None
 
     @property
     def document_id(self) -> str:
diff --git a/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py b/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py
index 76618efed..832a70ca5 100644
--- a/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py
+++ b/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py
@@ -371,5 +371,6 @@ def get_run_params(self, user_input: dict[str, Any]) -> dict[str, Any]:
                 )
                 or "document.txt",
                 metadata=user_input.get("document_metadata"),
+                document_type="inline_text",
             )
         return run_params
diff --git a/tests/unit/experimental/components/test_lexical_graph_builder.py b/tests/unit/experimental/components/test_lexical_graph_builder.py
index 4621c77e9..788855f61 100644
--- a/tests/unit/experimental/components/test_lexical_graph_builder.py
+++ b/tests/unit/experimental/components/test_lexical_graph_builder.py
@@ -78,7 +78,11 @@ async def test_lexical_graph_builder_run_with_document() -> None:
                 TextChunk(text="text chunk 1", index=1),
             ]
         ),
-        document_info=DocumentInfo(path="test_lexical_graph", uid=doc_uid),
+        document_info=DocumentInfo(
+            path="test_lexical_graph",
+            uid=doc_uid,
+            document_type="my_type",
+        ),
     )
     assert isinstance(result, GraphResult)
     graph = result.graph
@@ -89,6 +93,7 @@ async def test_lexical_graph_builder_run_with_document() -> None:
     assert document.label == DEFAULT_DOCUMENT_NODE_LABEL
     assert document.properties["path"] == "test_lexical_graph"
     assert document.properties["createdAt"] is not None
+    assert document.properties["document_type"] == "my_type"
     chunk1 = nodes[1]
     assert chunk1.label == DEFAULT_CHUNK_NODE_LABEL
     chunk2 = nodes[2]

From 9e151d13a9484b36e6f70013787a429987c1ecd3 Mon Sep 17 00:00:00 2001
From: estelle <estelle.scifo@neo4j.com>
Date: Wed, 27 Aug 2025 16:23:34 +0200
Subject: [PATCH 5/8] Fix type

---
 src/neo4j_graphrag/experimental/components/types.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/neo4j_graphrag/experimental/components/types.py b/src/neo4j_graphrag/experimental/components/types.py
index 0062593a1..b5ce07706 100644
--- a/src/neo4j_graphrag/experimental/components/types.py
+++ b/src/neo4j_graphrag/experimental/components/types.py
@@ -38,7 +38,7 @@ class DocumentInfo(DataModel):
     path: str
     metadata: Optional[Dict[str, str]] = None
     uid: str = Field(default_factory=lambda: str(uuid.uuid4()))
-    document_type: Optional[document_type] = None
+    document_type: Optional[str] = None
 
     @property
     def document_id(self) -> str:

From 88f65e56c83b48c4567f29250011a78b52a2b81f Mon Sep 17 00:00:00 2001
From: estelle <estelle.scifo@neo4j.com>
Date: Tue, 2 Sep 2025 15:31:52 +0200
Subject: [PATCH 6/8] Reuse file_path instead of introducing another
 document_path parameter

---
 docs/source/user_guide_kg_builder.rst                  |  1 -
 examples/build_graph/simple_kg_builder_from_text.py    |  4 ++--
 .../config/template_pipeline/simple_kg_builder.py      |  6 +++---
 src/neo4j_graphrag/experimental/pipeline/kg_builder.py |  5 +----
 .../config/template_pipeline/test_simple_kg_builder.py | 10 ----------
 tests/unit/experimental/pipeline/test_kg_builder.py    |  2 +-
 6 files changed, 7 insertions(+), 21 deletions(-)

diff --git a/docs/source/user_guide_kg_builder.rst b/docs/source/user_guide_kg_builder.rst
index 668ef72b1..65ccbd82d 100644
--- a/docs/source/user_guide_kg_builder.rst
+++ b/docs/source/user_guide_kg_builder.rst
@@ -224,7 +224,6 @@ Run Parameters
 
 SimpleKGPipeline also accepts addition runtime parameters:
 
-- ``document_path`` (str): only used when ``from_pdf=False``, this is the path property of the ``Document`` node.
 - ``document_metadata`` (dict): each item will be saved as a property attached to the ``Document`` node.
 
 
diff --git a/examples/build_graph/simple_kg_builder_from_text.py b/examples/build_graph/simple_kg_builder_from_text.py
index 330a3a8bd..18adde4ac 100644
--- a/examples/build_graph/simple_kg_builder_from_text.py
+++ b/examples/build_graph/simple_kg_builder_from_text.py
@@ -81,9 +81,9 @@ async def define_and_run_pipeline(
     )
     return await kg_builder.run_async(
         text=TEXT,
-        # optional, specify document path for the Document node
+        # optional, specify file path for the Document node
         # if not, a random name will be generated
-        # document_path="my_document.txt"
+        # file_path="my_document.txt",
         # optional, add document metadata, each item will
         # be saved as a property of the Document node
         # document_metadata={"author": "Frank Herbert"},
diff --git a/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py b/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py
index 832a70ca5..fef907d87 100644
--- a/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py
+++ b/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py
@@ -339,8 +339,8 @@ def _get_connections(self) -> list[ConnectionDefinition]:
     def get_run_params(self, user_input: dict[str, Any]) -> dict[str, Any]:
         text = user_input.get("text")
         file_path = user_input.get("file_path")
-        if not ((text is None) ^ (file_path is None)):
-            # exactly one of text or user_input must be set
+        if text is None and file_path is None:
+            # use must provide either text or file_path or both
             raise PipelineDefinitionError(
                 "Use either 'text' (when from_pdf=False) or 'file_path' (when from_pdf=True) argument."
             )
@@ -367,7 +367,7 @@ def get_run_params(self, user_input: dict[str, Any]) -> dict[str, Any]:
                 run_params["schema"]["text"] = text
             run_params["extractor"]["document_info"] = dict(
                 path=user_input.get(
-                    "document_path",
+                    "file_path",
                 )
                 or "document.txt",
                 metadata=user_input.get("document_metadata"),
diff --git a/src/neo4j_graphrag/experimental/pipeline/kg_builder.py b/src/neo4j_graphrag/experimental/pipeline/kg_builder.py
index 531331ff4..b7313b3b0 100644
--- a/src/neo4j_graphrag/experimental/pipeline/kg_builder.py
+++ b/src/neo4j_graphrag/experimental/pipeline/kg_builder.py
@@ -148,16 +148,14 @@ async def run_async(
         self,
         file_path: Optional[str] = None,
         text: Optional[str] = None,
-        document_path: Optional[str] = None,
         document_metadata: Optional[dict[str, Any]] = None,
     ) -> PipelineResult:
         """
         Asynchronously runs the knowledge graph building process.
 
         Args:
-            file_path (Optional[str]): The path to the PDF file to process. Required if `from_pdf` is True.
+            file_path (Optional[str]): The path to the PDF file to process. Required if `from_pdf` is True. If `from_pdf` is False, it is optional and sets the `path` property of the Document node (defaults to "document.txt" when omitted).
             text (Optional[str]): The text content to process. Required if `from_pdf` is False.
-            document_path (Optional[str]): The path to the document to process. Required if `from_pdf` is True.
             document_metadata (Optional[dict[str, Any]]): The metadata to attach to the document.
 
         Returns:
@@ -167,7 +165,6 @@ async def run_async(
             {
                 "file_path": file_path,
                 "text": text,
-                "document_path": document_path,
                 "document_metadata": document_metadata,
             }
         )
diff --git a/tests/unit/experimental/pipeline/config/template_pipeline/test_simple_kg_builder.py b/tests/unit/experimental/pipeline/config/template_pipeline/test_simple_kg_builder.py
index d8fd01f8f..916f9395f 100644
--- a/tests/unit/experimental/pipeline/config/template_pipeline/test_simple_kg_builder.py
+++ b/tests/unit/experimental/pipeline/config/template_pipeline/test_simple_kg_builder.py
@@ -322,16 +322,6 @@ def test_simple_kg_pipeline_config_run_params_no_file_no_text() -> None:
     )
 
 
-def test_simple_kg_pipeline_config_run_params_both_file_and_text() -> None:
-    config = SimpleKGPipelineConfig(from_pdf=False)
-    with pytest.raises(PipelineDefinitionError) as excinfo:
-        config.get_run_params({"text": "my text", "file_path": "my file"})
-    assert (
-        "Use either 'text' (when from_pdf=False) or 'file_path' (when from_pdf=True) argument."
-        in str(excinfo)
-    )
-
-
 def test_simple_kg_pipeline_config_process_schema_with_precedence_legacy() -> None:
     entities: list[EntityInputType] = [
         "Person",
diff --git a/tests/unit/experimental/pipeline/test_kg_builder.py b/tests/unit/experimental/pipeline/test_kg_builder.py
index d476d9d96..8634a1e4e 100644
--- a/tests/unit/experimental/pipeline/test_kg_builder.py
+++ b/tests/unit/experimental/pipeline/test_kg_builder.py
@@ -90,7 +90,7 @@ async def test_knowledge_graph_builder_document_info_with_text(_: Mock) -> None:
     ) as mock_run:
         await kg_builder.run_async(
             text=text_input,
-            document_path="my_document.txt",
+            file_path="my_document.txt",
             document_metadata={"source": "google drive"},
         )
 

From a740640f8ab16427fc5c8c545a00c1b4993f538b Mon Sep 17 00:00:00 2001
From: estelle <estelle.scifo@neo4j.com>
Date: Sun, 14 Sep 2025 10:25:21 +0200
Subject: [PATCH 7/8] Address comments

---
 .../pipeline/config/template_pipeline/simple_kg_builder.py    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py b/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py
index fef907d87..41ac7557f 100644
--- a/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py
+++ b/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py
@@ -340,9 +340,9 @@ def get_run_params(self, user_input: dict[str, Any]) -> dict[str, Any]:
         text = user_input.get("text")
         file_path = user_input.get("file_path")
         if text is None and file_path is None:
-            # use must provide either text or file_path or both
+            # user must provide either text or file_path or both
             raise PipelineDefinitionError(
-                "Use either 'text' (when from_pdf=False) or 'file_path' (when from_pdf=True) argument."
+                "At least one of `text` (when from_pdf=False) or 'file_path' (when from_pdf=True) argument must be provided."
             )
         run_params: dict[str, dict[str, Any]] = defaultdict(dict)
         if self.lexical_graph_config:

From 36d9a035ab803cb93a46416c0c4c5fc9cdc17f84 Mon Sep 17 00:00:00 2001
From: estelle <estelle.scifo@neo4j.com>
Date: Sun, 14 Sep 2025 13:43:01 +0200
Subject: [PATCH 8/8] Fix CI

---
 .../config/template_pipeline/simple_kg_builder.py    |  2 +-
 .../template_pipeline/test_simple_kg_builder.py      | 12 +++++++-----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py b/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py
index 41ac7557f..929111898 100644
--- a/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py
+++ b/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py
@@ -342,7 +342,7 @@ def get_run_params(self, user_input: dict[str, Any]) -> dict[str, Any]:
         if text is None and file_path is None:
             # user must provide either text or file_path or both
             raise PipelineDefinitionError(
-                "At least one of `text` (when from_pdf=False) or 'file_path' (when from_pdf=True) argument must be provided."
+                "At least one of `text` (when from_pdf=False) or `file_path` (when from_pdf=True) argument must be provided."
             )
         run_params: dict[str, dict[str, Any]] = defaultdict(dict)
         if self.lexical_graph_config:
diff --git a/tests/unit/experimental/pipeline/config/template_pipeline/test_simple_kg_builder.py b/tests/unit/experimental/pipeline/config/template_pipeline/test_simple_kg_builder.py
index 916f9395f..2c8d2a0fd 100644
--- a/tests/unit/experimental/pipeline/config/template_pipeline/test_simple_kg_builder.py
+++ b/tests/unit/experimental/pipeline/config/template_pipeline/test_simple_kg_builder.py
@@ -12,6 +12,7 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
+import re
 from unittest.mock import Mock, patch
 
 import neo4j
@@ -314,12 +315,13 @@ def test_simple_kg_pipeline_config_run_params_from_text_file_path() -> None:
 
 def test_simple_kg_pipeline_config_run_params_no_file_no_text() -> None:
     config = SimpleKGPipelineConfig(from_pdf=False)
-    with pytest.raises(PipelineDefinitionError) as excinfo:
+    with pytest.raises(
+        PipelineDefinitionError,
+        match=re.escape(
+            "At least one of `text` (when from_pdf=False) or `file_path` (when from_pdf=True) argument must be provided."
+        ),
+    ):
         config.get_run_params({})
-    assert (
-        "Use either 'text' (when from_pdf=False) or 'file_path' (when from_pdf=True) argument."
-        in str(excinfo)
-    )
 
 
 def test_simple_kg_pipeline_config_process_schema_with_precedence_legacy() -> None: