# Demo

This notebook demos:
- The easiest and fastest way to get started with SimpleKGPipeline
- Controlling the schema to improve the Entity and Relation extraction
- Updating the entity and relation extraction to domain-specific needs

## Initial setup

Read settings from the `.env` file.

In [None]:
import os

from dotenv import load_dotenv
import neo4j
from neo4j_graphrag.llm import OpenAILLM
from neo4j_graphrag.embeddings import OpenAIEmbeddings
from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline

In [None]:
# Load settings from the `.env` file; the `os.getenv(...)` lookups used to
# build the Neo4j driver further down read their values from the environment.
load_dotenv()

In [None]:
# Relative path to the PDF that is used as input for schema extraction and
# for every pipeline run in this notebook.
file_path = "./data/Climate change - Wikipedia long.pdf"

Instantiate the required objects: a Neo4j driver, an LLM, and an embedder (for chunk embeddings).

In [None]:
# Neo4j connection settings are read from the environment, falling back to
# the local defaults given here when a variable is not set.
neo4j_uri = os.getenv("NEO4J_URI", "bolt://localhost:7687")
neo4j_auth = (
    os.getenv("NEO4J_USERNAME", "neo4j"),
    os.getenv("NEO4J_PASSWORD", "neo4j"),
)
driver = neo4j.GraphDatabase.driver(neo4j_uri, auth=neo4j_auth)

# LLM used for extraction: temperature 0 and a JSON-object response format.
llm_params = {
    "temperature": 0,
    "response_format": {"type": "json_object"},
}
llm = OpenAILLM(model_name="gpt-4o", model_params=llm_params)

# Embedding model used for the chunk embeddings.
embedder = OpenAIEmbeddings(model="text-embedding-3-small")

## Quick start: Run SimpleKGPipeline

In [None]:
# Minimal configuration: only the driver, LLM and embedder are supplied;
# no schema and no custom prompt are passed.
pipeline = SimpleKGPipeline(driver=driver, llm=llm, embedder=embedder)

In [None]:
await pipeline.run_async(
    file_path=file_path,
    document_metadata={
        "source": "Wikipedia",
    }
);

## Control the schema

In [None]:
# Reset step (destructive): the Cypher statement matches every node and
# deletes it together with its relationships (DETACH), so the next run starts
# from an empty graph. The trailing `;` suppresses the cell output.
driver.execute_query("MATCH (n) DETACH DELETE n");

In [None]:
from neo4j_graphrag.experimental.components.pdf_loader import PdfLoader
# Load the PDF on its own (outside the pipeline) so that its text is available
# as `document.text` for the schema extraction step.
loader = PdfLoader()
document = await loader.run(filepath=file_path)

In [None]:
from neo4j_graphrag.experimental.components.schema import SchemaFromTextExtractor

In [None]:
extractor = SchemaFromTextExtractor(
    llm=llm,
)
schema = await extractor.run(text=document.text)
schema

In [None]:
from neo4j_graphrag.experimental.utils.schema import schema_visualization

In [None]:
# Visualize the extracted schema for review.
# NOTE(review): assumes `schema_visualization` returns an object whose
# `render()` result is displayed inline as the cell output — confirm.
VG = schema_visualization(schema)
VG.render()

In [None]:
# Save the extracted schema to `my_schema.json` so it can be edited by hand.
# NOTE(review): `overwrite=True` presumably lets a re-run replace an existing
# file instead of failing — confirm against `GraphSchema.save`.
schema.save("my_schema.json", overwrite=True)


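Make some changes to the JSON file (`my_schema.json`) and save the edited version as `refined_schema.json`, which is the file loaded in the next cell.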

In [None]:
from neo4j_graphrag.experimental.components.schema import GraphSchema
# Load the hand-edited schema. NOTE: this reads `refined_schema.json`, which is
# a different file name from the `my_schema.json` saved earlier, so the edited
# schema has to exist under this name before the cell is run.
new_schema = GraphSchema.from_file("refined_schema.json")

# Visualize the edited schema; this rebinds `VG`, replacing the earlier
# visualization object.
VG = schema_visualization(new_schema)
VG.render()

In [None]:
pipeline = SimpleKGPipeline(
    driver=driver,
    llm=llm,
    embedder=embedder,
    schema=new_schema,
)
await pipeline.run_async(
    file_path=file_path,
    document_metadata={
        "source": "Wikipedia",
        "schema": "edited schema",
    }
);

## Update the Entity extraction prompt

In [None]:
# Reset step (destructive): the Cypher statement matches every node and
# deletes it together with its relationships (DETACH), so the run with the
# customized prompt starts from an empty graph.
driver.execute_query("MATCH (n) DETACH DELETE n");

In [None]:
from neo4j_graphrag.generation.prompts import ERExtractionTemplate
additional_guidance = """
NAMING RULES:
- Use canonical short names whenever possible:
  * GreenhouseGas: CO2, CH4, N2O, H2O
  * ClimateEvent: Global warming, Sea level rise, Ocean heat content rise
  * RenewableSource: Solar power, Wind power, Hydropower, Renewable energy
  * HumanActivity: Fossil fuel combustion, Deforestation, Agriculture
  * Country: use common country names (France, India, China, etc.)
  * Agreement: Paris Agreement, Kyoto Protocol
- Avoid synonyms or redundant variants (e.g., “carbon dioxide” → “CO2”).
- Keep capitalization consistent and concise.
"""
er_prompt = ERExtractionTemplate().template + additional_guidance
pipeline = SimpleKGPipeline(
    driver=driver,
    llm=llm,
    embedder=embedder,
    schema=new_schema,
    prompt_template=er_prompt,
)
await pipeline.run_async(
    file_path=file_path,
    document_metadata={
        "source": "Wikipedia",
    },
);