In [1]:
import os
import getpass
from dotenv import load_dotenv

# Load settings from the local dotenv file; override=True lets values from the
# file replace variables that are already present in the process environment.
load_dotenv('podcast-gds.env', override=True)

# For every Neo4j connection setting that is still missing or empty, prompt for
# it (getpass does not echo what is typed) and store the answer in os.environ.
_NEO4J_SETTINGS = ('NEO4J_URI', 'NEO4J_USERNAME', 'NEO4J_PASSWORD')
for _setting in _NEO4J_SETTINGS:
    if os.environ.get(_setting):
        continue
    os.environ[_setting] = getpass.getpass(f'{_setting}:\n')

# Expose the resolved values as module-level names used by the later cells.
NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD = map(os.getenv, _NEO4J_SETTINGS)

In [3]:
from neo4j import GraphDatabase

# Create the Neo4j driver from the connection settings resolved in the
# environment-setup cell.
driver = GraphDatabase.driver(
    NEO4J_URI,
    auth=(NEO4J_USERNAME, NEO4J_PASSWORD),
)

# Connectivity smoke test: count all nodes in the database. This is the last
# expression of the cell, so the query result is displayed as the cell output.
driver.execute_query("MATCH(n) RETURN count(n)")

EagerResult(records=[<Record count(n)=419>], summary=<neo4j._work.summary.ResultSummary object at 0x1132fe210>, keys=['count(n)'])

In [4]:
import os
import os.path as osp
from langchain_core.documents import Document
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from langchain_neo4j import Neo4jGraph


# Directory that holds the podcast transcript .txt files.
# Set the PODCAST_DATA_DIR environment variable to point at your own copy of
# the data. When it is unset or empty, the original author's local path is used
# so existing runs behave exactly as before.
BASE_DIR = os.environ.get("PODCAST_DATA_DIR") or (
    "/Users/sangeethar/workspace/AI-Workspace/neo4j-employee-graph/neo4j-employee-graph/input-podcast-episodes-data/"
)

# Already Processed
#osp.join(BASE_DIR, "AnthropicAndModelContextProtocol-MCP-WithDavidSoriaParra-ep1836.txt"),
#osp.join(BASE_DIR, "delayed-view-semantics-incremental-data-processing-ep473.txt"),
#osp.join(BASE_DIR, "DuckLake-SimplifyingLakehouseEcosystem-ep480.txt"),
#osp.join(BASE_DIR, "kuzudb-embeddable-graph-database-ep477.txt"),

# Transcripts to ingest in this run.
FILE_PATHS = [
    osp.join(BASE_DIR, "PromptsAsFunctions-BAML-Revolution_AI-Engineering.txt"),
    osp.join(BASE_DIR, "Iceberg-At-NetflixAndBeyond-RyanBlue-1654.txt")
]

# Initialize the LLM and graph transformer once; both are reused for every file.
llm = ChatOpenAI(temperature=0, model_name="gpt-4o-mini")
llm_transformer = LLMGraphTransformer(llm=llm)

# Initialize the Neo4j graph connection once, using the credentials resolved
# in the environment-setup cell.
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
)

# Fail fast: make sure every transcript exists before starting any extraction,
# so a typo in a later path cannot leave the run half-finished after earlier
# files have already been processed and written to Neo4j.
missing_paths = [path for path in FILE_PATHS if not osp.isfile(path)]
if missing_paths:
    raise FileNotFoundError(f"Transcript file(s) not found: {missing_paths}")

for file_path in FILE_PATHS:
    print(f"Processing file: {file_path}")
    with open(file_path, "r", encoding="utf-8") as f:
        text = f.read()

    # Wrap the whole transcript in a single Document
    documents = [Document(page_content=text)]

    # Extract graph documents (nodes and relationships) from the text
    graph_documents = llm_transformer.convert_to_graph_documents(documents)

    # Store the extracted graph documents in Neo4j
    graph.add_graph_documents(graph_documents)

    # Count across every returned graph document rather than assuming that
    # exactly one exists at index 0.
    node_count = sum(len(graph_doc.nodes) for graph_doc in graph_documents)
    relationship_count = sum(len(graph_doc.relationships) for graph_doc in graph_documents)
    print(f"Stored {node_count} nodes and {relationship_count} relationships from {osp.basename(file_path)}.")

print("All files processed and stored in Neo4j.")


Processing file: /Users/sangeethar/workspace/AI-Workspace/neo4j-employee-graph/neo4j-employee-graph/input-podcast-episodes-data/PromptsAsFunctions-BAML-Revolution_AI-Engineering.txt
Stored 17 nodes and 16 relationships from PromptsAsFunctions-BAML-Revolution_AI-Engineering.txt.
Processing file: /Users/sangeethar/workspace/AI-Workspace/neo4j-employee-graph/neo4j-employee-graph/input-podcast-episodes-data/Iceberg-At-NetflixAndBeyond-RyanBlue-1654.txt
Stored 28 nodes and 27 relationships from Iceberg-At-NetflixAndBeyond-RyanBlue-1654.txt.
All files processed and stored in Neo4j.
