In [6]:
%%capture
%pip install neo4j langchain langchain-community

In [25]:
from neo4j import GraphDatabase
import os
from dotenv import load_dotenv
load_dotenv()

import os
NEO4J_URI = os.environ["NEO4J_URI"]
NEO4J_USERNAME= os.environ["NEO4J_USERNAME"]
NEO4J_PASSWORD= os.environ["NEO4J_PASSWORD"]

openai_api_key = os.environ["OPENAI_API_KEY"]
import os
csv_path = os.path.abspath(os.path.join(os.getcwd(), "..", "Data", "Netflix_raw.csv"))


In [27]:
# Initialize the LLM
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(openai_api_key=openai_api_key, model="gpt-4o", temperature=0.0)


In [None]:
from neo4j import GraphDatabase
import pandas as pd

class NetflixGraphDB:
    def __init__(self, NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD):
        self.driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
        print("✅ Connected to Neo4j")

    def close(self):
        self.driver.close()
        print("🔌 Connection closed")

    def clear_database(self):
        with self.driver.session() as session:
            session.run("MATCH (n) DETACH DELETE n")
            print("🧹 Cleared entire graph")

    def create_movie_node(self, row):
        with self.driver.session() as session:
            session.run(
                """
                MERGE (m:Movie {
                    title: $title,
                    release_year: $release_year,
                    duration: $duration,
                    rating: $rating,
                    description: $description
                })
                """,
                {
                    "title": row.get("title"),
                    "release_year": row.get("release_year"),
                    "duration": row.get("duration"),
                    "rating": row.get("rating"),
                    "description": row.get("description")
                }
            )


In [52]:
from neo4j import GraphDatabase
import pandas as pd
from langchain_neo4j import Neo4jGraph
import time
from tqdm import tqdm 

class NetflixGraphDB:
    def __init__(self, NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD):
        self.driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
        print("✅ Connected to Neo4j")

    def close(self):
        self.driver.close()
        print("🔌 Connection closed")

    def clear_database(self):
        with self.driver.session() as session:
            session.run("MATCH (n) DETACH DELETE n")
            print("🧹 Cleared entire graph")

    def create_movie_node(self, row):
        # Skip invalid rows
        if not row.get("title"):
            print("⚠️ Skipping row with missing title.")
            return

        # Replace NaNs with None (Neo4j supports null in SET, not in MERGE)
        row = row.fillna(value=None)

        with self.driver.session() as session:
            session.run(
                """
                MERGE (m:Movie {title: $title})
                SET m.release_year = $release_year,
                    m.duration = $duration,
                    m.rating = $rating,
                    m.description = $description
                """,
                {
                    "title": row.get("title"),
                    "release_year": row.get("release_year"),
                    "duration": row.get("duration"),
                    "rating": row.get("rating"),
                    "description": row.get("description")
                }
            )

    def create_person_and_relationship(self, title, name, relationship_type):
        if not name: return
        with self.driver.session() as session:
            session.run(
                f"""
                MERGE (p:Person {{name: $name}})
                MERGE (m:Movie {{title: $title}})
                MERGE (p)-[:{relationship_type}]->(m)
                """,
                {"name": name, "title": title}
            )

    def create_genre_relationship(self, title, genre):
        if not genre: return
        with self.driver.session() as session:
            session.run(
                """
                MERGE (g:Genre {name: $genre})
                MERGE (m:Movie {title: $title})
                MERGE (m)-[:IN_GENRE]->(g)
                """,
                {"genre": genre, "title": title}
            )

    
    def load_movies_from_csv(self, csv_path):
        # Optional: install via pip install tqdm

        df = pd.read_csv(csv_path)
        df.columns = [col.strip().lower().replace(" ", "_") for col in df.columns]
        
        print(f"📄 Loading {len(df)} movie rows from: {csv_path}")

        with self.driver.session() as session:
            for _, row in tqdm(df.iterrows(), total=len(df), desc="🚀 Ingesting movies"):
                row = row.where(pd.notnull(row), None)
                title = row.get("title")

                if not title:
                    print("⚠️ Skipping movie with missing title.")
                    continue

                # Create the movie node
                session.run(
                    """
                    MERGE (m:Movie {title: $title})
                    SET m.release_year = $release_year,
                        m.duration = $duration,
                        m.rating = $rating,
                        m.description = $description
                    """,
                    {
                        "title": title,
                        "release_year": row.get("release_year"),
                        "duration": row.get("duration"),
                        "rating": row.get("rating"),
                        "description": row.get("description")
                    }
                )

                # Add actors
                for actor in str(row.get("actors", "")).split("|"):
                    actor = actor.strip()
                    if actor:
                        session.run(
                            """
                            MERGE (p:Person {name: $name})
                            MERGE (m:Movie {title: $title})
                            MERGE (p)-[:ACTED_IN]->(m)
                            """,
                            {"name": actor, "title": title}
                        )

                # Add directors
                for director in str(row.get("director", "")).split("|"):
                    director = director.strip()
                    if director:
                        session.run(
                            """
                            MERGE (p:Person {name: $name})
                            MERGE (m:Movie {title: $title})
                            MERGE (p)-[:DIRECTED]->(m)
                            """,
                            {"name": director, "title": title}
                        )

                # Add genres
                for genre in str(row.get("genres", "")).split("|"):
                    genre = genre.strip()
                    if genre:
                        session.run(
                            """
                            MERGE (g:Genre {name: $genre})
                            MERGE (m:Movie {title: $title})
                            MERGE (m)-[:IN_GENRE]->(g)
                            """,
                            {"genre": genre, "title": title}
                        )

        print("✅ All movies, actors, directors, and genres loaded.")
        
    def run_query(self, cypher_query, parameters=None):
        with self.driver.session() as session:
            result = session.run(cypher_query, parameters or {})
            return [record.data() for record in result]

In [37]:
# Create an instance of NetflixGraphDB
db = NetflixGraphDB(NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD)

db.clear_database()  # Optional: wipe old data
db.load_movies_from_csv(csv_path)

# Always close when done
db.close()

✅ Connected to Neo4j
🧹 Cleared entire graph


KeyboardInterrupt: 

In [66]:
from langchain_openai import ChatOpenAI
from langchain_community.graphs.neo4j_graph import Neo4jGraph
from langchain_community.chains.graph_qa.cypher import GraphCypherQAChain

llm = ChatOpenAI(model="gpt-4o", openai_api_key=os.getenv("OPENAI_API_KEY"))

graph = Neo4jGraph(
    url=os.getenv("NEO4J_URI"),
    username=os.getenv("NEO4J_USERNAME"),
    password=os.getenv("NEO4J_PASSWORD")
)

chain = CypherQAChain.from_llm(
    graph=graph,
    llm=llm,
    verbose=True,
    allow_dangerous_operations=True  # ✅ Note: name changed from previous version
)

response = chain.invoke({"query": "Which movies were directed by Christopher Nolan?"})
print("🎬 Answer:", response["result"].strip())



NameError: name 'CypherQAChain' is not defined

In [54]:
query = "Which movies were directed by Christopher Nolan?"
result = run_nl_query(
    query=query,
    NEO4J_URI=NEO4J_URI,
    NEO4J_USERNAME=NEO4J_USERNAME,
    NEO4J_PASSWORD=NEO4J_PASSWORD,
    openai_api_key=openai_api_key
)
print("🎥 Answer:", result)

ValidationError: 1 validation error for GraphCypherQAChain
graph
  Input should be an instance of GraphStore [type=is_instance_of, input_value=<langchain_neo4j.graphs.n...bject at 0x7e3881b11630>, input_type=Neo4jGraph]
    For further information visit https://errors.pydantic.dev/2.11/v/is_instance_of