In [None]:
import os
from dotenv import load_dotenv
from neo4j import GraphDatabase
import pandas as pd
from pathlib import Path

# Show the working directory: the .env path below is resolved relative to it.
print(os.getcwd())

env_path = Path('..') / '.env.local'
if not env_path.is_file():
    # load_dotenv() silently does nothing for a missing file, so say so here.
    print(f"Warning: {env_path} not found; relying on variables already set in the environment.")
load_dotenv(dotenv_path=env_path)

# Retrieve credentials
uri = os.getenv("NEO4J_URI")
username = os.getenv("NEO4J_USER")
password = os.getenv("NEO4J_PASSWORD")

# Fail early with a readable message instead of handing None to the driver.
_missing_vars = [
    var_name
    for var_name, var_value in (
        ("NEO4J_URI", uri),
        ("NEO4J_USER", username),
        ("NEO4J_PASSWORD", password),
    )
    if not var_value
]
if _missing_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_vars)
    )

print(uri)

# Shared driver object used by the cells below.
driver = GraphDatabase.driver(uri, auth=(username, password))

In [None]:
# Index parameters
INDEX_NAME = "voice-vector-index"
NODE_LABEL = "Voice"
PROPERTY_NAME = "embedding"
DIMENSIONS = 3072  # For OpenAI "text-embedding-3-large"
SIMILARITY_FUNCTION = "cosine"  # or 'euclidean'

# Create the index
def create_vector_index(driver):
    """Create the vector index described by the module-level index constants.

    Calls the ``db.index.vector.createNodeIndex`` procedure with INDEX_NAME,
    NODE_LABEL, PROPERTY_NAME, DIMENSIONS and SIMILARITY_FUNCTION, passed as
    query parameters rather than interpolated into the Cypher text.

    Args:
        driver: Object exposing ``session()`` that returns a context manager
            with a ``run(query, parameters)`` method (a Neo4j driver).

    Returns:
        None. The outcome is reported with ``print``; any exception raised
        while running the query is caught and printed, not re-raised.
    """
    # NOTE(review): this procedure is reportedly deprecated in newer Neo4j 5.x
    # releases in favour of `CREATE VECTOR INDEX` - confirm against the server version.
    cypher = """
    CALL db.index.vector.createNodeIndex(
        $index_name,
        $node_label,
        $property_name,
        $dimensions,
        $similarity_function
    )
    """
    parameters = {
        "index_name": INDEX_NAME,
        "node_label": NODE_LABEL,
        "property_name": PROPERTY_NAME,
        "dimensions": DIMENSIONS,
        "similarity_function": SIMILARITY_FUNCTION,
    }
    with driver.session() as session:
        try:
            # consume() drains the result so that server-side errors are raised
            # here, before the success message is printed.
            session.run(cypher, parameters).consume()
            print(f"✅ Vector index '{INDEX_NAME}' created successfully.")
        except Exception as e:
            print(f"❌ Failed to create vector index: {e}")

In [None]:
# Create the vector index with the helper defined above; the helper prints a
# success or failure message rather than raising.
create_vector_index(driver)

In [None]:
from neo4j import GraphDatabase
from neo4j_graphrag.retrievers import VectorRetriever
from neo4j_graphrag.llm import OpenAILLM
from neo4j_graphrag.generation import GraphRAG
from neo4j_graphrag.embeddings import OpenAIEmbeddings

In [None]:
# Embedder handed to the retriever
embedder = OpenAIEmbeddings(model="text-embedding-3-large")

# Retriever bound to the vector index named by INDEX_NAME
retriever = VectorRetriever(
    driver=driver,
    index_name=INDEX_NAME,
    embedder=embedder,
)

# LLM used by the pipeline
# Note: the OPENAI_API_KEY must be in the env vars
llm = OpenAILLM(
    model_name="gpt-4o",
    model_params=dict(temperature=0),
)

# Combine retriever and LLM into the RAG pipeline
rag = GraphRAG(llm=llm, retriever=retriever)

In [None]:
# Query the graph
query_text = "What were some of the main themes of value? Use only the context provided."
response = rag.search(query_text=query_text, retriever_config={"top_k": 5})
print(response.answer)

The cell below does not currently work, because large transcripts need to be chunked first.

In [None]:
from neo4j_graphrag.retrievers import VectorCypherRetriever

# Cypher fragment run by VectorCypherRetriever after the vector index search.
# The retriever binds each matched node to `node` and its similarity to `score`,
# so the pattern must be anchored on `node`. Starting a fresh MATCH (v:Voice)
# would ignore the search result and return every facilitator transcription
# for every hit.
retrieval_query = """
MATCH (node)<-[:HAS_VOICE]-(e:Entry)-[:SENT_BY]->(p:Participant)
WHERE p.role = 'facilitator'
RETURN node.transcription AS content, score
"""

# Set up the retriever, reusing INDEX_NAME so it always matches the index
# created earlier in the notebook.
retriever = VectorCypherRetriever(
    driver=driver,
    index_name=INDEX_NAME,
    retrieval_query=retrieval_query,
    embedder=embedder
)

# Re-initialize the RAG pipeline
rag = GraphRAG(retriever=retriever, llm=llm)

# Example query. The facilitator filter runs after the vector search, so fewer
# than top_k rows may come back when the nearest voices are not facilitators'.
response = rag.search(query_text="How did facilitators respond to uncertainty?", retriever_config={"top_k": 2})

# Print results
print(response.answer)