**Install the required Python packages**

```bash
pip install -r requirements.txt
```


In [1]:
import os
from typing import Any, Dict, List, Union

import pandas as pd
from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Neo4jVector
from langchain_community.chains.graph_qa.cypher import GraphCypherQAChain
from langchain_community.graphs.neo4j_graph import Neo4jGraph
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from neo4j import GraphDatabase

# Load environment variables
load_dotenv()

True

In [2]:
# Connection settings are read from the process environment.
# The URI and user fall back to a local default; the two secrets have no
# fallback and are None when the variable is not set.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [3]:
# Shared connection objects used by the rest of the notebook
database = "pdfgraphrag"  # Custom database name
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
embeddings = OpenAIEmbeddings()

# Lower-case aliases of the connection settings
neo4j_url, neo4j_username, neo4j_password = NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD

In [4]:
# Create the database if it doesn't exist
try:
    # Database administration commands are issued against the "system" database
    with driver.session(database="system") as session:
        # Check if database exists
        result = session.run("SHOW DATABASES")
        databases = [record["name"] for record in result]

        if database not in databases:
            print(f"Creating database: {database}")
            # IF NOT EXISTS keeps this safe if the database appears between the
            # check above and this statement. WAIT asks the server to hold the
            # command until the database is available, and consume() makes the
            # driver read the whole response, so the Neo4jGraph connection below
            # does not race a database that is still starting up.
            session.run(f"CREATE DATABASE {database} IF NOT EXISTS WAIT").consume()
            print(f"Database {database} created successfully")
        else:
            print(f"Database {database} already exists")

except Exception as e:
    print(f"Error creating database: {e}")
    raise

# Test connection
try:
    driver.verify_connectivity()
    print("Successfully connected to Neo4j!")
except Exception as e:
    print(f"Failed to connect to Neo4j: {e}")
    raise

# Initialize Neo4jGraph with custom database
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USER,
    password=NEO4J_PASSWORD,
    database=database,
)

Database pdfgraphrag already exists
Successfully connected to Neo4j!


In [5]:
# Chat model shared by the graph transformer and the QA chain
llm = ChatOpenAI(
    model_name="gpt-4o",
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
)


In [6]:
# Read the brand brief PDF and let the loader split it into documents
pdf_path = "Brief - Chips Ahoy!.pdf"
loader = PyPDFLoader(pdf_path)
pages = loader.load_and_split()

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 13 0 (offset 0)


In [7]:
# Print every loaded page together with its index to inspect the extracted text
for page_index, page_doc in enumerate(pages):
    print(f"Page {page_index}: {page_doc}")

Page 0: page_content='Brand Brief: Chips Ahoy!  Executive Summary: Chips Ahoy! is an iconic cookie brand synonymous with fun, indulgence, and happiness. Baked with irresistible chocolate chips in every bite, these delicious cookies elevate everyday moments into bursts of joy. Whether enjoyed solo or shared with friends, Chips Ahoy! delivers a satisfyingly crunchy and chocolatey experience that resonates particularly with Gen Z consumers. Chip is the brand mascot for Chips Ahoy! Cookies. Brand Description: • Beloved for its irresistible taste and playful spirit. • Creates a burst of happiness and sweetness in everyday moments. • Offers a comforting and satisfyingly crunchy, chocolatey experience. Key Brand Features Tags: • Fun: Chips Ahoy! injects playfulness into snacking, making any moment more enjoyable. • Delicious: Unmatched taste with a perfect balance of sweetness and chocolate that tantalizes taste buds and leaves consumers wanting more. • Happy: Chips Ahoy! strives to be a sour

In [8]:
# Split the loaded pages again, with chunk size 200 and overlap 40
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=40,
)
docs = text_splitter.split_documents(pages)

In [9]:
# Rebuild each chunk as a fresh Document: newline characters are removed from
# the text and the metadata is replaced by just the source file name.
lc_docs = [
    Document(
        page_content=chunk.page_content.replace("\n", ""),
        metadata={'source': "Brief - Chips Ahoy!.pdf"},
    )
    for chunk in docs
]

In [10]:
# Remove every node, together with its relationships, from the graph
cypher = (
    "\n"
    "    MATCH (n)\n"
    "    DETACH DELETE n;\n"
)
graph.query(cypher)

[]

In [11]:
 # Define allowed nodes and relationships
# Node labels the graph transformer is allowed to produce
allowed_nodes = [
    "Brand",
    "Character",
    "Characteristic",
    "Communication Channel",
    "Concept",
    "Demographic",
    "Mascot",
    "Person",
    "Product",
]

# Relationship types the graph transformer is allowed to produce
allowed_relationships = [
    "GIVES",
    "HAS_CHARACTERISTIC",
    "HAS_MASCOT",
    "REPRESENTS",
    "TARGETS",
    "USES_CHANNEL",
]

In [12]:
# Configure the LLM-based graph transformer: output is restricted to the
# allowed labels and relationship types, with property extraction switched off.
extraction_options = dict(
    allowed_nodes=allowed_nodes,
    allowed_relationships=allowed_relationships,
    node_properties=False,
    relationship_properties=False,
)
transformer = LLMGraphTransformer(llm=llm, **extraction_options)


In [13]:
# Convert the chunks into graph documents and write them to Neo4j
graph_documents = transformer.convert_to_graph_documents(lc_docs)
graph.add_graph_documents(
    graph_documents,
    include_source=True,  # keep the originating chunks alongside the extracted entities
)

In [14]:
 # Use the stored connection parameters
# Connection settings and index configuration, grouped by concern
connection_settings = dict(
    url=neo4j_url,
    username=neo4j_username,
    password=neo4j_password,
    database=database,
)
index_settings = dict(
    node_label="Brand",  # Adjust node_label as needed
    text_node_properties=["id", "text"],
    embedding_node_property="embedding",
    index_name="vector_index",
    keyword_index_name="entity_index",
    search_type="hybrid",
)
index = Neo4jVector.from_existing_graph(
    embedding=embeddings,
    **connection_settings,
    **index_settings,
)


In [15]:
# Retrieve the graph schema
# Refresh first so the schema reflects what is currently stored in the database,
# then keep it in `schema` and print it for inspection.
graph.refresh_schema()
schema = graph.get_schema
print(schema)

Node properties:
Document {id: STRING, text: STRING, source: STRING}
Brand {id: STRING, embedding: LIST}
Characteristic {id: STRING}
Product {id: STRING}
Demographic {id: STRING}
Mascot {id: STRING}
Communication channel {id: STRING}
Concept {id: STRING}
Relationship properties:

The relationships:
(:Document)-[:MENTIONS]->(:Brand)
(:Document)-[:MENTIONS]->(:Characteristic)
(:Document)-[:MENTIONS]->(:Product)
(:Document)-[:MENTIONS]->(:Mascot)
(:Document)-[:MENTIONS]->(:Demographic)
(:Document)-[:MENTIONS]->(:Communication channel)
(:Document)-[:MENTIONS]->(:Concept)
(:Brand)-[:HAS_CHARACTERISTIC]->(:Characteristic)
(:Brand)-[:USES_CHANNEL]->(:Communication channel)
(:Brand)-[:USES_CHANNEL]->(:Concept)
(:Brand)-[:TARGETS]->(:Demographic)
(:Brand)-[:HAS_MASCOT]->(:Mascot)
(:Brand)-[:REPRESENTS]->(:Concept)
(:Characteristic)-[:TARGETS]->(:Demographic)
(:Characteristic)-[:HAS_CHARACTERISTIC]->(:Characteristic)
(:Product)-[:HAS_CHARACTERISTIC]->(:Characteristic)
(:Demographic)-[:HAS_CHARAC

In [16]:
def setup_qa_chain():
    """Build the graph question-answering chain.

    Uses the module-level ``llm`` and ``graph`` objects.

    Returns:
        The chain created by ``GraphCypherQAChain.from_llm``.

    Raises:
        Exception: Any error raised while building the chain is printed and
            then re-raised unchanged.
    """
    try:
        chain = GraphCypherQAChain.from_llm(
            llm=llm,
            graph=graph,
            verbose=True,
            allow_dangerous_requests=True,
            top_k=10,  # Return more results
        )
    except Exception as exc:
        print(f"Error setting up QA chain: {str(exc)}")
        raise
    else:
        print("Successfully set up QA chain")
        return chain

In [17]:
def ask_question(qa_chain, question: str) -> Union[Dict[str, Any], str]:
    """Run a natural-language question through the graph QA chain.

    Args:
        qa_chain: Object exposing ``invoke(question)``, such as the chain
            returned by ``setup_qa_chain``.
        question: The question text handed to the chain.

    Returns:
        Whatever ``qa_chain.invoke`` returns (the recorded runs show a dict
        with ``query`` and ``result`` keys), or the string
        ``"Error: <message>"`` when the call raises.
    """
    try:
        return qa_chain.invoke(question)
    except Exception as e:
        # Best effort: report the failure and return a printable marker so a
        # caller looping over several questions can carry on with the next one.
        print(f"Error processing question: {e}")
        return f"Error: {e}"

In [18]:
def cleanup_database():
    """Stop and then drop the notebook's database.

    Both commands are sent through a session on the "system" database using
    the module-level ``driver`` and ``database``. Errors are printed and not
    re-raised.
    """
    try:
        with driver.session(database="system") as admin_session:
            # Stop the database first, then drop it
            admin_statements = (
                f"STOP DATABASE {database}",
                f"DROP DATABASE {database} IF EXISTS",
            )
            for statement in admin_statements:
                admin_session.run(statement)
            print(f"Database {database} cleaned up successfully")
    except Exception as exc:
        print(f"Error cleaning up database: {str(exc)}")

In [19]:
def close():
    """Close the module-level Neo4j driver.

    Prints a confirmation on success; an error while closing is printed and
    not re-raised.
    """
    try:
        # cleanup_database() is left disabled here, so closing does not drop the database
        driver.close()
    except Exception as exc:
        print(f"Error closing Neo4j connection: {str(exc)}")
    else:
        print("Successfully closed Neo4j connection")

In [20]:
try:
    # Build the chain first; if this fails the questions are skipped and
    # control goes straight to the handlers below.
    print("\nSetting up QA chain...")
    qa_chain = setup_qa_chain()

    # Example questions specific to the dataset
    questions = [
        "What is the brand?",
        "Who are the Target Audience?",
        "What is Strategic Focus of the campaign?",
    ]

    # Put each question to the chain in turn and print its answer
    print("\nProcessing questions...")
    for question in questions:
        print(f"\nQuestion: {question}")
        answer = ask_question(qa_chain, question)
        print(f"Answer: {answer}")
except Exception as exc:
    print(f"An error occurred: {str(exc)}")
finally:
    # The driver is closed whether or not the questions succeeded
    print("\nClosing connections...")
    close()


Setting up QA chain...
Successfully set up QA chain

Processing questions...

Question: What is the brand?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (b:Brand)
RETURN b.id
[0m
Full Context:
[32;1m[1;3m[{'b.id': 'Chips Ahoy!'}, {'b.id': 'Chips Ahoy! Cookies'}, {'b.id': 'Brand'}][0m

[1m> Finished chain.[0m
Answer: {'query': 'What is the brand?', 'result': 'Chips Ahoy! is the brand.'}

Question: Who are the Target Audience?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (d:Demographic)
RETURN d.id
[0m
Full Context:
[32;1m[1;3m[{'d.id': 'Gen Z'}, {'d.id': 'Young Adult Demographic'}][0m

[1m> Finished chain.[0m
Answer: {'query': 'Who are the Target Audience?', 'result': 'Gen Z, Young Adult Demographic are the target audience.'}

Question: What is Strategic Focus of the campaign?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH 