### Q&A App Using Langchain and GraphDB

In [1]:
import os
from dotenv import load_dotenv

load_dotenv()

True

In [2]:
NEO4J_URI= os.getenv("NEO4J_URI")
NEO4J_USERNAME=os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD=os.getenv("NEO4J_PASSWORD")

In [3]:
os.environ["NEO4J_URI"] = NEO4J_URI
os.environ["NEO4J_USERNAME"] = NEO4J_USERNAME
os.environ["NEO4J_PASSWORD"] = NEO4J_PASSWORD

In [8]:
from langchain_community.graphs import Neo4jGraph
graph=Neo4jGraph(url=NEO4J_URI,username=NEO4J_USERNAME,password=NEO4J_PASSWORD)

### Movie Dataset

In [28]:
movie_query = """LOAD CSV WITH HEADERS FROM 
            'https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/movies/movies_small.csv' AS row

            MERGE (m:Movie {id: row.movieId})
            SET m.released = date(row.released),
                m.title = row.title,
                m.imdbRating = toFloat(row.imdbRating)
            FOREACH (director IN split(row.director, '|') |
                MERGE (p:Person {name: trim(director)})
                MERGE (p)-[:DIRECTED]->(m)
            )
            FOREACH (actor IN split(row.actors, '|') |
                MERGE (p:Person {name: trim(actor)})
                MERGE (p)-[:ACTED_IN]->(m)
            )
            FOREACH (genre IN split(row.genres, '|') |
                MERGE (g:Genre {name: trim(genre)})
                MERGE (m)-[:IN_GENRE]->(g)
            )"""

In [29]:
graph.query(movie_query)

[]

In [30]:
graph.refresh_schema()
print(graph.schema)

Node properties:
Company {NSE: STRING, MCAP: FLOAT, FNO: STRING, Nifty50: STRING}
Entrepreneur {name: STRING, POB: STRING, YOB: INTEGER}
Country {name: STRING}
Student {name: STRING, POB: STRING, YOB: INTEGER}
Person {name: STRING, YOB: INTEGER}
Movie {release: INTEGER, title: STRING, id: STRING, released: DATE, imdbRating: FLOAT}
User {name: STRING, city: STRING, userID: INTEGER, age: INTEGER}
Post {postId: INTEGER, content: STRING, timestamp: DATE_TIME}
Genre {name: STRING}
Relationship properties:

The relationships:
(:Person)-[:ACTED_IN]->(:Movie)
(:Person)-[:DIRECTED]->(:Movie)
(:Movie)-[:IN_GENRE]->(:Genre)
(:User)-[:POSTED]->(:Post)
(:User)-[:LIKES]->(:User)
(:User)-[:FRIEND]->(:User)


In [12]:
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

In [13]:
from langchain_groq import ChatGroq
llm = ChatGroq(model="Llama3-8b-8192")

In [24]:
from langchain.chains import GraphCypherQAChain

In [25]:
chain = GraphCypherQAChain.from_llm(graph=graph, llm=llm,verbose=True)

In [26]:
chain 

GraphCypherQAChain(verbose=True, graph=<langchain_community.graphs.neo4j_graph.Neo4jGraph object at 0x7fc2380c24a0>, cypher_generation_chain=LLMChain(prompt=PromptTemplate(input_variables=['question', 'schema'], template='Task:Generate Cypher statement to query a graph database.\nInstructions:\nUse only the provided relationship types and properties in the schema.\nDo not use any other relationship types or properties that are not provided.\nSchema:\n{schema}\nNote: Do not include any explanations or apologies in your responses.\nDo not respond to any questions that might ask anything else than for you to construct a Cypher statement.\nDo not include any text except the generated Cypher statement.\n\nThe question is:\n{question}'), llm=ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x7fc237319d50>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x7fc23731a9e0>, model_name='Llama3-8b-8192', groq_api_key=SecretStr('**********'))), qa_chai

In [40]:
response = chain.invoke({"query": "When Satyamev Jayate released?"})



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Movie) WHERE m.title = "Satyamev Jayate" RETURN m.release;[0m
Full Context:
[32;1m[1;3m[{'m.release': 1994}][0m

[1m> Finished chain.[0m


In [41]:
response

{'query': 'When Satyamev Jayate released?', 'result': '1994'}

In [43]:
response = chain.invoke({"query": "Who is director of Species?"})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Person)-[:DIRECTED]->(m:Movie) WHERE m.title = "Species" RETURN p.name AS DirectorName;[0m
Full Context:
[32;1m[1;3m[{'DirectorName': 'Roger Donaldson'}][0m

[1m> Finished chain.[0m


{'query': 'Who is director of Species?',
 'result': 'Roger Donaldson is the director of Species.'}

In [44]:
response = chain.invoke({"query": "Who is actor in Destiny Turns on the Radio?"})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Person)-[:ACTED_IN]->(m:Movie) WHERE m.title = "Destiny Turns on the Radio" RETURN p.name AS ActorName;[0m
Full Context:
[32;1m[1;3m[{'ActorName': 'Quentin Tarantino'}, {'ActorName': 'James Le Gros'}, {'ActorName': 'Dylan McDermott'}, {'ActorName': 'Nancy Travis'}][0m

[1m> Finished chain.[0m


{'query': 'Who is actor in Destiny Turns on the Radio?',
 'result': 'Quentin Tarantino, James Le Gros, Dylan McDermott, Nancy Travis is an actor in Destiny Turns on the Radio.'}

### Prompt Strategies

In [45]:
chain1 = GraphCypherQAChain.from_llm(graph=graph, llm=llm, exclude_types=['Genre'], verbose=True)
chain1

GraphCypherQAChain(verbose=True, graph=<langchain_community.graphs.neo4j_graph.Neo4jGraph object at 0x7fc2380c24a0>, cypher_generation_chain=LLMChain(prompt=PromptTemplate(input_variables=['question', 'schema'], template='Task:Generate Cypher statement to query a graph database.\nInstructions:\nUse only the provided relationship types and properties in the schema.\nDo not use any other relationship types or properties that are not provided.\nSchema:\n{schema}\nNote: Do not include any explanations or apologies in your responses.\nDo not respond to any questions that might ask anything else than for you to construct a Cypher statement.\nDo not include any text except the generated Cypher statement.\n\nThe question is:\n{question}'), llm=ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x7fc237319d50>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x7fc23731a9e0>, model_name='Llama3-8b-8192', groq_api_key=SecretStr('**********'))), qa_chai

In [47]:
chain1.graph_schema

'Node properties are the following:\nCompany {NSE: STRING, MCAP: FLOAT, FNO: STRING, Nifty50: STRING},Entrepreneur {name: STRING, POB: STRING, YOB: INTEGER},Country {name: STRING},Student {name: STRING, POB: STRING, YOB: INTEGER},Person {name: STRING, YOB: INTEGER},Movie {release: INTEGER, title: STRING, id: STRING, released: DATE, imdbRating: FLOAT},User {name: STRING, city: STRING, userID: INTEGER, age: INTEGER},Post {postId: INTEGER, content: STRING, timestamp: DATE_TIME}\nRelationship properties are the following:\n\nThe relationships are the following:\n(:Person)-[:ACTED_IN]->(:Movie),(:Person)-[:DIRECTED]->(:Movie),(:User)-[:POSTED]->(:Post),(:User)-[:LIKES]->(:User),(:User)-[:FRIEND]->(:User)'

In [48]:
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate

In [49]:
examples = [
    {
        "question": "How many artists are there?",
        "query": "MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)",
    },
    {
        "question": "Which actors played in the movie Casino?",
        "query": "MATCH (m:Movie {{title: 'Casino'}})<-[:ACTED_IN]-(a) RETURN a.name",
    },
    {
        "question": "How many movies has Tom Hanks acted in?",
        "query": "MATCH (a:Person {name: 'Tom Hanks'})-[:ACTED_IN]->(m:Movie) RETURN count(m)",
    },
    {
        "question": "List all the genres of the movie Schindler's List",
        "query": "MATCH (m:Movie {{title: 'Schindler\\'s List'}})-[:IN_GENRE]->(g:Genre) RETURN g.name",
    },
    {
        "question": "Which actors have worked in movies from both the comedy and action genres?",
        "query": "MATCH (a:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name",
    },
    {
        "question": "Which directors have made movies with at least three different actors named 'John'?",
        "query": "MATCH (d:Person)-[:DIRECTED]->(m:Movie)<-[:ACTED_IN]-(a:Person) WHERE a.name STARTS WITH 'John' WITH d, COUNT(DISTINCT a) AS JohnsCount WHERE JohnsCount >= 3 RETURN d.name",
    },
    {
        "question": "Identify movies where directors also played a role in the film.",
        "query": "MATCH (p:Person)-[:DIRECTED]->(m:Movie), (p)-[:ACTED_IN]->(m) RETURN m.title, p.name",
    },
    {
        "question": "Find the actor with the highest number of movies in the database.",
        "query": "MATCH (a:Actor)-[:ACTED_IN]->(m:Movie) RETURN a.name, COUNT(m) AS movieCount ORDER BY movieCount DESC LIMIT 1",
    },
]

In [63]:
example_prompt=PromptTemplate.from_template(
    "User input:{question}\n Cypher query:{query}"
)

prompt=FewShotPromptTemplate(
    examples=examples[:5],
    example_prompt=example_prompt,
    prefix="You are a Neo4j expert. Given an input question,create a syntactically very accurate Cypher query",
    suffix="User input: {question}\nCypher query: ",
    input_variables=["question"]
)

In [64]:
prompt

FewShotPromptTemplate(input_variables=['question'], examples=[{'question': 'How many artists are there?', 'query': 'MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)'}], example_prompt=PromptTemplate(input_variables=['query', 'question'], template='User input:{question}\n Cypher query:{query}'), suffix='User input: {question}\nCypher query: ', prefix='You are a Neo4j expert. Given an input question,create a syntactically very accurate Cypher query')

In [65]:
try:
    formatted_prompt = prompt.format(question="How many artists are there?")
    print(formatted_prompt)
except KeyError as e:
    print(f"KeyError: {e}")

You are a Neo4j expert. Given an input question,create a syntactically very accurate Cypher query

User input:How many artists are there?
 Cypher query:MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)

User input: How many artists are there?
Cypher query: 


In [67]:
chain2 = GraphCypherQAChain.from_llm(graph=graph, llm=llm, cypher_prompt=prompt, verbose=True)

In [68]:
chain2.invoke("Which actors played in the movie Casino?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (a:Person)-[:ACTED_IN]->(m:Movie {title: "Casino"})
RETURN a
[0m
Full Context:
[32;1m[1;3m[{'a': {'name': 'Robert De Niro'}}, {'a': {'name': 'Joe Pesci'}}, {'a': {'name': 'Sharon Stone'}}, {'a': {'name': 'James Woods'}}][0m

[1m> Finished chain.[0m


{'query': 'Which actors played in the movie Casino?',
 'result': 'Robert De Niro, Joe Pesci, Sharon Stone, James Woods played in the movie Casino.'}

In [69]:
chain2.invoke("How many movies has Tom Hanks acted in?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m
MATCH (tom_hanks:Person {name: "Tom Hanks"})-[:ACTED_IN]->(movie:Movie)
RETURN COUNT(DISTINCT movie)
[0m
Full Context:
[32;1m[1;3m[{'COUNT(DISTINCT movie)': 2}][0m

[1m> Finished chain.[0m


{'query': 'How many movies has Tom Hanks acted in?',
 'result': 'Tom Hanks has acted in 2 movies.'}

In [71]:
chain2.invoke("Find the actor with the highest number of movies in the database.")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m
MATCH (a:Person)-[:ACTED_IN]->(m:Movie)
RETURN a, COUNT(m) AS num_movies
ORDER BY num_movies DESC
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'a': {'name': 'Gene Hackman'}, 'num_movies': 4}][0m

[1m> Finished chain.[0m


{'query': 'Find the actor with the highest number of movies in the database.',
 'result': "I couldn't find any information about the actor with the highest number of movies in the database, as the provided information only contains a single actor with 4 movies."}