In [8]:
# %pip install pandas
# %pip install langchain langchain_community
# %pip install neo4j
# %pip install langchain_openai
# %pip install python-dotenv
# %pip install chromadb
# %pip install "numpy<2"

In [9]:
import pandas as pd
import os
from langchain_community.graphs import Neo4jGraph
from langchain_core.messages import AIMessage
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from dotenv import load_dotenv

In [10]:
# Neo4j connection settings.
# Values already present in the environment (or in a local .env file) win; the
# literals below are only defaults for a local development instance.
from getpass import getpass

load_dotenv()
os.environ.setdefault('NEO4J_URI', 'bolt://localhost:7690')
os.environ.setdefault('NEO4J_USERNAME', 'neo4j')

# Never keep the database password in the notebook: take it from the
# environment / .env, and only prompt interactively when it is missing.
if not os.environ.get('NEO4J_PASSWORD'):
    os.environ['NEO4J_PASSWORD'] = getpass('Neo4j password: ')

# Neo4jGraph() is called without arguments, so it relies on the NEO4J_*
# environment variables prepared above.
graph = Neo4jGraph()

In [11]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# The API key is a secret and must be supplied through the environment or .env.
# Fail early with an actionable message: assigning os.getenv(...) back into
# os.environ raises an opaque "str expected, not NoneType" TypeError when unset.
if not os.getenv("AZURE_OPENAI_API_KEY"):
    raise RuntimeError(
        "AZURE_OPENAI_API_KEY is not set; export it or add it to a .env file "
        "before running this cell."
    )

# Non-secret Azure OpenAI settings used by the clients below.
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://sriks-openai.openai.azure.com/"
os.environ["AZURE_OPENAI_API_VERSION"] = "2024-05-01-preview"
os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"] = "gpt-4o"
os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT"] = "text-embedding-ada-002"

# Chat model used for Cypher generation, confidence scoring and the agent.
llm = AzureChatOpenAI(
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"],
)

# Embedding model used by the few-shot example selector.
embeddings = AzureOpenAIEmbeddings(
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    azure_deployment=os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT"],
)

In [12]:
# Few-shot (question -> Cypher) pairs offered to the LLM as guidance.
# Each entry needs exactly the keys "question" and "query", matching the
# variables of EXAMPLES_PROMPT_TEMPLATE.
examples = [
    {
        "question": "What is the root cause request id 8ff8696695aa73588ac454809741e2ea",
        # The operation id is a string literal, so it must be quoted, and the OR
        # group is parenthesised so the id filter applies to every matched row
        # (AND binds tighter than OR in Cypher).
        "query": "MATCH (n)-[r:DEPENDS_ON]->(m) where (n.id <> 'ROOT' OR m.id <> 'ROOT') and r.operationId = '8ff8696695aa73588ac454809741e2ea' RETURN n.id, r.duration, r.operation_Name, m.id ORDER BY r.duration DESC LIMIT 3",
    },
    {
        "question": "what is the longest running operation",
        "query": "MATCH (n)-[r:DEPENDS_ON]->(m) where n.id <> 'ROOT' OR m.id <> 'ROOT' RETURN n.id, r.duration, r.operation_Name, m.id ORDER BY r.duration DESC LIMIT 3",
    },
]

In [13]:
from langchain.chains import GraphCypherQAChain
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate
from langchain_core.example_selectors import SemanticSimilarityExampleSelector, MaxMarginalRelevanceExampleSelector
from langchain.vectorstores import Chroma
from langchain.chains.llm import LLMChain
from langchain.chains.graph_qa.cypher import construct_schema
from langchain.chains import SimpleSequentialChain, SequentialChain
from langchain.agents import AgentType, initialize_agent, load_tools

# Tools handed to the agent later on: only the "human" tool, which lets the
# agent defer to the person running the notebook.
tools = load_tools(["human"], llm)

# Ask the graph object to refresh its schema before it is used in prompts.
graph.refresh_schema()

# Prompt text for turning query results into a natural-language answer.
# Placeholders: {context} (the retrieved information) and {question}.
# NOTE: the text is whitespace-sensitive prompt content; edit with care.
QA_GENERATION_TEMPLATE = """
       Task: answer the question you are given based on the context provided.
       Instructions:
        You are an assistant that helps to form nice and human understandable answers. 
        Use the context information provided to generate a well organized and comprehensve answer to the user's question. 
        When the provided information contains multiple elements, structure your answer as a bulleted or numbered list to enhance clarity and readability.
        You must use the information to construct your answer. 
        The provided information is authoritative; do not doubt it or try to use your internal knowledge to correct it. 
        Make the answer sound like a response to the question without mentioning that you based the result on the given information. 
        If there is no information provided, say that the knowledge base returned empty results.

        Here's the information:
        {context}

        Question: {question}
        Answer:
            """
# Layout of a single few-shot example; {question} and {query} are filled from
# the entries of the `examples` list.
EXAMPLES_PROMPT_TEMPLATE = """   
                Input: {question},
                Output: {query}
            """

# Answer-generation prompt. NOTE(review): in the visible code this is only
# referenced by the commented-out `#| qaPrompt` stage of `final_chain`.
qaPrompt = PromptTemplate(input_variables=["context", "question"], template=QA_GENERATION_TEMPLATE)

# Formatter applied to each selected example inside the few-shot Cypher prompt.
example_prompt = PromptTemplate(input_variables=["question", "query"], template=EXAMPLES_PROMPT_TEMPLATE)

# Instruction block (prefix) of the text-to-Cypher prompt.
# Placeholders: {schema} for the graph schema and {question} for the user question.
cypherPromptTemplate = """
You are an expert Neo4j Developer translating user questions into Cypher to answer questions.
Convert the user's question based on the schema.
Instructions: Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}

Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.

Important: In the generated Cypher query, the RETURN statement must explicitly include the property values used in the query's filtering condition, alongside the main information requested from the original question.

Question: {question}
input:
"""

# Selects the single (k=1) example from `examples` that is closest to the
# incoming input, using the Azure embeddings and a Chroma vector store.
similaritySelector = SemanticSimilarityExampleSelector.from_examples(
    vectorstore_cls=Chroma,
    embeddings=embeddings,
    examples=examples,
    k=1,
)

# Few-shot prompt layout: instruction prefix, the selected example rendered
# with `example_prompt`, then the question once more as the suffix.
cypherPrompt = FewShotPromptTemplate(
    prefix=cypherPromptTemplate,
    example_prompt=example_prompt,
    example_selector=similaritySelector,
    suffix="the question is:{question}",
    input_variables=["question", "schema"],
)

# Text-to-Cypher chain over the Neo4j graph.
# - cypher_prompt: the few-shot prompt built above. The keyword must be spelled
#   `cypher_prompt`; the previous `cypherPrompt=` spelling is not a `from_llm`
#   parameter, so the custom prompt was never used for Cypher generation.
# - return_direct / output_key: the chain result carries the raw query rows
#   under the "input" key (see the recorded cell output).
# - return_intermediate_steps: keeps the generated Cypher under
#   "intermediate_steps", which `queryExtractor` reads downstream.
cypherqachain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    cypher_prompt=cypherPrompt,
    validate_cypher=True,
    use_function_response=True,
    return_direct=True,
    return_intermediate_steps=True,
    output_key="input",
    k=3,
    verbose=True,
)

# Schema text derived from the graph's structured schema (no include/exclude
# filters); `queryExtractor` passes it to the confidence prompt as a global.
schema = construct_schema(graph.get_structured_schema, [], [])

In [14]:
# Smoke-test the Cypher chain on its own with a sample question.
sample_request = {"query": "which nodes has max azure dependencies"}
cypherqachain.invoke(sample_request)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (crn:cloudRoleName)-[r:AZURE_DEPENDS_ON]->(sql:SQL)
RETURN crn.id, COUNT(r) AS azureDependencyCount
ORDER BY azureDependencyCount DESC
LIMIT 1
[0m

[1m> Finished chain.[0m


{'query': 'which nodes has max azure dependencies',
 'input': [{'crn.id': 'people-api', 'azureDependencyCount': 23}],
 'intermediate_steps': [{'query': 'cypher\nMATCH (crn:cloudRoleName)-[r:AZURE_DEPENDS_ON]->(sql:SQL)\nRETURN crn.id, COUNT(r) AS azureDependencyCount\nORDER BY azureDependencyCount DESC\nLIMIT 1\n'}]}

The code below creates a confidence chain that asks the LLM how confident it is in the generated query.

In [43]:
# Prompt asking the model to rate the generated query on a 1-5 scale.
# {input} receives the generated Cypher and {schema} the graph schema text.
confidence_prompt_template = (
    "Given the {input}, {schema} how confident are you in the query on scale of 1-5, answer precisely in JSON format\n"
    "score: 1, reason: The query is not clear, assumptions: The query is not clear"
)

confidence_prompt = PromptTemplate(
    input_variables=["input", "schema"],
    template=confidence_prompt_template,
)

# Chain that sends the confidence prompt to the chat model.
confidenceChain = LLMChain(prompt=confidence_prompt, llm=llm)

In [44]:
# Question intended to be put to the human operator.
human_interaction_prompt = "How do you want to proceed?"

# ReAct-style agent equipped with the human-input tool loaded earlier.
# NOTE(review): `prompt=` is presumably not consumed by `initialize_agent`
# (agent prompt customisation normally goes through `agent_kwargs`) - confirm.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent="zero-shot-react-description",
    verbose=True,
    prompt=human_interaction_prompt,
)

In [45]:
def queryExtractor(msg: dict) -> dict:
    """Turn the Cypher chain's output into the inputs of the confidence prompt.

    Args:
        msg: Output dict of ``cypherqachain``. The generated Cypher is read
            from ``msg["intermediate_steps"][0]["query"]``; no other key is
            required.

    Returns:
        ``{"input": <generated Cypher>, "schema": <schema>}`` where ``schema``
        is the module-level variable built next to the chain.

    Raises:
        KeyError, IndexError: if ``msg`` carries no intermediate steps.
    """
    print(msg)  # trace the raw chain output while the pipeline runs
    generated_cypher = msg["intermediate_steps"][0]["query"]
    return {"input": generated_cypher, "schema": schema}

def extractFinalContext(msg: dict) -> dict:
    """Pick the value to execute out of the previous pipeline stage's output.

    Args:
        msg: Mapping produced by the preceding stage; only ``msg["input"]`` is
            read. In this pipeline that value is expected to be the generated
            Cypher query.

    Returns:
        ``{"context": msg["input"]}``.

    Raises:
        KeyError: if ``msg`` has no ``"input"`` entry.
    """
    print(msg)  # trace the stage output while the pipeline runs
    return {"context": msg["input"]}

def _strip_cypher_tag(query: str) -> str:
    """Drop a leading ``cypher`` language-tag line, leaving the statement itself untouched."""
    first_line, _, remainder = query.lstrip().partition("\n")
    if first_line.strip() == "cypher":
        return remainder.strip()
    return query


def executeCypher(msg: dict) -> pd.DataFrame:
    """Run the Cypher statement found in ``msg["context"]`` and tabulate the rows.

    The generated text may start with a ``cypher`` language-tag line (see the
    recorded chain output). Only that leading line is removed; a blanket
    ``str.replace("cypher", "")`` would also corrupt labels, property names or
    string literals that merely contain the word.

    Args:
        msg: Mapping with the statement under the ``"context"`` key.

    Returns:
        A DataFrame built from the result of ``graph.query``.

    Raises:
        KeyError: if ``msg`` has no ``"context"`` entry.
    """
    statement = _strip_cypher_tag(msg["context"])
    rows = graph.query(statement)
    result = pd.DataFrame(rows)
    print(result)
    return result

# End-to-end pipeline: generate Cypher -> build confidence inputs -> score
# confidence -> agent with human tool -> pick the query -> execute it.
# The answer-formatting stage (qaPrompt) is currently left out.
final_chain = (
    cypherqachain
    | queryExtractor
    | confidenceChain
    | agent
    | extractFinalContext
    | executeCypher
)

In [46]:
# Run the whole pipeline for a sample question.
pipeline_request = {"query": "how many nodes are connected to sql"}
final_chain.invoke(pipeline_request)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (:cloudRoleName)-[:AZURE_DEPENDS_ON]->(sql:SQL)
RETURN count(DISTINCT sql)
[0m

[1m> Finished chain.[0m
{'query': 'how many nodes are connected to sql', 'input': [{'count(DISTINCT sql)': 2}], 'intermediate_steps': [{'query': 'cypher\nMATCH (:cloudRoleName)-[:AZURE_DEPENDS_ON]->(sql:SQL)\nRETURN count(DISTINCT sql)\n'}]}


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe question appears to involve a Cypher query related to Neo4j, a graph database, querying for SQL nodes that have a specific type of relationship with cloudRoleName nodes.

Let's break down the provided Cypher query:
- `MATCH (:cloudRoleName)-[:AZURE_DEPENDS_ON]->(sql:SQL)`: This part of the query is looking for a pattern where there are nodes with the label `cloudRoleName` that have an `AZURE_DEPENDS_ON` relationship to nodes labeled `SQL`.
- `RETURN count(DISTINCT sql)`: This part returns the count of distin

Unnamed: 0,count(DISTINCT sql)
0,2
