In [1]:
# %pip install pandas
# %pip install langchain langchain_community
# %pip install neo4j
# %pip install langchain_openai
# %pip install python-dotenv
# %pip install chromadb
# %pip install "numpy<2"

In [2]:
import pandas as pd
import os
from langchain_community.graphs import Neo4jGraph

In [3]:
os.environ['NEO4J_URI'] = 'bolt://localhost:7690'
os.environ['NEO4J_USERNAME'] = 'neo4j'
os.environ['NEO4J_PASSWORD'] = 'Password@123'
graph = Neo4jGraph()

In [4]:
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from dotenv import load_dotenv

load_dotenv()

os.environ["AZURE_OPENAI_API_KEY"] = os.getenv("AZURE_OPENAI_API_KEY")
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://sriks-openai.openai.azure.com/"
os.environ["AZURE_OPENAI_API_VERSION"] = "2024-05-01-preview"
os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"] = "gpt-4o"
os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT"] = "text-embedding-ada-002"

llm = AzureChatOpenAI(
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"],
)

embeddings = AzureOpenAIEmbeddings(
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    azure_deployment=os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT"],
)

In [5]:
examples = [
    {
        "question": "What is the root cause request id 8ff8696695aa73588ac454809741e2ea",
        "query": "MATCH (n)-[r:DEPENDS_ON]->(m) where n.id <> 'ROOT' OR m.id <> 'ROOT' and r.operationId = 8ff8696695aa73588ac454809741e2ea RETURN n.id, r.duration, r.operation_Name, m.id ORDER BY r.duration DESC LIMIT 3",
    },
    {
        "question": "what is the longest running operation",
        "query": "MATCH (n)-[r:DEPENDS_ON]->(m) where n.id <> 'ROOT' OR m.id <> 'ROOT' RETURN n.id, r.duration, r.operation_Name, m.id ORDER BY r.duration DESC LIMIT 3",
    }
]

In [289]:
from langchain.chains import GraphCypherQAChain
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate
from langchain_core.example_selectors import SemanticSimilarityExampleSelector, MaxMarginalRelevanceExampleSelector
from langchain.vectorstores import Chroma
from langchain.chains.llm import LLMChain
from langchain.chains.graph_qa.cypher import construct_schema
from langchain.chains import SimpleSequentialChain, SequentialChain
from langchain.agents import AgentType, initialize_agent, load_tools

tools = load_tools(
    ["human"], llm
)

graph.refresh_schema()

QA_GENERATION_TEMPLATE = """
       Task: answer the question you are given based on the context provided.
       Instructions:
        You are an assistant that helps to form nice and human understandable answers. 
        Use the context information provided to generate a well organized and comprehensve answer to the user's question. 
        When the provided information contains multiple elements, structure your answer as a bulleted or numbered list to enhance clarity and readability.
        You must use the information to construct your answer. 
        The provided information is authoritative; do not doubt it or try to use your internal knowledge to correct it. 
        Make the answer sound like a response to the question without mentioning that you based the result on the given information. 
        If there is no information provided, say that the knowledge base returned empty results.

        Here's the information:
        {context}

        Question: {question}
        Answer:
            """
EXAMPLES_PROMPT_TEMPLATE = """   
                Input: {question},
                Output: {query}
            """

qaPrompt = PromptTemplate(input_variables=["context", "question"], template=QA_GENERATION_TEMPLATE)

example_prompt = PromptTemplate(input_variables=["question", "query"], template=EXAMPLES_PROMPT_TEMPLATE)

cypherPromptTemplate = """
You are an expert Neo4j Developer translating user questions into Cypher to answer questions.
Convert the user's question based on the schema.
Instructions: Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}

Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.

Important: In the generated Cypher query, the RETURN statement must explicitly include the property values used in the query's filtering condition, alongside the main information requested from the original question.

Question: {question}
input:
"""

similaritySelector = SemanticSimilarityExampleSelector.from_examples(
    examples=examples, 
    embeddings=embeddings, 
    k=1,
    vectorstore_cls=Chroma
)

cypherPrompt = FewShotPromptTemplate(
    example_selector=similaritySelector,
    example_prompt=example_prompt,
    input_variables=["question", "schema"], 
    prefix=cypherPromptTemplate,
    suffix="the question is:{question}",
)

cypherqachain = GraphCypherQAChain.from_llm(
    llm = llm,
    return_intermediate_steps=True,
    validate_cypher=True,
    graph=graph, 
    verbose=True,
    k=3,
    use_function_response=True,
    cypherPrompt=cypherPrompt,
    return_direct=True,
    output_key="input",
) 

# cypherqachain = LLMChain(llm=llm, prompt=cypherPrompt)

schema  = construct_schema(graph.get_structured_schema, [], [])

In [290]:
cypherqachain.invoke({"query": "which nodes has max azure dependencies"})



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (c:cloudRoleName)-[r:AZURE_DEPENDS_ON]->(s:SQL)
RETURN c.id, COUNT(r) AS azureDependencies
ORDER BY azureDependencies DESC
LIMIT 1
[0m

[1m> Finished chain.[0m


{'query': 'which nodes has max azure dependencies',
 'input': [{'c.id': 'people-api', 'azureDependencies': 23}],
 'intermediate_steps': [{'query': 'cypher\nMATCH (c:cloudRoleName)-[r:AZURE_DEPENDS_ON]->(s:SQL)\nRETURN c.id, COUNT(r) AS azureDependencies\nORDER BY azureDependencies DESC\nLIMIT 1\n'}]}

In [296]:


# result = cypherqachain.invoke({"query":"what is the root cause request id 8ff8696695aa73588ac454809741e2ea"})

# query = result['intermediate_steps'][0]['query']

# print(query)

prompt_template = """Given the {input} how confident are you in the query on scale of 1-5, answer precisely in JSON format
score: 1, reason: The query is not clear, assumptions: The query is not clear"""

prompt = PromptTemplate(template=prompt_template, input_variables=["input"])

# ask llm on confidence of the query
confidenceChain = LLMChain(
    llm=llm,
    prompt=prompt,
    output_key="score"
)    

In [291]:
from langchain_core.messages import AIMessage
from langchain_core.runnables import Runnable, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
import json
from langchain.agents import create_react_agent, initialize_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import SystemMessage

llm_with_tools = llm.bind_tools(tools)

class NotApproved(Exception):
    """Custom exception."""

def human_approval(msg: AIMessage) -> AIMessage:
    """Responsible for passing through its input or raising an exception.

    Args:
        msg: output from the chat model

    Returns:
        msg: original output from the msg
    """
    tool_strs = "\n\n".join(
        json.dumps(tool_call, indent=2) for tool_call in msg.tool_calls
    )
    input_msg = (
        f"Do you approve of the following tool invocations\n\n{tool_strs}\n\n"
        "Anything except 'Y'/'Yes' (case-insensitive) will be treated as a no.\n >>>"
    )
    resp = input(input_msg)
    if resp.lower() not in ("yes", "y"):
        raise NotApproved(f"Tool invocations not approved:\n\n{tool_strs}")
    return msg

agent = initialize_agent(
    tools=tools, llm=llm, agent="zero-shot-react-description", verbose=True
)

In [294]:
print(agent.agent.llm_chain.prompt.template)

Answer the following questions as best you can. You have access to the following tools:

human - You can ask a human for guidance when you think you got stuck or you are not sure what to do next. The input should be a question for the human.

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [human]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought:{agent_scratchpad}


In [305]:
def queryExtractor(msg: AIMessage):
    # print(msg)
    query = msg["intermediate_steps"][0]["query"]
    msg = {"input": query}
    return msg

final_chain = cypherqachain | queryExtractor | confidenceChain | agent
final_chain.invoke({"query": "What is the root cause request id 8ff8696695aa73588ac454809741e2ea"})



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (root:cloudRoleName)-[:DEPENDS_ON|INPROC_TT|AZURE_DEPENDS_ON*]->(leaf:SQL)
WHERE leaf.id = '8ff8696695aa73588ac454809741e2ea'
RETURN root.id AS rootCauseRequestId
[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThis query is written in Cypher, which is a query language for the Neo4j graph database. The query is looking for nodes in the graph database that have a specific relationship path to a node identified by `leaf.id = '8ff8696695aa73588ac454809741e2ea'`. Specifically, it is looking for `root` nodes of type `cloudRoleName` that have a relationship path to `leaf` nodes of type `SQL`.

Here’s a breakdown of the query:
1. `MATCH (root:cloudRoleName)-[:DEPENDS_ON|INPROC_TT|AZURE_DEPENDS_ON*]->(leaf:SQL)`:
   - This part of the query matches nodes labeled `cloudRoleName` (denoted as `root`) that have a relationship path (denoted by `[:DEPENDS_ON|I

{'input': "cypher\nMATCH (root:cloudRoleName)-[:DEPENDS_ON|INPROC_TT|AZURE_DEPENDS_ON*]->(leaf:SQL)\nWHERE leaf.id = '8ff8696695aa73588ac454809741e2ea'\nRETURN root.id AS rootCauseRequestId\n",
 'score': '```json\n{\n  "score": 1,\n  "reason": "The query lacks clarity, and it\'s difficult to determine its correctness without further context or explanation.",\n  "assumptions": "The query is ambiguous and does not provide sufficient information about the data model or the purpose of the query."\n}\n```',
 'output': "The Cypher query is looking for nodes labeled `cloudRoleName` that have a relationship path (of any length) involving `DEPENDS_ON`, `INPROC_TT`, or `AZURE_DEPENDS_ON` to nodes labeled `SQL` with the specific `id` of '8ff8696695aa73588ac454809741e2ea'. It returns the `id` of the `cloudRoleName` nodes as `rootCauseRequestId`."}

[Work In Progress]

In [None]:
# pc_df = pd.read_csv('pc.csv')
# pc_df.head(1)

In [None]:
# pc_df.iloc[0]['InstanceName']

In [None]:
# def parseServiceName(x):
#     splits = x.split('/')
#     servicename = splits[-1]
#     return servicename
# def parseInstanceId(x):
#     splits = x.split('/')
#     instanceid = splits[-2]
#     return instanceid
# pc_df['ServiceName'] = pc_df['InstanceName'].apply(lambda x: parseServiceName(x))
# pc_df['InstanceId'] = pc_df['InstanceName'].apply(lambda x: parseInstanceId(x))
# # pc_df.drop(columns=['InstanceName'], inplace=True)
# pc_df.rename(columns={'TimeGenerated [UTC]': 'timestamp'}, inplace=True)
# pc_df.head(5)

In [None]:
# # pivot the table to get the service names as columns
# pc_df = pc_df.pivot(index='timestamp', columns='ServiceName', values='avg_CounterValue')