In [3]:
import getpass
import os
import re

import numpy as np
import pandas as pd
from langchain_community.graphs import Neo4jGraph
from tqdm import tqdm


In [4]:
# Connection settings for Neo4j. Neo4jGraph() is called without arguments,
# so the connection details are supplied through these environment variables.
# setdefault keeps any value that is already exported and only falls back to
# the local defaults when nothing is set.
os.environ.setdefault('NEO4J_URI', 'bolt://localhost:7690')
os.environ.setdefault('NEO4J_USERNAME', 'neo4j')

# The password must not be stored in the notebook: take it from the
# environment, or prompt for it interactively when it is missing.
if not os.environ.get('NEO4J_PASSWORD'):
    os.environ['NEO4J_PASSWORD'] = getpass.getpass('Neo4j password: ')

graph = Neo4jGraph()

In [5]:
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from dotenv import load_dotenv

# Load variables (notably AZURE_OPENAI_API_KEY) from a local .env file, if any.
load_dotenv()

# The API key has to come from the environment / .env file. Writing
# os.getenv(...) straight back into os.environ fails with an opaque
# "TypeError: str expected, not NoneType" when the key is absent, so check
# for it explicitly and fail with an actionable message instead.
if not os.getenv("AZURE_OPENAI_API_KEY"):
    raise RuntimeError(
        "AZURE_OPENAI_API_KEY is not set; export it or add it to the .env file "
        "before running this cell."
    )

# Non-secret Azure OpenAI settings used by the two clients below.
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://sriks-openai.openai.azure.com/"
os.environ["AZURE_OPENAI_API_VERSION"] = "2024-05-01-preview"
os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"] = "gpt-4o"
os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT"] = "text-embedding-ada-002"

# Chat model: used for both Cypher generation and answer generation.
llm = AzureChatOpenAI(
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"],
)

# Embedding model: used to select the most similar few-shot example.
embeddings = AzureOpenAIEmbeddings(
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    azure_deployment=os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT"],
)

In [6]:
# Few-shot examples (natural-language question -> Cypher query) shown to the
# LLM when it generates Cypher.
# The operation id is a hex string, so it must be a quoted Cypher string
# literal. The ROOT exclusion is parenthesised because AND binds tighter than
# OR; without the parentheses the operationId filter would only apply to the
# second half of the OR.
examples = [
    {
        "question": "What is the root cause request id 8ff8696695aa73588ac454809741e2ea",
        "query": (
            "MATCH (n)-[r:DEPENDS_ON]->(m) "
            "WHERE (n.id <> 'ROOT' OR m.id <> 'ROOT') "
            "AND r.operationId = '8ff8696695aa73588ac454809741e2ea' "
            "RETURN n.id, r.duration, r.operation_Name, m.id "
            "ORDER BY r.duration DESC LIMIT 3"
        ),
    }
]

In [7]:
from langchain.chains import GraphCypherQAChain
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
# Chroma is imported from langchain_community (already a dependency of this
# notebook); the langchain.vectorstores path is a deprecated re-export.
from langchain_community.vectorstores import Chroma

# Refresh the graph object's schema information before the prompts and chain
# below are built.
graph.refresh_schema()

# Prompt text for the answer-generation step. {context} receives the data
# returned for the generated Cypher query and {question} the user's question.
QA_GENERATION_TEMPLATE = """
       Task: answer the question you are given based on the context provided.
       Instructions:
        You are an assistant that helps to form nice and human understandable answers. 
        Use the context information provided to generate a well organized and comprehensive answer to the user's question. 
        When the provided information contains multiple elements, structure your answer as a bulleted or numbered list to enhance clarity and readability.
        You must use the information to construct your answer. 
        The provided information is authoritative; do not doubt it or try to use your internal knowledge to correct it. 
        Make the answer sound like a response to the question without mentioning that you based the result on the given information. 
        If there is no information provided, say that the knowledge base returned empty results.

        Here's the information:
        {context}

        Question: {question}
        Answer:
            """
# Layout used to render one few-shot example: the question as "Input" and the
# matching Cypher query as "Output".
EXAMPLES_PROMPT_TEMPLATE = """   
                Input: {question},
                Output: {query}
            """
# Prompt that turns the query results ({context}) and the user's {question}
# into the final natural-language answer.
qaPrompt = PromptTemplate(
    template=QA_GENERATION_TEMPLATE,
    input_variables=["context", "question"],
)

# Prompt that formats a single few-shot example (question + Cypher query).
example_prompt = PromptTemplate(
    template=EXAMPLES_PROMPT_TEMPLATE,
    input_variables=["question", "query"],
)

# Prefix of the few-shot Cypher-generation prompt: task instructions plus the
# {schema} placeholder. The question is intentionally NOT part of the prefix;
# the few-shot prompt's suffix already appends "The question is: {question}"
# after the examples, and having it here as well rendered the question twice.
cypherPromptTemplate = """Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}

Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.

Examples: Here are a few examples of generated Cypher statements for particular questions:
"""

# Example selector backed by a Chroma vector store: the few-shot examples are
# embedded with `embeddings`, and the single (k=1) most similar example is
# selected for each incoming question.
similaritySelector = SemanticSimilarityExampleSelector.from_examples(
    vectorstore_cls=Chroma,
    embeddings=embeddings,
    examples=examples,
    k=1,
)

# Few-shot prompt for Cypher generation: the instruction prefix, then the
# example(s) picked by the selector, then the user's question.
# Only `example_selector` is passed: FewShotPromptTemplate accepts either
# `examples` or `example_selector`, and raises a ValueError when given both.
cypherPrompt = FewShotPromptTemplate(
    example_selector=similaritySelector,
    example_prompt=example_prompt,
    input_variables=["schema", "question"],
    prefix=cypherPromptTemplate,
    suffix="The question is:\n{question}",
)

# Question -> Cypher -> answer chain over the Neo4j graph. The same chat model
# is used for both steps, each with its own custom prompt.
cypher_chain = GraphCypherQAChain.from_llm(
    graph=graph,
    # Cypher-generation step: few-shot prompt built above.
    cypher_llm=llm,
    cypher_llm_kwargs={"prompt": cypherPrompt},
    # Answer-generation step: QA prompt built above.
    qa_llm=llm,
    qa_llm_kwargs={"prompt": qaPrompt},
    validate_cypher=True,
    return_intermediate_steps=True,
    verbose=True,
)


: 

In [None]:
# Run the chain on a sample question and print the generated answer.
# invoke() replaces calling the chain object directly, which is deprecated;
# it takes the same input dict and returns the same output dict.
result = cypher_chain.invoke(
    {"query": "What is the root cause request id 8ff8696695aa73588ac454809741e2ea"}
)
print(result['result'])

In [None]:
# Read pc.csv into a DataFrame and preview its first row.
pc_df = pd.read_csv(filepath_or_buffer='pc.csv')
pc_df.head(n=1)

In [None]:
# Show the InstanceName value of the first row to inspect its format.
pc_df['InstanceName'].iloc[0]

In [None]:
def parseServiceName(x):
    """Return the last '/'-separated segment of ``x``.

    When ``x`` contains no '/', the whole string is returned unchanged.
    """
    # rpartition splits on the final '/', leaving the trailing segment last.
    _, _, tail = x.rpartition('/')
    return tail
def parseInstanceId(x):
    """Return the second-to-last '/'-separated segment of ``x``.

    Raises IndexError when ``x`` contains no '/'.
    """
    # Only the last two separators matter, so split from the right.
    return x.rsplit('/', 2)[-2]
# Derive ServiceName (last path segment) and InstanceId (second-to-last
# segment) from the slash-delimited InstanceName column.
pc_df['ServiceName'] = pc_df['InstanceName'].apply(parseServiceName)
pc_df['InstanceId'] = pc_df['InstanceName'].apply(parseInstanceId)
# The InstanceName column itself is kept (it is not dropped here).
pc_df.rename(columns={'TimeGenerated [UTC]': 'timestamp'}, inplace=True)
pc_df.head()

In [None]:
# Reshape to wide form: one row per timestamp, one column per ServiceName,
# with avg_CounterValue as the cell values.
# NOTE(review): DataFrame.pivot raises ValueError when a
# (timestamp, ServiceName) pair occurs more than once — confirm the data has
# at most one row per pair (e.g. a single instance per service).
pc_df = pc_df.pivot(
    values='avg_CounterValue',
    columns='ServiceName',
    index='timestamp',
)