In [12]:
from langchain.chains import GraphCypherQAChain
from langchain.chat_models import ChatOpenAI
from langchain.graphs import Neo4jGraph
from dotenv import load_dotenv
import os
import openai


In [13]:

load_dotenv(os.path.join(os.path.expanduser('~'), '.spoke_neo4j_config.env'))
username = os.environ.get('NEO4J_USER')
password = os.environ.get('NEO4J_PSW')
url = os.environ.get('NEO4J_URI')
database = os.environ.get('NEO4J_DB')


In [14]:
%%time

graph = Neo4jGraph(
    url=url, 
    username=username, 
    password=password,
    database = database
)


CPU times: user 419 ms, sys: 72.2 ms, total: 491 ms
Wall time: 27.6 s


In [15]:
load_dotenv(os.path.join(os.path.expanduser('~'), '.gpt_config.env'))
API_KEY = os.environ.get('API_KEY')
API_VERSION = os.environ.get('API_VERSION')
RESOURCE_ENDPOINT = os.environ.get('RESOURCE_ENDPOINT')

openai.api_type = "azure"
openai.api_key = API_KEY
openai.api_base = RESOURCE_ENDPOINT
openai.api_version = API_VERSION

chat_deployment_id = 'gpt-4-32k'
chat_model_id = chat_deployment_id

temperature = 0

chat_model = ChatOpenAI(openai_api_key=API_KEY, 
                        engine=chat_deployment_id, 
                        temperature=temperature)


                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


In [16]:
chain = GraphCypherQAChain.from_llm(
    chat_model, 
    graph=graph, 
    verbose=True, 
    validate_cypher=True,
    return_intermediate_steps=True
)

In [17]:
from langchain.callbacks import get_openai_callback


In [20]:
question = 'What are the genes associated with multiple sclerosis?'
question = "Is Parkinson's disease associated with levodopa?"
# question="Which gene has stronger association with the disease 'liver benign neoplasm', is it PNPLA3 or HLA-B?"
# question='What is the clinical phase of levodopa in treating parkinson disease?'
with get_openai_callback() as cb:
    out = chain.run(query=question, return_final_only=False)
    print(out)




[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Compound {name: 'levodopa'})-[:TREATS_CtD]->(d:Disease {name: 'Parkinson\'s disease'}) RETURN c,d[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
I'm sorry, but I don't have the information to answer that question.


In [100]:
question = 'What are the genes associated with multiple sclerosis?'
question="Which gene has stronger association with the disease 'liver benign neoplasm', is it PNPLA3 or HLA-B?"
question='What is the clinical phase of levodopa treating parkinson disease?'

class OutputCapturer:
    def __init__(self):
        self.outputs = []

    def write(self, output):
        self.outputs.append(output)

# Create an instance of OutputCapturer
output_capturer = OutputCapturer()

# Redirect standard output to the output_capturer
import sys
original_stdout = sys.stdout
sys.stdout = output_capturer

# Run the chain with your query
out = chain.run(query=question, return_final_only=False)

# Restore original stdout
sys.stdout = original_stdout

# Now `output_capturer.outputs` should contain all intermediate outputs


In [101]:
print(output_capturer.outputs[8])

[32;1m[1;3m[][0m


In [None]:
1. Correct context retrieval
2. Token utilization


In [54]:
chain.run

<bound method Chain.run of GraphCypherQAChain(memory=None, callbacks=None, callback_manager=None, verbose=True, tags=None, metadata=None, graph=<langchain.graphs.neo4j_graph.Neo4jGraph object at 0x7fd47aab9ed0>, cypher_generation_chain=LLMChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, prompt=PromptTemplate(input_variables=['schema', 'question'], output_parser=None, partial_variables={}, template='Task:Generate Cypher statement to query a graph database.\nInstructions:\nUse only the provided relationship types and properties in the schema.\nDo not use any other relationship types or properties that are not provided.\nSchema:\n{schema}\nNote: Do not include any explanations or apologies in your responses.\nDo not respond to any questions that might ask anything else than for you to construct a Cypher statement.\nDo not include any text except the generated Cypher statement.\n\nThe question is:\n{question}', template_format='f-string', v

In [29]:
print(chain.cypher_generation_chain.prompt.template)


Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.

The question is:
{question}


In [42]:
print(chain.qa_chain.prompt.template)

You are an assistant that helps to form nice and human understandable answers.
The information part contains the provided information that you must use to construct an answer.
The provided information is authoritative, you must never doubt it or try to use your internal knowledge to correct it.
Make the answer sound as a response to the question. Do not mention that you based the result on the given information.
If the provided information is empty, say that you don't know the answer.
Information:
{context}

Question: {question}
Helpful Answer:


In [48]:
schema = graph.schema
question
cypher_template = chain.cypher_generation_chain.prompt.template
cypher_template = cypher_template.format(schema=schema, question=question)
