In [2]:
!pip install langchain openai wikipedia tiktoken neo4j langchain_openai

Collecting langchain
  Downloading langchain-0.1.11-py3-none-any.whl (807 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m807.5/807.5 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting openai
  Downloading openai-1.13.3-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.4/227.4 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tiktoken
  Downloading tiktoken-0.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting neo4j
  Downloading neo4j-5.18.0.tar.gz (198 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m198.0/198.0 kB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[

In [3]:
import os
from langchain_community.vectorstores.neo4j_vector import Neo4jVector
from langchain.document_loaders import WikipediaLoader
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document

In [8]:
import os
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
# --- Connection settings ---------------------------------------------------
# Read the Neo4j connection details from the environment so credentials never
# have to be typed into (and saved with) the notebook. The empty-string
# fallbacks reproduce the original placeholder values when a variable is unset.
url = os.environ.get("NEO4J_URI", "")
username = os.environ.get("NEO4J_USERNAME", "")
password = os.environ.get("NEO4J_PASSWORD", "")
model_version = 'gpt-3.5-turbo-16k'
# NOTE(review): this value is not passed to any client in the visible cells;
# presumably the OpenAI clients read OPENAI_API_KEY themselves -- confirm.
openai_api_key = os.environ.get('OPENAI_API_KEY')

# --- Retrieval query -------------------------------------------------------
# Cypher fragment handed to the vector store. It follows PART_OF from each
# matched node to its Document, joins the node texts of one document with a
# "----" separator line, averages their scores, and reports as `source` the
# document's url -- or its fileName when the url contains "None" or is null.
# NOTE(review): `node` and `score` are not bound in this fragment; presumably
# the vector search that runs before it supplies them -- confirm.
retrieval_query="""
 MATCH (node)-[:PART_OF]->(d:Document)
WITH d, apoc.text.join(collect(node.text),"\n----\n") as text, avg(score) as score
RETURN text, score, {source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName)} as metadata
"""

# --- Vector store over the existing "vector" index --------------------------
neo_db = Neo4jVector.from_existing_index(
    embedding=OpenAIEmbeddings(),
    url=url,
    username=username,
    password=password,
    database="neo4j",
    index_name="vector",
    retrieval_query=retrieval_query,
)

# Deterministic chat model (temperature 0) shared by the QA chain below and by
# the final answer-synthesis step later in the notebook.
llm = ChatOpenAI(model=model_version, temperature=0)

# "stuff" chain: retrieved documents are placed into a single prompt. Source
# documents are returned so that each answer can be attributed.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=neo_db.as_retriever(),
    return_source_documents=True,
)


In [23]:
# Ask the vector-index QA chain, then keep only the answer text and the source
# of every retrieved document.
question = "What do you know about Suarez"
# `.invoke` replaces the deprecated direct call `qa({...})` and matches how the
# graph chain is called later in this notebook.
result = qa.invoke({"query": question})
list_source_docs = [doc.metadata['source'] for doc in result["source_documents"]]
res = {'result': result["result"], 'source': list_source_docs}

In [24]:
# Keep the Suarez vector-search answer under its own name: `res` is rebound to
# a new dict by the next question cell, while `vector_res` is read again when
# the final prompt is assembled.
vector_res = res
print(vector_res)

{'result': "I'm sorry, but I don't have enough information to answer your question about Suarez.", 'source': ['https://www.youtube.com/watch?v=NKc8Tr5_L3w']}


In [50]:
# Ask the vector-index QA chain a second question, then keep only the answer
# text and the source of every retrieved document.
question = "What do you know about Apple Stock"
# `.invoke` replaces the deprecated direct call `qa({...})` and matches how the
# graph chain is called later in this notebook.
result = qa.invoke({"query": question})
list_source_docs = [doc.metadata['source'] for doc in result["source_documents"]]
res = {'result': result["result"], 'source': list_source_docs}

In [20]:
# Display the answer/sources dict assembled for the most recent question.
res

{'result': "Apple Inc. (AAPL) is a multinational technology company that designs, manufactures, and sells consumer electronics, software, and online services. It is known for its flagship products such as the iPhone, iPad, Mac, and Apple Watch. Apple is also a major player in the digital services industry with offerings like Apple Music, iCloud, and the App Store.\n\nIn terms of its stock performance, Apple has been one of the most valuable and widely held stocks in the world. It is listed on the NASDAQ stock exchange under the ticker symbol AAPL. The stock has experienced significant growth over the years, driven by strong product sales, a loyal customer base, and a robust ecosystem of software and services.\n\nHowever, it's important to note that stock prices can be volatile and subject to market fluctuations. It is always recommended to conduct thorough research and consult with a financial advisor before making any investment decisions.",
 'source': ['Apple stock during pandemic.pd

In [48]:
# ===== Graph (Cypher) question answering =====

In [9]:
# Query the knowledge graph in a RAG application
from langchain.chains import GraphCypherQAChain
from langchain.graphs import Neo4jGraph

# Open a graph connection with the connection settings defined earlier, then
# reload the schema on the graph object.
graph = Neo4jGraph(url=url, username=username, password=password)
graph.refresh_schema()

# Question-answering chain over the graph. It is given two chat models, both at
# temperature 0: gpt-4 as `cypher_llm` and gpt-3.5-turbo as `qa_llm`.
# `verbose=True` makes each run print the generated Cypher and the context it
# returned, as seen in the recorded outputs.
cypher_chain = GraphCypherQAChain.from_llm(
    cypher_llm=ChatOpenAI(model="gpt-4", temperature=0),
    qa_llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    graph=graph,
    validate_cypher=True,  # Validate relationship directions
    verbose=True,
)

In [35]:
# Put the same Suarez question to the graph chain; the returned dict carries
# the question under "query" and the answer under "result".
cypher_res = cypher_chain.invoke(
    {"query": "What do you know about Suarez"}
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Person {name: "Suarez"}) RETURN p[0m
Full Context:
[32;1m[1;3m[{'p': {'nationality': 'Uruguayan', 'name': 'Suarez', 'id': 'Suarez', 'position': 'forward'}}][0m

[1m> Finished chain.[0m


In [36]:
# Display the dict returned by the graph chain (question plus answer).
cypher_res

{'query': 'What do you know about Suarez',
 'result': 'Suarez is a Uruguayan forward.'}

In [21]:
# Put the Apple Stock question to the graph chain. This rebinds `cypher_res`,
# replacing the Suarez result. In the recorded run the chain generated an empty
# Cypher statement and got back an empty context for this question.
cypher_res = cypher_chain.invoke(
    {"query": "What do you know about Apple Stock"}
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


In [None]:
# ===== Combine the vector-search and graph answers =====

In [45]:
# Merge the graph answer and the vector-search answer for one question into a
# single reply produced by the chat model.
query = "What do you know about Suarez"

# Both answers must belong to `query`. `cypher_res` is rebound by every
# graph-chain call, so on a top-to-bottom run it holds the result of the most
# recent graph question rather than this one. The chain echoes the question
# back under "query", so the graph is re-asked only when the two differ.
if cypher_res.get("query") != query:
    cypher_res = cypher_chain.invoke({"query": query})
# NOTE(review): `vector_res` does not record which question produced it, so it
# cannot be checked the same way; it is assumed to be the answer for `query`.

final_prompt = f"""You are a helpful question-answering agent. Your task is to analyze
and synthesize information from two sources: the top result from a similarity search
(unstructured information) and relevant data from a graph database (structured information).
Given the user's query: {query}, provide a meaningful and efficient answer based
on the insights derived from the following data:

Structured information: {cypher_res['result']}.
Unstructured information: {vector_res['result']}.

If unstructured information fails to find an answer then use the answer from structured information and vice versa. I only want a straightforward answer
"""

# `.invoke(...).content` replaces the deprecated `llm.predict(...)`; `response`
# is still the plain answer string.
response = llm.invoke(final_prompt).content

In [46]:
# Display the synthesized answer string.
response

'Suarez is a Uruguayan forward.'