In [1]:
#imports and variables
import textwrap

# Langchain
from langchain_community.graphs.neo4j_graph import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI


# Environment Variables
from dotenv import load_dotenv
import os
# Read neo4j.env first so the os.getenv lookups below can see its values
load_dotenv('neo4j.env')

# Neo4j connection settings
uri, user, password = (
    os.getenv(env_key)
    for env_key in ('NEO4J_URI', 'NEO4J_USERNAME', 'NEO4J_PASSWORD')
)

# OpenAI
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

In [81]:
# Names shared by the vector index and the LangChain vector stores below
VECTOR_INDEX_NAME = "description_embeddings"        # name of the Neo4j vector index
VECTOR_NODE_LABEL = "EmbeddingNode"                 # label of the indexed nodes
VECTOR_SOURCE_PROPERTY = "description"              # text property that gets embedded
VECTOR_EMBEDDING_PROPERTY = "descriptionEmbedding"  # property holding the vector

In [3]:
# Open the graph connection to the AuraDB database using the loaded credentials
kg = Neo4jGraph(
    url=uri,
    username=user,
    password=password,
)

In [69]:
# Create the vector index (no-op if it already exists).
# The index name, node label and embedding property come from the shared
# constants so this DDL cannot drift from the vector stores configured later.
# Literal Cypher braces are doubled because this is an f-string.
# NOTE(review): 1536 is presumably the length of the OpenAI embedding vectors
# stored in the indexed property — confirm if the embedding model changes.
kg.query(f"""
  CREATE VECTOR INDEX {VECTOR_INDEX_NAME} IF NOT EXISTS
  FOR (n:{VECTOR_NODE_LABEL}) ON (n.{VECTOR_EMBEDDING_PROPERTY})
  OPTIONS {{ indexConfig: {{
    `vector.dimensions`: 1536,
    `vector.similarity_function`: 'cosine'
  }}}}""")


[]

In [70]:
# List every index in the database to confirm the vector index is ONLINE
kg.query("SHOW INDEXES")

[{'id': 6,
  'name': 'description_embeddings',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'VECTOR',
  'entityType': 'NODE',
  'labelsOrTypes': ['EmbeddingNode'],
  'properties': ['descriptionEmbedding'],
  'indexProvider': 'vector-2.0',
  'owningConstraint': None,
  'lastRead': None,
  'readCount': None},
 {'id': 0,
  'name': 'index_343aff4e',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'LOOKUP',
  'entityType': 'NODE',
  'labelsOrTypes': None,
  'properties': None,
  'indexProvider': 'token-lookup-1.0',
  'owningConstraint': None,
  'lastRead': neo4j.time.DateTime(2024, 7, 9, 8, 38, 45, 790000000, tzinfo=<UTC>),
  'readCount': 255},
 {'id': 1,
  'name': 'index_f7700477',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'LOOKUP',
  'entityType': 'RELATIONSHIP',
  'labelsOrTypes': None,
  'properties': None,
  'indexProvider': 'token-lookup-1.0',
  'owningConstraint': None,
  'lastRead': neo4j.time.DateTime(2024, 7, 8, 14, 21, 8, 804000000,

In [71]:
# Compute an OpenAI embedding for each labelled node that has a description but
# no stored vector yet, and write it to the embedding property.
# Skipping nodes that already carry a vector keeps a Restart & Run All from
# paying for the same embeddings again; remove the property from a node to
# force it to be re-embedded.
# Literal Cypher braces are doubled because this is an f-string.
kg.query(f"""
    MATCH (n:{VECTOR_NODE_LABEL})
    WHERE n.{VECTOR_SOURCE_PROPERTY} IS NOT NULL
      AND n.{VECTOR_EMBEDDING_PROPERTY} IS NULL
    WITH n, genai.vector.encode(
        n.{VECTOR_SOURCE_PROPERTY},
        "OpenAI",
        {{
          token: $openAiApiKey
        }}) AS vector
    CALL db.create.setNodeVectorProperty(n, "{VECTOR_EMBEDDING_PROPERTY}", vector)
    """,
    params={"openAiApiKey": OPENAI_API_KEY})

[]

In [72]:
# Refresh the graph object's schema before printing it, so the printed schema
# is the one fetched by this call rather than an older copy.
kg.refresh_schema()
print(kg.schema)

Node properties:
Company {name: STRING, description: STRING, descriptionEmbedding: LIST}
Car {name: STRING, description: STRING, descriptionEmbedding: LIST}
Color {name: STRING, description: STRING, descriptionEmbedding: LIST}
ColorGradient {name: STRING, description: STRING, descriptionEmbedding: LIST}
Price {description: STRING, descriptionEmbedding: LIST}
EmbeddingNode {name: STRING, description: STRING, descriptionEmbedding: LIST}
Relationship properties:

The relationships:
(:Car)-[:MANUFACTURED_BY]->(:Company)
(:Car)-[:MANUFACTURED_BY]->(:EmbeddingNode)
(:Car)-[:AVAILABLE_IN]->(:Color)
(:Car)-[:AVAILABLE_IN]->(:EmbeddingNode)
(:Car)-[:COSTS]->(:Price)
(:Car)-[:COSTS]->(:EmbeddingNode)
(:Color)-[:SHADES]->(:ColorGradient)
(:Color)-[:SHADES]->(:EmbeddingNode)
(:EmbeddingNode)-[:MANUFACTURED_BY]->(:Company)
(:EmbeddingNode)-[:MANUFACTURED_BY]->(:EmbeddingNode)
(:EmbeddingNode)-[:AVAILABLE_IN]->(:Color)
(:EmbeddingNode)-[:AVAILABLE_IN]->(:EmbeddingNode)
(:EmbeddingNode)-[:COSTS]->(:P

In [74]:
# Probe question for the manual vector search in the next cell
question = 'which car starts with family?'

In [78]:
# Embed the question with OpenAI and return the closest nodes from the vector
# index together with their similarity score.
# The index name is bound as a query parameter from VECTOR_INDEX_NAME instead
# of being repeated as a literal, so it stays in sync with the rest of the
# notebook.
kg.query("""
    WITH genai.vector.encode(
        $question,
        "OpenAI",
        {
          token: $openAiApiKey
        }) AS question_embedding
    CALL db.index.vector.queryNodes(
        $index_name,
        $top_k,
        question_embedding
        ) YIELD node AS n, score
    RETURN n.name, n.description, score
    """,
    params={"openAiApiKey": OPENAI_API_KEY,
            "question": question,
            "index_name": VECTOR_INDEX_NAME,
            "top_k": 5,  # number of nearest neighbours to return
            })

[{'n.name': 'model3',
  'n.description': 'The FamilyWagon 4000 is the ideal vehicle for families, offering a perfect blend of comfort, space, and safety. Designed with family needs in mind, this car provides ample room for passengers and cargo, advanced safety features, and a comfortable ride for long journeys. The FamilyWagon 4000 ensures that every family trip is enjoyable and secure, making it a reliable companion for everyday use and road trips alike.',
  'score': 0.9058119058609009},
 {'n.name': 'AutomotiveX',
  'n.description': "Founded in 1995, AutoMotiveX quickly emerged as a pioneer in the automotive industry, known for its dedication to innovation and excellence. Headquartered in the heart of Detroit, Michigan, the company has revolutionized car manufacturing with its cutting-edge technology and sustainable practices. AutoMotiveX's flagship models, including the Speedster 2000, EcoRide 3000, and FamilyWagon 4000, have set new standards in performance, efficiency, and safety. 

In [90]:
# Expose the existing graph nodes as a LangChain vector store on the same index
neo4j_vector_store = Neo4jVector.from_existing_graph(
    embedding=OpenAIEmbeddings(),
    # what to index
    index_name=VECTOR_INDEX_NAME,
    node_label=VECTOR_NODE_LABEL,
    text_node_properties=[VECTOR_SOURCE_PROPERTY],
    embedding_node_property=VECTOR_EMBEDDING_PROPERTY,
    # where to connect
    url=uri,
    username=user,
    password=password,
)



In [92]:
# Retriever view of the vector store; passed to the QA chain built next
retriever = neo4j_vector_store.as_retriever()

In [97]:
# QA-with-sources chain over the plain retriever, using the "stuff" chain type
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=ChatOpenAI(temperature=0),
    chain_type="stuff",
    retriever=retriever,
)

In [98]:
def prettychain(question: str, qa_chain=None, width: int = 60) -> None:
    """Ask a QA-with-sources chain a question and print the wrapped answer.

    Args:
        question: Question text, passed to the chain under the "question" key.
        qa_chain: Chain to query. When omitted, the notebook-level ``chain``
            is used, so existing ``prettychain(question)`` calls behave as
            before.
        width: Maximum line width used when wrapping the answer.

    Returns:
        None. The answer is printed, not returned.
    """
    active_chain = chain if qa_chain is None else qa_chain
    # ``invoke`` replaces calling the chain object directly, which LangChain
    # has deprecated; ``return_only_outputs`` leaves the inputs out of the result.
    response = active_chain.invoke(
        {"question": question},
        return_only_outputs=True,
    )
    print(textwrap.fill(response['answer'], width))

In [99]:
# Partial-name question for the plain retrieval chain
question = 'What is the car that starts with the word fam..'

In [100]:
# Run the current question through the plain retrieval chain and print the answer
prettychain(question)

The car that starts with "fam" is the FamilyWagon 4000.


In [107]:
# Two-part question: owning company plus the model's full name
question = 'Which company does Eco... belong to and what is its full name.'

In [None]:
# NOTE(review): this cell has no recorded execution count or output, so the
# answer to the question above was never captured — re-run before sharing.
prettychain(question)

In [117]:
# Open-ended question that needs the answer justified from the descriptions
prettychain('which car is the safest and why?')

The safest car is the FamilyWagon 4000 because it is
designed with advanced safety features specifically for
families, ensuring a secure ride for passengers.


In [137]:
# Cypher fragment handed to Neo4jVector as `retrieval_query`. It uses `node`
# and `score` without defining them, so they are presumably supplied by the
# vector-search step LangChain runs before this fragment — TODO confirm.
#   1. Match paths made of up to one outgoing hop from `node` on each side,
#      both ends being :EmbeddingNode nodes.
#   2. Order the paths by length and keep the longest.
#   3. Join the `description` of every node on that path into `text`.
# NOTE(review): `ORDER BY ... LIMIT 1` applies to the whole row stream, so this
# looks like it yields a single row however many nodes the vector search
# matched — confirm that is intended.
# NOTE(review): metadata projects `.source`, which is not among the node
# properties in the schema printed earlier in this notebook — verify.
retrieval_query_window = """
MATCH window = 
    (:EmbeddingNode)<-[*0..1]-(node)-[*0..1]->(:EmbeddingNode)
WITH node, score, window as longestWindow 
  ORDER BY length(window) DESC LIMIT 1
WITH nodes(longestWindow) as nodeList, node, score
  UNWIND nodeList as nodeRows
WITH collect(nodeRows.description) as textList, node, score
RETURN apoc.text.join(textList, " \n ") as text,
    score,
    node {.source} AS metadata
"""

In [138]:
# Second vector store on the same index, retrieving through the window query
vector_store_window = Neo4jVector.from_existing_index(
    embedding=OpenAIEmbeddings(),
    # index and retrieval configuration
    index_name=VECTOR_INDEX_NAME,
    node_label=VECTOR_NODE_LABEL,
    text_node_property=VECTOR_SOURCE_PROPERTY,
    retrieval_query=retrieval_query_window,
    # connection
    url=uri,
    username=user,
    password=password,
    database="neo4j",
)

In [139]:
# Retriever view of the window-query vector store; feeds chain_window below
retriever_window = vector_store_window.as_retriever()

In [140]:
# Same QA-with-sources setup as `chain`, but fed by the window retriever
chain_window = RetrievalQAWithSourcesChain.from_chain_type(
    llm=ChatOpenAI(temperature=0),
    chain_type="stuff",
    retriever=retriever_window,
)

In [183]:
# Question whose answer spans colour, car and company nodes
question = 'Which company does the cherry red EcoRide belong to?'

In [184]:
# Query the window chain for the current question and print the wrapped answer.
# `invoke` is used because calling the chain object directly is deprecated in
# LangChain; `return_only_outputs=True` leaves the inputs out of the result.
answer = chain_window.invoke(
    {"question": question},
    return_only_outputs=True,
)
print(textwrap.fill(answer["answer"]))

The cherry red EcoRide belongs to the same company that produces the
Speedster 2000, EcoRide 3000, and FamilyWagon 4000.


In [164]:
# NOTE(review): "availabe" is misspelled; the prompt is kept exactly as it was
# when the recorded answer below was produced.
question = 'What are the colors red is availabe in'

In [165]:
# Query the window chain for the current question and print the wrapped answer.
# `invoke` is used because calling the chain object directly is deprecated in
# LangChain; `return_only_outputs=True` leaves the inputs out of the result.
answer = chain_window.invoke(
    {"question": question},
    return_only_outputs=True,
)
print(textwrap.fill(answer["answer"]))

Red is available in Crimson and Cherry red.


In [145]:
# Show the raw result dict (answer text plus sources) from the last chain call.
# NOTE(review): the recorded output has a lower execution count than the cell
# that assigns `answer` above, so it may come from an earlier run — re-run to refresh.
answer

{'answer': 'The colors available in red are Crimson and Cherry red.\n',
 'sources': 'dummySource'}