# Graph Generic Cyher or QA Graph Cyher are suitable fetching the exact nodes relating to graph

In [1]:
import pinecone
import openai
from dotenv import load_dotenv
import os
from neo4j import GraphDatabase
from langchain.vectorstores import Pinecone
import pinecone
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.llms import OpenAI
from langchain.embeddings.base import Embeddings
from langchain.embeddings.openai import OpenAIEmbeddings

load_dotenv()
neo4j_url = os.getenv('NEO4J_URL')
neo4j_user = os.getenv('NEO4J_USER')
neo4j_password = os.getenv('NEO4J_PASSWORD')
openai_key = os.getenv('OPENAI_KEY')
os.environ['OPENAI_API_KEY'] = openai_key
pinecone_api_key = os.getenv('PINECONE_KEY')

embeddings = OpenAIEmbeddings()

  from tqdm.autonotebook import tqdm


# Step 1. Get names of nodes, edges, labels

In [2]:
# Get all the nodes names

driver = GraphDatabase.driver(neo4j_url, auth=(neo4j_user, neo4j_password))
# Divide the graph into trunks
def get_pair_nodes (tx):
    pairs_of_nodes = []
    for record in tx.run("MATCH (a)-[r]->(b) RETURN labels(a), a.name, type(r), labels(b), b.name"):
        pair_node = {
            "start_node_name": record["a.name"],
            "start_node_label":record["labels(a)"][0],
            "end_node_name": record["b.name"],
            "end_node_label":record["labels(b)"][0],
            "edge": record["type(r)"]
        }
        pairs_of_nodes.append(pair_node)
        # print(record["labels(a)"][0], record["a.name"], record["type(r)"], record["labels(b)"][0], record["b.name"])
    return pairs_of_nodes


In [3]:
with driver.session() as session:
    pairs_of_nodes = session.execute_read(get_pair_nodes)
driver.close()
len(pairs_of_nodes)

574

# Step 2: Build vectors

In [4]:
node_names = set()
node_types = set()
edge_types = set()

for p_n in pairs_of_nodes:
    start_node_name = p_n["start_node_name"]
    end_node_name = p_n["end_node_name"]
    start_node_label = p_n["start_node_label"]
    end_node_label = p_n["end_node_label"]
    edge = p_n["edge"]
    
    if isinstance(start_node_name, str):
        node_names.add(start_node_name)
    if isinstance(end_node_name, str):
        node_names.add(end_node_name)
        
    if isinstance(start_node_label, str):
        node_types.add(start_node_label)
    if isinstance(end_node_label, str):
        node_types.add(end_node_label)
        
    if isinstance(edge, str):
        edge_types.add(edge)

In [5]:
p_n

{'start_node_name': 'Incidents and Near Misses',
 'start_node_label': 'Datasets',
 'end_node_name': 'Empowered Decisions',
 'end_node_label': 'User_stories',
 'edge': 'contributes_to'}

In [6]:
node_vectors = []
for n in node_names:
    ele_info = {
        "value": embeddings.embed_documents([n]),
        "meta_data": {
            "source":"ps-graph-01",
            "info_type": "node_names",
            "name":n
        },
    } 
    node_vectors.append(ele_info)

for n in node_types:
    ele_info = {
        "value": embeddings.embed_documents([n]),
        "meta_data": {
            "source":"ps-graph-01",
            "info_type": "node_types",
            "name":n
        },
    } 
    node_vectors.append(ele_info)

for n in edge_types:
    ele_info = {
        "value": embeddings.embed_documents([n]),
        "meta_data": {
            "source":"ps-graph-01",
            "info_type": "edges",
            "name":n
        },
    } 
    node_vectors.append(ele_info)
node_vectors[:5]

[{'value': [[-0.028725597553567007,
    -0.025947886860357525,
    -0.003238404794159045,
    -0.004478212199935235,
    -0.012025456533997067,
    0.009715213916815823,
    -0.025920787234752333,
    -0.016747566761362954,
    -0.017953499169471344,
    -0.0022645121153878115,
    0.01579907986518126,
    0.010616276933849743,
    0.01738440703176233,
    0.005494448160129907,
    0.006978152662014127,
    0.0017259071728999628,
    0.03997194217972138,
    -0.0205144128578393,
    -0.009532291909642093,
    -0.015880378741996833,
    -0.029267590065670833,
    -0.01632752163315987,
    -0.0006639407109118582,
    0.004342714071909278,
    -0.021273202374784653,
    -0.0037228103690211834,
    0.014769293160861372,
    -0.03615089310674417,
    0.018780036819107056,
    0.0030571759315090007,
    0.02391541493996818,
    -0.009281619907132764,
    -0.0031266188385376307,
    -0.008468631138977027,
    -0.013305914309503664,
    -0.03563600208289078,
    0.0075743439596670245,
    -0.0

In [9]:
len(node_vectors)

398

In [10]:
# Setting of pinecone
env_name = "us-west4-gcp-free"
index_name = "test-chatbot-ran"
pinecone.init(api_key=pinecone_api_key,environment=env_name)
index = pinecone.Index(index_name)

# upload vectors 
my_namespace = 'graph'
index.delete(deleteAll='true', namespace=my_namespace)

{}

In [12]:
from math import ceil

def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

pinecone_vectors = [(str(idx), n_v["value"], n_v["meta_data"]) for idx, n_v in enumerate(node_vectors)]

# Define the chunk size; you might need to adjust this depending on the size of your individual vectors
chunk_size = 10

# Iterate over the chunks and upsert each one separately
for chunk in chunks(pinecone_vectors, chunk_size):
    upsert_response = index.upsert(vectors=chunk, namespace=my_namespace)

# Step 3: Add additional hint on top of original question


In [7]:
prompt = "What types of business value does Unilever create?"

In [8]:
env_name = "us-west4-gcp-free"
index_name = "test-chatbot-ran"
pinecone.init(api_key=pinecone_api_key,environment=env_name)
index = pinecone.Index(index_name)

vectorstore = Pinecone(index , embeddings.embed_query, "name", namespace=my_namespace)

In [9]:
docs = vectorstore.similarity_search(prompt, k=10)
docs

[Document(page_content='Value_Unilever_create', metadata={'info_type': 'node_types', 'source': 'ps-graph-01'}),
 Document(page_content='Business_Transformation_themes', metadata={'info_type': 'node_types', 'source': 'ps-graph-01'}),
 Document(page_content='Business_disruption_', metadata={'info_type': 'node_names', 'source': 'ps-graph-01'}),
 Document(page_content='1._Winning_with_brands_and_innovation', metadata={'info_type': 'node_names', 'source': 'ps-graph-01'}),
 Document(page_content='Business_transformation_portfolio', metadata={'info_type': 'node_names', 'source': 'ps-graph-01'}),
 Document(page_content='7._Consumer_insight_', metadata={'info_type': 'node_names', 'source': 'ps-graph-01'}),
 Document(page_content='Supply_chain_strategy', metadata={'info_type': 'node_types', 'source': 'ps-graph-01'}),
 Document(page_content='4._Marketing', metadata={'info_type': 'node_names', 'source': 'ps-graph-01'}),
 Document(page_content='Divestment_', metadata={'info_type': 'node_names', 'so

In [10]:
related_node_names = ""
related_node_types = ""
related_edges = ""
for d in docs: 
    if d.metadata["info_type"] == "node_names":
        related_node_names+=d.page_content + ", "
    
    if d.metadata["info_type"] == "node_types":
        related_node_types+=d.page_content + ", "
    
    if d.metadata["info_type"] == "edges":
        related_edges+=d.page_content + ", "

print(related_node_names)
print(related_node_types) 
print(related_edges)

Business_disruption_, 1._Winning_with_brands_and_innovation, Business_transformation_portfolio, 7._Consumer_insight_, 4._Marketing, Divestment_, 1._Consumer_benefits, 
Value_Unilever_create, Business_Transformation_themes, Supply_chain_strategy, 



In [11]:
additional_hint = f"""Hint: Please refer the names of nodes:{related_node_names} or the labels of nodes: {related_node_types} or the edges type: {related_edges} if necessary"""
additional_hint

'Hint: Please refer the names of nodes:Business_disruption_, 1._Winning_with_brands_and_innovation, Business_transformation_portfolio, 7._Consumer_insight_, 4._Marketing, Divestment_, 1._Consumer_benefits,  or the labels of nodes: Value_Unilever_create, Business_Transformation_themes, Supply_chain_strategy,  or the edges type:  if necessary'

# Add additional hint on top of QA graph

In [12]:
from langchain.chains import GraphCypherQAChain
from langchain.graphs import Neo4jGraph
from langchain.chat_models import ChatOpenAI

graph = Neo4jGraph(
    url=neo4j_url, 
    username=neo4j_user, 
    password=neo4j_password
)

chain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0), graph=graph, verbose=True,
)

result = chain.run(prompt + additional_hint)
result



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (n:Value_Unilever_create)-[:enabled_by]->(m:Business_Transformation_themes)-[:comprised_of]->(p:Programme)
RETURN n, m, p[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


"Unilever creates various types of business value through its focus on business disruption, winning with brands and innovation, business transformation portfolio, consumer insight, marketing, divestment, and consumer benefits. These efforts contribute to the overall value that Unilever creates as a company. Additionally, Unilever's business transformation themes and supply chain strategy play a role in generating business value."