# Compare Question Answering Chains


## Setup

- import Python libraries
- connect to Neo4j
- create QA chains

In [None]:
from dotenv import load_dotenv
import os
import textwrap

# Langchain
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_openai import ChatOpenAI
from langchain.prompts.prompt import PromptTemplate
from langchain.chains import GraphCypherQAChain

# Read settings from the local .env file; override=True lets values in the
# file replace variables that are already set in the process environment.
load_dotenv('.env', override=True)

# Neo4j connection settings (any of these is None when the variable is unset).
NEO4J_URI = os.environ.get('NEO4J_URI')
NEO4J_USERNAME = os.environ.get('NEO4J_USERNAME')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD')
# An unset or empty NEO4J_DATABASE falls back to 'neo4j'.
NEO4J_DATABASE = os.environ.get('NEO4J_DATABASE') or 'neo4j'

# OpenAI credentials.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

# Names shared by every vector store built in this notebook.
VECTOR_INDEX_NAME = 'form_10k_chunks'            # vector index to query
VECTOR_NODE_LABEL = 'Chunk'                      # label of the indexed nodes
VECTOR_SOURCE_PROPERTY = 'text'                  # node property holding the text
VECTOR_EMBEDDING_PROPERTY = 'textEmbedding'      # node property holding the vector


In [None]:
print(f"Connecting to Neo4j at {NEO4J_URI} as {NEO4J_USERNAME}")

# Graph handle built with LangChain's Neo4j integration. It is passed to the
# text-to-Cypher chain further down, which queries the knowledge graph directly.
kg = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
)

# OpenAI embedding model handed to the vector stores below.
embeddings_model = OpenAIEmbeddings()


In [None]:
def prettifyChain(chain):
    """Wrap a question-answering chain so that its answer is pretty-printed.

    Args:
        chain: A callable invoked as
            ``chain({"question": ...}, return_only_outputs=True)`` that
            returns a mapping containing an ``'answer'`` entry.

    Returns:
        A function that takes a question string, prints the chain's answer
        wrapped to 80 columns, and returns the chain's full response.
    """
    def prettychain(question: str):
        response = chain({"question": question}, return_only_outputs=True)
        print(textwrap.fill(response['answer'], 80))
        # Return the response as well, so callers can inspect the other
        # entries the chain produced instead of only seeing printed text.
        return response
    return prettychain

## Vector search

In [None]:

# Retrieval query for the vector store: it follows each matched node to the
# Form it is PART_OF and returns the node text, the similarity score, and the
# form's `source` property as metadata.
retrieval_query_sources = """
MATCH (node)-[:PART_OF]->(form:Form)
RETURN node.text as text,
    score,
    form {.source} AS metadata
"""

# Create a langchain vector store from the existing Neo4j knowledge graph.
# The database is passed explicitly so this store targets the same database
# as the `kg` connection (NEO4J_DATABASE).
vector_store_search = Neo4jVector.from_existing_graph(
    embedding=embeddings_model,
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
    index_name=VECTOR_INDEX_NAME,
    node_label=VECTOR_NODE_LABEL,
    text_node_properties=[VECTOR_SOURCE_PROPERTY],
    embedding_node_property=VECTOR_EMBEDDING_PROPERTY,
    retrieval_query=retrieval_query_sources,
)

# Question-answering chain over the plain vector store ("stuff" chain type),
# wrapped so that answers are pretty-printed.
vector_search_qa = prettifyChain(RetrievalQAWithSourcesChain.from_chain_type(
    ChatOpenAI(temperature=0),
    chain_type="stuff",
    retriever=vector_store_search.as_retriever(),
))

## Vector search examples

In [None]:
# Ask the plain vector-search chain; the answer is printed wrapped to 80 columns.
vector_search_qa("Where is Netapp headquartered?")

In [None]:
# Same chain, asking about the company's primary business.
vector_search_qa("What is Netapp's primary business?")

In [None]:
# Ownership question put to the plain vector-search chain, whose retrieval
# query only returns chunk text and the form's source.
vector_search_qa("Who is Netapp's primary investor?")

In [None]:
# Same chain, asking about the company's main products.
vector_search_qa("What are Netapp's main products?")

## Vector search with window

In [None]:
# Retrieval query that widens each hit into a window of neighbouring chunks:
# it matches paths reaching `node` through zero or one NEXT relationship on
# each side, keeps the longest such path, and joins the text of every chunk
# on that path into a single result.
retrieval_query_window = """
MATCH window=
    (:Chunk)-[:NEXT*0..1]->(node)-[:NEXT*0..1]->(:Chunk)
WITH node, score, window as longestWindow 
  ORDER BY length(window) DESC LIMIT 1
WITH nodes(longestWindow) as chunkList, node, score
  UNWIND chunkList as chunkRows
WITH collect(chunkRows.text) as textList, node, score
RETURN apoc.text.join(textList, " \n ") as text,
    score,
    node {.source} AS metadata
"""

# Vector store over the existing index. It reuses the shared embeddings model
# and targets NEO4J_DATABASE so it stays consistent with the `kg` connection
# instead of always pointing at the literal "neo4j" database.
vector_store_window = Neo4jVector.from_existing_index(
    embedding=embeddings_model,
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
    index_name=VECTOR_INDEX_NAME,
    text_node_property=VECTOR_SOURCE_PROPERTY,
    retrieval_query=retrieval_query_window,
)

# Question-answering chain over the windowed store, with pretty-printed answers.
window_qa = prettifyChain(RetrievalQAWithSourcesChain.from_chain_type(
    ChatOpenAI(temperature=0),
    chain_type="stuff",
    retriever=vector_store_window.as_retriever(),
))

def compare_search_qa(question: str):
    """Print the plain and the windowed vector-search answers to one question.

    Each answer is preceded by a heading naming the approach; a blank
    separator is printed between the two.
    """
    approaches = (
        ("Vector search then return single result...", vector_search_qa),
        ("Vector search, then return a window of results...", window_qa),
    )
    for position, (heading, ask) in enumerate(approaches):
        if position:
            # Separator between consecutive approaches (not before the first).
            print("\n")
        print(heading)
        ask(question)

In [None]:
# Run one question through the plain and the windowed vector-search chains.
compare_search_qa("What is most important to Netapp's business?")

## Mixed Text and Data examples

In [None]:
# Retrieval query that mixes chunk text with graph data: from each matched
# chunk it walks Chunk -> Form <- Company <- Manager, keeps the ten rows with
# the largest share counts, and prepends one "<manager> owns <shares> of
# <company> at a value of $<value>." statement per row to the chunk text.
investment_retrieval_query = """
MATCH (node)-[:PART_OF]->(f:Form),
    (f)<-[:FILED]-(com:Company),
    (com)<-[owns:OWNS_STOCK_IN]-(mgr:Manager)
WITH node, score, mgr, owns, com 
    ORDER BY owns.shares DESC LIMIT 10
WITH collect (
    mgr.name + 
    " owns " + owns.shares + " of " + com.name + 
    " at a value of $" + apoc.number.format(owns.value) + "." 
) AS investment_statements, com, node, score
RETURN apoc.text.join(investment_statements, "\n") + 
    "\n" + "About " + com.name + "... " + node.text AS text,
    score,
    { 
      source: node.source
    } as metadata
"""

# Vector store over the existing index. It targets NEO4J_DATABASE so it stays
# consistent with the `kg` connection instead of always pointing at the
# literal "neo4j" database.
vector_store_with_investment = Neo4jVector.from_existing_index(
    embeddings_model,
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
    index_name=VECTOR_INDEX_NAME,
    text_node_property=VECTOR_SOURCE_PROPERTY,
    retrieval_query=investment_retrieval_query,
)

# Question-answering chain over the investment-enriched store, with
# pretty-printed answers.
mixed_text_data_qa = prettifyChain(RetrievalQAWithSourcesChain.from_chain_type(
    ChatOpenAI(temperature=0),
    chain_type="stuff",
    retriever=vector_store_with_investment.as_retriever(),
))

def compare_mixed_qa(question: str):
    """Print three retrieval-based answers to the same question.

    The plain vector search, the windowed vector search and the
    investment-enriched search are run in that order, each under its own
    heading, with a blank separator between consecutive answers.
    """
    approaches = (
        ("Vector search then return single result...", vector_search_qa),
        ("Vector search, then return a window of results...", window_qa),
        (
            "Vector search, then pattern match (:Chunk)-->(:Form)<--(:Company)<--(:Manager)...",
            mixed_text_data_qa,
        ),
    )
    for position, (heading, ask) in enumerate(approaches):
        if position:
            # Separator between consecutive approaches (not before the first).
            print("\n")
        print(heading)
        ask(question)

In [None]:
# Compare the plain, windowed and investment-enriched chains on an ownership question.
compare_mixed_qa("Who are Netapp's top investors?")

## Text to Cypher examples

In [None]:
# Few-shot prompt for text-to-Cypher generation. `{schema}` and `{question}`
# are the template's two input variables; everything else is sent verbatim.
# Each example pairs a question (on a `#` line) with the Cypher that answers
# it, so the company searched for in an example must match the company named
# in that example's question.
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: Here are a few examples of generated Cypher statements for particular questions:

# Which city in California has the most companies listed?
MATCH p=(:Company)-[:LOCATED_AT]->(address:Address)
        WHERE address.state = 'California'
RETURN address.city as city, count(address.city) as numCompanies
  ORDER BY numCompanies DESC

# What are the cities in California with the most investment firms?
MATCH p=(:Manager)-[:LOCATED_AT]->(address:Address)
        WHERE address.state = 'California'
RETURN address.city as city, count(address.city) as numManagers
  ORDER BY numManagers DESC
  LIMIT 10

# How many investment firms are in Los Angeles?
MATCH p=(:Manager)-[:LOCATED_AT]->(address:Address)
        WHERE address.city = 'Los Angeles'
RETURN count(*) as numManagers

# What investment firms are in San Francisco?
MATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE mgrAddress.city = 'San Francisco'
RETURN mgr.name

# List the top 10 investment firms in San Francisco.
MATCH p=(mgr:Manager)-[:LOCATED_AT]->(address:Address),
        (mgr)-[owns:OWNS_STOCK_IN]->(:Company)
        WHERE address.city = "San Francisco"
RETURN mgr.name as firm, sum(owns.value) as totalInvestmentValue
  ORDER BY totalInvestmentValue DESC
  LIMIT 10

# Who are Netapp's top investors?
CALL db.index.fulltext.queryNodes(
        "fullTextCompanyNames", 
        "Netapp"
        ) YIELD node, score
WITH node as com
MATCH p=(mgr:Manager)-[owns:OWNS_STOCK_IN]->(com)
RETURN mgr.name as firm, sum(owns.value) as totalInvestmentValue
  ORDER BY totalInvestmentValue DESC
  LIMIT 10

# What companies are in Santa Clara?
MATCH (com:Company)-[:LOCATED_AT]->(comAddress:Address)
    WHERE comAddress.city = 'Santa Clara'
RETURN com.name

# What investment firms are near Santa Clara?
  MATCH (address:Address)
    WHERE address.city = "Santa Clara"
  MATCH (mgr:Manager)-[:LOCATED_AT]->(managerAddress:Address)
    WHERE point.distance(address.location, managerAddress.location) < 20 * 1000
  RETURN mgr.name, mgr.address

# Where are Netapp's top investors located?
CALL db.index.fulltext.queryNodes(
        "fullTextCompanyNames", 
        "Netapp"
        ) YIELD node, score
WITH node as com
MATCH (mgr:Manager)-[owns:OWNS_STOCK_IN]->(com),
  (mgr)-[:LOCATED_AT]->(mgrAddress:Address)
RETURN mgr.name as firm, mgrAddress, sum(owns.value) as totalInvestmentValue
  ORDER BY totalInvestmentValue DESC
  LIMIT 10

# Which investment firms are near Palo Aalto Networks?
  CALL db.index.fulltext.queryNodes(
         "fullTextCompanyNames", 
         "Palo Aalto Networks"
         ) YIELD node, score
  WITH node as com
  MATCH (com)-[:LOCATED_AT]->(comAddress:Address),
    (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE point.distance(comAddress.location, mgrAddress.location) < 20 * 1000
  RETURN mgr, 
    toInteger(point.distance(comAddress.location, mgrAddress.location) / 1000) as distanceKm
    ORDER BY distanceKm ASC
    LIMIT 10
  
The question is:
{question}"""
CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
)

# Text-to-Cypher chain: uses the prompt above to generate Cypher and runs it
# against the `kg` graph connection.
cypherChain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0),
    graph=kg,
    verbose=False, # Change this to True to see the generated Cypher statement
    cypher_prompt=CYPHER_GENERATION_PROMPT,
)


def compare_all_qa(question: str):
    """Print the answers of all four question-answering approaches.

    The three retrieval chains run first, each under its own heading and
    followed by a blank separator. The text-to-Cypher chain runs last; its
    'result' entry is printed wrapped to 80 columns.
    """
    retrieval_approaches = (
        ("Vector search...", vector_search_qa),
        ("Vector search with result windows...", window_qa),
        ("Vector search and graph pattern match...", mixed_text_data_qa),
    )
    for heading, ask in retrieval_approaches:
        print(heading)
        ask(question)
        print("\n")

    print("Text to cypher...")
    generated = cypherChain(question)
    wrapped_result = textwrap.fill(generated['result'], 80)
    print(wrapped_result)

## Compare all Question Answering Approaches

1. Vector search
2. Vector search with result windows
3. Vector search and graph pattern match `(:Chunk)-->(:Form)<--(:Company)<--(:Manager)`
4. Text to Cypher, generating a query from the user question

In [None]:
# Open-ended question; the Cypher prompt contains no few-shot example for it.
compare_all_qa("What is most important to Netapp's business?")

In [None]:
# This question appears verbatim as a few-shot example in CYPHER_GENERATION_TEMPLATE.
compare_all_qa("Who are Netapp's top investors?")

In [None]:
# This question appears verbatim as a few-shot example in CYPHER_GENERATION_TEMPLATE.
compare_all_qa("Where are Netapp's top investors located?")

In [None]:
# This question appears verbatim as a few-shot example in CYPHER_GENERATION_TEMPLATE.
compare_all_qa("What investment firms are in San Francisco?")

In [None]:
# This question appears verbatim as a few-shot example in CYPHER_GENERATION_TEMPLATE.
compare_all_qa("List the top 10 investment firms in San Francisco.")

In [None]:
# This question appears verbatim as a few-shot example in CYPHER_GENERATION_TEMPLATE.
compare_all_qa("Which city in California has the most companies listed?")

In [None]:
# This question appears verbatim as a few-shot example in CYPHER_GENERATION_TEMPLATE.
compare_all_qa("What companies are in Santa Clara?")

In [None]:
# No few-shot example in the Cypher prompt covers a headquarters question.
compare_all_qa("Where is Palo Alto Networks headquartered?")

In [None]:
# Appears verbatim (including the "Palo Aalto" spelling) as a few-shot example
# in CYPHER_GENERATION_TEMPLATE.
# NOTE(review): the misspelling looks deliberate, to exercise the full-text
# index lookup - confirm.
compare_all_qa("Which investment firms are near Palo Aalto Networks?")

In [None]:
# No few-shot example in the Cypher prompt covers a headquarters question.
compare_all_qa("Where is Blackrock headquartered?")