# Lesson 7: Chatting with the SEC Knowledge Graph

### Import packages and set up Neo4j

In [1]:
from dotenv import load_dotenv
import os

Load from environment

In [2]:
load_dotenv(dotenv_path=".env", override=True)

NEO4J_URI = os.getenv("NEO4J_URI")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
NEO4J_DATABASE = os.getenv("NEO4J_DATABASE") or "neo4j"
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Global constants
VECTOR_INDEX_NAME = "form_10k_chunks"
VECTOR_NODE_LABEL = "Chunk"
VECTOR_SOURCE_PROPERTY = "text"
VECTOR_EMBEDDING_PROPERTY = "textEmbedding"

Warning control

In [3]:
import warnings

warnings.filterwarnings(action="ignore")

In [4]:
import textwrap

from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains import GraphCypherQAChain
from langchain.prompts.prompt import PromptTemplate

In [5]:
kg = Neo4jGraph(url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD, database=NEO4J_DATABASE)

### Explore the updated SEC documents graph
In this lesson, you'll be working with an updated graph that also includes the address information discussed in the video
- Some outputs below may differ slightly from the video
- Start by checking the schema of the graph

In [6]:
kg.refresh_schema()

print(textwrap.fill(text=kg.schema, width=60))

Node properties: Chunk {textEmbedding: LIST, f10kItem:
STRING, chunkSeqId: INTEGER, text: STRING, cik: STRING,
cusip6: STRING, names: LIST, formId: STRING, source: STRING,
chunkId: STRING} Form {cusip6: STRING, names: LIST, formId:
STRING, source: STRING} Company {location: POINT, cusip:
STRING, names: LIST, companyAddress: STRING, companyName:
STRING, cusip6: STRING} Manager {location: POINT,
managerName: STRING, managerCik: STRING, managerAddress:
STRING} Address {location: POINT, country: STRING, city:
STRING, state: STRING} Relationship properties: SECTION
{f10kItem: STRING} OWNS_STOCK_IN {shares: INTEGER,
reportCalendarOrQuarter: STRING, value: FLOAT} The
relationships: (:Chunk)-[:NEXT]->(:Chunk)
(:Chunk)-[:PART_OF]->(:Form) (:Form)-[:SECTION]->(:Chunk)
(:Company)-[:FILED]->(:Form)
(:Company)-[:LOCATED_AT]->(:Address)
(:Manager)-[:LOCATED_AT]->(:Address)
(:Manager)-[:OWNS_STOCK_IN]->(:Company)


- Check the address of a random Manager
- Note: The company returned by the following query may differ from the one in the video

In [7]:
cypher = """
    MATCH (mgr: Manager)-[:LOCATED_AT]->(addr: Address)
    RETURN mgr, addr
    LIMIT 1
"""

kg.query(query=cypher)

[{'mgr': {'managerCik': '1424381',
   'managerAddress': '650 Madison Avenue, 25th Floor, New York, NY, 10022',
   'location': POINT(-73.9713457 40.7639879),
   'managerName': 'LAKEWOOD CAPITAL MANAGEMENT, LP'},
  'addr': {'country': 'United States',
   'city': 'New York',
   'location': POINT(-73.9821429 40.7584882),
   'state': 'New York'}}]

- Full text search for a manager named Royal Bank
- KA: First identify the full text index created for manager names

In [10]:
cypher = """
    SHOW FULLTEXT INDEXES
"""

kg.query(query=cypher)

[{'id': 10,
  'name': 'fullTextCompanyNames',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'FULLTEXT',
  'entityType': 'NODE',
  'labelsOrTypes': ['Company'],
  'properties': ['names'],
  'indexProvider': 'fulltext-1.0',
  'owningConstraint': None,
  'lastRead': neo4j.time.DateTime(2024, 3, 12, 9, 11, 2, 914000000, tzinfo=<UTC>),
  'readCount': 25},
 {'id': 13,
  'name': 'fullTextManagerNames',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'FULLTEXT',
  'entityType': 'NODE',
  'labelsOrTypes': ['Manager'],
  'properties': ['managerName'],
  'indexProvider': 'fulltext-1.0',
  'owningConstraint': None,
  'lastRead': neo4j.time.DateTime(2024, 3, 11, 22, 16, 18, 352000000, tzinfo=<UTC>),
  'readCount': 6}]

In [12]:
cypher = """
    CALL db.index.fulltext.queryNodes(
        "fullTextManagerNames", "Royal Bank") YIELD node, score
    RETURN node.managerName, score
    LIMIT 1
"""

kg.query(query=cypher)

[{'node.managerName': 'Royal Bank of Canada', 'score': 4.431276321411133}]

- Find location of Royal Bank

In [7]:
cypher = """
    CALL db.index.fulltext.queryNodes(
        "fullTextManagerNames", "Royal Bank"
    ) yield node, score
    WITH node as mgr LIMIT 1
    MATCH (mgr: Manager)-[:LOCATED_AT]->(addr: Address)
    RETURN mgr.managerName, addr
"""

kg.query(query=cypher)

[{'mgr.managerName': 'Royal Bank of Canada',
  'addr': {'country': 'Canada',
   'city': 'Toronto',
   'location': POINT(-79.3805647 43.6508267),
   'state': 'Ontario'}}]

- Determine which state has the most investment firms

In [9]:
cypher = """
    MATCH (mgr: Manager)-[:LOCATED_AT]->(addr: Address)
    RETURN addr.state as state, count(addr.state) as numManagers
        ORDER BY numManagers DESC 
        LIMIT 10
"""

kg.query(query=cypher)

[{'state': 'New York', 'numManagers': 304},
 {'state': 'California', 'numManagers': 302},
 {'state': 'Massachusetts', 'numManagers': 146},
 {'state': 'Pennsylvania', 'numManagers': 138},
 {'state': 'Texas', 'numManagers': 125},
 {'state': 'Illinois', 'numManagers': 121},
 {'state': 'Florida', 'numManagers': 115},
 {'state': 'Connecticut', 'numManagers': 77},
 {'state': 'Ohio', 'numManagers': 76},
 {'state': 'New Jersey', 'numManagers': 69}]

- Determine which state has the most companies

In [11]:
cypher = """
    MATCH p=(:Company)-[:LOCATED_AT]->(addr: Address)
        RETURN addr.state as state, count(addr.state) as numCompanies
        ORDER BY numCompanies DESC
"""

kg.query(query=cypher)

[{'state': 'California', 'numCompanies': 7},
 {'state': 'Delaware', 'numCompanies': 1},
 {'state': 'New York', 'numCompanies': 1},
 {'state': 'Oregon', 'numCompanies': 1}]

- What are the cities in California with the most investment firms?

In [12]:
cypher = """
    MATCH p=(:Manager)-[:LOCATED_AT]->(addr: Address)
        WHERE addr.state = 'California'
    RETURN addr.city as city, count(addr.city) as numManagers
    ORDER BY numManagers DESC
"""

kg.query(query=cypher)

[{'city': 'San Francisco', 'numManagers': 48},
 {'city': 'Los Angeles', 'numManagers': 44},
 {'city': 'San Diego', 'numManagers': 17},
 {'city': 'Pasadena', 'numManagers': 13},
 {'city': 'Irvine', 'numManagers': 9},
 {'city': 'Menlo Park', 'numManagers': 9},
 {'city': 'Newport Beach', 'numManagers': 9},
 {'city': 'Walnut Creek', 'numManagers': 8},
 {'city': 'Palo Alto', 'numManagers': 6},
 {'city': 'Lafayette', 'numManagers': 6},
 {'city': 'Santa Barbara', 'numManagers': 6},
 {'city': 'San Rafael', 'numManagers': 5},
 {'city': 'Santa Monica', 'numManagers': 5},
 {'city': 'Carlsbad', 'numManagers': 5},
 {'city': 'Oakland', 'numManagers': 5},
 {'city': 'Larkspur', 'numManagers': 4},
 {'city': 'El Segundo', 'numManagers': 4},
 {'city': 'San Ramon', 'numManagers': 4},
 {'city': 'Orinda', 'numManagers': 3},
 {'city': 'Beverly Hills', 'numManagers': 3},
 {'city': 'Westlake Village', 'numManagers': 3},
 {'city': 'Los Gatos', 'numManagers': 3},
 {'city': 'Sausalito', 'numManagers': 3},
 {'city

- Which city in California has the most companies listed?

In [13]:
cypher = """
    MATCH p=(:Company)-[:LOCATED_AT]->(addr:Address)
        WHERE addr.city = 'California'
    RETURN addr.city as city, count(addr.city) as numCompanies
    ORDER BY numCompanies DESC
"""

kg.query(query=cypher)

[]

- What are the top investment firms in San Francisco?

In [15]:
cypher = """
    MATCH p=(mgr:Manager)-[:LOCATED_AT]->(addr: Address),
        (mgr)-[owns:OWNS_STOCK_IN]->(:Company)
        WHERE addr.city = 'San Francisco'
    RETURN mgr.managerName, sum(owns.value) as totalInvestmentValue
    ORDER BY totalInvestmentValue DESC
    LIMIT 10
"""

kg.query(query=cypher)

[{'mgr.managerName': 'Dodge & Cox', 'totalInvestmentValue': 3889236092000.0},
 {'mgr.managerName': 'WELLS FARGO & COMPANY/MN',
  'totalInvestmentValue': 2177580039000.0},
 {'mgr.managerName': 'CHARLES SCHWAB INVESTMENT MANAGEMENT INC',
  'totalInvestmentValue': 1944847519000.0},
 {'mgr.managerName': 'Parallax Volatility Advisers, L.P.',
  'totalInvestmentValue': 694023723000.0},
 {'mgr.managerName': 'PARNASSUS INVESTMENTS, LLC',
  'totalInvestmentValue': 211068925000.0},
 {'mgr.managerName': 'Spyglass Capital Management LLC',
  'totalInvestmentValue': 98135259000.0},
 {'mgr.managerName': 'Valiant Capital Management, L.P.',
  'totalInvestmentValue': 52124040000.0},
 {'mgr.managerName': 'Ensemble Capital Management, LLC',
  'totalInvestmentValue': 42355370000.0},
 {'mgr.managerName': 'Woodline Partners LP',
  'totalInvestmentValue': 41497384000.0},
 {'mgr.managerName': 'Alta Park Capital, LP',
  'totalInvestmentValue': 38959909000.0}]

- What companies are located in Santa Clara?

In [16]:
cypher = """
    MATCH p=(com:Company)-[:LOCATED_AT]->(addr:Address)
        WHERE addr.city = 'Santa Clara'
    RETURN com.companyName
"""

kg.query(query=cypher)

[{'com.companyName': 'PALO ALTO NETWORKS INC'},
 {'com.companyName': 'SEAGATE TECHNOLOGY'},
 {'com.companyName': 'ATLASSIAN CORP PLC'}]

- What companies are near Santa Clara?

In [18]:
cypher = """
    MATCH (sc: Address)
        WHERE sc.city = 'Santa Clara'
    MATCH (com:Company)-[:LOCATED_AT]->(addr:Address)
        WHERE point.distance(addr.location, sc.location) < 10000
    RETURN com.companyName, com.companyAddress
"""

kg.query(query=cypher)

[{'com.companyName': 'PALO ALTO NETWORKS INC',
  'com.companyAddress': '3000 Tannery Way, Santa Clara, CA 95054, USA'},
 {'com.companyName': 'GSI TECHNOLOGY INC',
  'com.companyAddress': '1213 Elko Dr, Sunnyvale, CA 94089, USA'},
 {'com.companyName': 'SEAGATE TECHNOLOGY',
  'com.companyAddress': '2445 Augustine Dr, Santa Clara, CA 95054, USA'},
 {'com.companyName': 'ATLASSIAN CORP PLC',
  'com.companyAddress': '431 El Camino Real, Santa Clara, CA 95050, USA'},
 {'com.companyName': 'APPLE INC', 'com.companyAddress': 'Cupertino, CA, USA'}]

- What investment firms are near Santa Clara?
- Try updating the distance in the query to expand the search radius

In [19]:
cypher = """
    MATCH (sc: Address)
        WHERE sc.city = "Santa Clara"
    MATCH (mgr: Manager)-[:LOCATED_AT]->(mgrAddress: Address)
        WHERE point.distance(sc.location, mgrAddress.location) < 10000
    RETURN mgr.managerName, mgr.managerAddress
"""

kg.query(query=cypher)

[{'mgr.managerName': 'Mine & Arao Wealth Creation & Management, LLC.',
  'mgr.managerAddress': '901 CAMPISI WAY, SUITE 140, CAMPBELL, CA, 95008'}]

- Which investment firms are near Palo Alto Networks?
- Note that full-text search is able to handle typos!

In [27]:
cypher = """
    CALL db.index.fulltext.queryNodes("fullTextCompanyNames", "Palo Aalto Networks")
        YIELD node, score
    WITH node as com
    MATCH (com)-[:LOCATED_AT]->(comAddress: Address),
        (mgr: Manager)-[:LOCATED_AT]->(mgrAddress: Address)
            WHERE point.distance(comAddress.location, mgrAddress.location) < 20000
    RETURN mgr, toInteger(point.distance(com.location, mgrAddress.location) / 1000) as distanceKm
        ORDER BY distanceKm ASC
        LIMIT 10
"""

kg.query(query=cypher)

[{'mgr': {'managerCik': '1611518',
   'managerAddress': '800 WEST EL CAMINO REAL SUITE 201, MOUNTAIN VIEW, CA, 94040',
   'location': POINT(-122.0842031 37.3862077),
   'managerName': 'Wealth Architects, LLC'},
  'distanceKm': 6},
 {'mgr': {'managerCik': '1802994',
   'managerAddress': '901 CAMPISI WAY, SUITE 140, CAMPBELL, CA, 95008',
   'location': POINT(-121.9342655 37.2909459),
   'managerName': 'Mine & Arao Wealth Creation & Management, LLC.'},
  'distanceKm': 11},
 {'mgr': {'managerCik': '1911695',
   'managerAddress': '1064 LAURELES DRIVE, LOS ALTOS, CA, 94022',
   'location': POINT(-122.1239047 37.4017779),
   'managerName': 'Family CFO Inc'},
  'distanceKm': 12},
 {'mgr': {'managerCik': '1630365',
   'managerAddress': '4984 EL CAMINO REAL, SUITE 101, LOS ALTOS, CA, 94022',
   'location': POINT(-122.105859 37.397132),
   'managerName': 'AIMZ Investment Advisors, LLC'},
  'distanceKm': 12},
 {'mgr': {'managerCik': '1477872',
   'managerAddress': 'P.O BOX 3552, SARATOGA, CA, 9507

- Try pausing the video and modifying queries above to further explore the graph
- You can learn more about Cypher at the neo4j website: https://neo4j.com/product/cypher-graph-query-language/ 

### Writing Cypher with an LLM

In this section, you'll use few-shot learning to teach an LLM to write Cypher
- You'll use the OpenAI's GPT 3.5 model 
- You'll also use a new Neo4j integration within LangChain called **GraphCypherQAChain**

In [29]:
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to 
query a graph database.
Instructions:
Use only the provided relationship types and properties in the 
schema. Do not use any other relationship types or properties that 
are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than 
for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: Here are a few examples of generated Cypher 
statements for particular questions:

# What investment firms are in San Francisco?
MATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE mgrAddress.city = 'San Francisco'
RETURN mgr.managerName
The question is:
{question}"""

In [30]:
CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"],
    template=CYPHER_GENERATION_TEMPLATE
)

In [31]:
cypherChain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0),
    graph=kg,
    verbose=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT
)

In [9]:
def prettyCypherChain(question: str) -> str:
    response = cypherChain.run(question)
    print(textwrap.fill(text=response, width=60))

In [39]:
prettyCypherChain("What investment firms are in San Francisco?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
WHERE mgrAddress.city = 'San Francisco'
RETURN mgr.managerName[0m
Full Context:
[32;1m[1;3m[{'mgr.managerName': 'PARNASSUS INVESTMENTS, LLC'}, {'mgr.managerName': 'SKBA CAPITAL MANAGEMENT LLC'}, {'mgr.managerName': 'ROSENBLUM SILVERMAN SUTTON S F INC /CA'}, {'mgr.managerName': 'CHARLES SCHWAB INVESTMENT MANAGEMENT INC'}, {'mgr.managerName': 'WELLS FARGO & COMPANY/MN'}, {'mgr.managerName': 'Dodge & Cox'}, {'mgr.managerName': 'Strait & Sound Wealth Management LLC'}, {'mgr.managerName': 'Sonoma Private Wealth LLC'}, {'mgr.managerName': 'Fund Management at Engine No. 1 LLC'}, {'mgr.managerName': 'SELDON CAPITAL LP'}][0m

[1m> Finished chain.[0m
PARNASSUS INVESTMENTS, LLC, SKBA CAPITAL MANAGEMENT LLC,
ROSENBLUM SILVERMAN SUTTON S F INC /CA, and Strait & Sound
Wealth Management LLC are investment firms in San Francisco.


In [35]:
prettyCypherChain("What investment firms are in Menlo Park?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE mgrAddress.city = 'Menlo Park'
RETURN mgr.managerName[0m
Full Context:
[32;1m[1;3m[{'mgr.managerName': 'Bordeaux Wealth Advisors LLC'}, {'mgr.managerName': 'Opes Wealth Management LLC'}, {'mgr.managerName': 'Solstein Capital, LLC'}, {'mgr.managerName': 'Stamos Capital Partners, L.P.'}, {'mgr.managerName': 'TIEMANN INVESTMENT ADVISORS, LLC'}, {'mgr.managerName': 'SCGE MANAGEMENT, L.P.'}, {'mgr.managerName': 'Nelson Capital Management, LLC'}, {'mgr.managerName': 'Jasper Ridge Partners, L.P.'}, {'mgr.managerName': 'CROSSLINK CAPITAL INC'}][0m

[1m> Finished chain.[0m
Bordeaux Wealth Advisors LLC, Opes Wealth Management LLC,
Solstein Capital, LLC, Stamos Capital Partners, L.P.,
TIEMANN INVESTMENT ADVISORS, LLC, SCGE MANAGEMENT, L.P.,
Nelson Capital Management, LLC, Jasper Ridge Partners, L.P.,
CROSSLINK CAPITAL INC are investment firms 

In [36]:
prettyCypherChain("What companies are in Santa Clara?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (comp:Company)-[:LOCATED_AT]->(compAddress:Address)
    WHERE compAddress.city = 'Santa Clara'
RETURN comp.companyName[0m
Full Context:
[32;1m[1;3m[{'comp.companyName': 'PALO ALTO NETWORKS INC'}, {'comp.companyName': 'SEAGATE TECHNOLOGY'}, {'comp.companyName': 'ATLASSIAN CORP PLC'}][0m

[1m> Finished chain.[0m
PALO ALTO NETWORKS INC, SEAGATE TECHNOLOGY, ATLASSIAN CORP
PLC are in Santa Clara.


In [37]:
prettyCypherChain("What companies are near Santa Clara?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (comp:Company)-[:LOCATED_AT]->(compAddress:Address)
    WHERE compAddress.city = 'Santa Clara'
RETURN comp.companyName[0m
Full Context:
[32;1m[1;3m[{'comp.companyName': 'PALO ALTO NETWORKS INC'}, {'comp.companyName': 'SEAGATE TECHNOLOGY'}, {'comp.companyName': 'ATLASSIAN CORP PLC'}][0m

[1m> Finished chain.[0m
PALO ALTO NETWORKS INC, SEAGATE TECHNOLOGY, ATLASSIAN CORP
PLC are near Santa Clara.


### Expand the prompt to teach the LLM new Cypher patterns

In [45]:
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to 
query a graph database.
Instructions:
Use only the provided relationship types and properties in the 
schema. Do not use any other relationship types or properties that 
are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than 
for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: Here are a few examples of generated Cypher 
statements for particular questions:

# What investment firms are in San Francisco?
MATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE mgrAddress.city = 'San Francisco'
RETURN mgr.managerName

# What investment firms are near Santa Clara?
MATCH (address: Address)
    WHERE address.city = 'Santa Clara'
MATCH (mgr: Manager)-[:LOCATED_AT]->(mgrAddress: Address)
    WHERE point.distance(address.location, mgrAddress.location) < 10000
RETURN mgr.managerName, mgr.managerAddress

The question is:
{question}"""

- Update Cypher generation prompt with new template, and re-initialize the Cypher chain to use the new prompt
- Rerun this code anytime you make a change to the Cypher generation template!

In [47]:
CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"],
    template=CYPHER_GENERATION_TEMPLATE
)

cypherChain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0),
    graph=kg,
    verbose=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT
)

In [48]:
prettyCypherChain("What investment firms are near Santa Clara?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (address: Address)
    WHERE address.city = 'Santa Clara'
MATCH (mgr: Manager)-[:LOCATED_AT]->(mgrAddress: Address)
    WHERE point.distance(address.location, mgrAddress.location) < 10000
RETURN mgr.managerName, mgr.managerAddress[0m
Full Context:
[32;1m[1;3m[{'mgr.managerName': 'Mine & Arao Wealth Creation & Management, LLC.', 'mgr.managerAddress': '901 CAMPISI WAY, SUITE 140, CAMPBELL, CA, 95008'}][0m

[1m> Finished chain.[0m
Mine & Arao Wealth Creation & Management, LLC. is near Santa
Clara.


### Expand the query to retrieve information from the Form 10K chunks

In [49]:
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to 
query a graph database.
Instructions:
Use only the provided relationship types and properties in the 
schema. Do not use any other relationship types or properties that 
are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than 
for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: Here are a few examples of generated Cypher 
statements for particular questions:

# What investment firms are in San Francisco?
MATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE mgrAddress.city = 'San Francisco'
RETURN mgr.managerName

# What investment firms are near Santa Clara?
MATCH (address: Address)
    WHERE address.city = 'Santa Clara'
MATCH (mgr: Manager)-[:LOCATED_AT]->(mgrAddress: Address)
    WHERE point.distance(address.location, mgrAddress.location) < 10000
RETURN mgr.managerName, mgr.managerAddress

# What does Palo Alto Networks do?
  CALL db.index.fulltext.queryNodes(
         "fullTextCompanyNames", 
         "Palo Alto Networks"
         ) YIELD node, score
  WITH node as com
  MATCH (com)-[:FILED]->(f:Form),
    (f)-[s:SECTION]->(c:Chunk)
  WHERE s.f10kItem = "item1"
RETURN c.text

The question is:
{question}"""

In [50]:
CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"],
    template=CYPHER_GENERATION_TEMPLATE
)

cypherChain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0),
    graph=kg,
    verbose=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT
)

In [51]:
prettyCypherChain("What does Palo Alto Networks do?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mCALL db.index.fulltext.queryNodes(
     "fullTextCompanyNames", 
     "Palo Alto Networks"
     ) YIELD node, score
WITH node as com
MATCH (com)-[:FILED]->(f:Form),
  (f)-[s:SECTION]->(c:Chunk)
WHERE s.f10kItem = "item1"
RETURN c.text[0m
Full Context:
[32;1m[1;3m[{'c.text': '>Item 1. Business\nGeneral\nPalo Alto Networks, Inc. is a global cybersecurity provider with a vision of a world where each day is safer and more secure than the one before. We were incorporated in 2005 and are headquartered in Santa Clara, California.\nWe empower enterprises, organizations, service providers, and government entities to protect themselves against today’s most sophisticated cyber threats. Our cybersecurity platforms and services help secure enterprise users, networks, clouds, and endpoints by delivering comprehensive cybersecurity backed by industry-leading artificial intelligence and automation. We are a leading 

### Try for yourself!

- Update the Cypher generation prompt below to ask different questions of the graph
- You can run the "check schema" cell to be reminded of the graph structure
- Use any of the examples in this notebook, or in previous lessons, to get started
- Remember to update the prompt template and restart the GraphCypherQAChain each time you make updates!

In [52]:
# Check the graph schema
kg.refresh_schema()
print(textwrap.fill(kg.schema, width=60))

Node properties: Chunk {textEmbedding: LIST, f10kItem:
STRING, chunkSeqId: INTEGER, text: STRING, cik: STRING,
cusip6: STRING, names: LIST, formId: STRING, source: STRING,
chunkId: STRING} Form {cusip6: STRING, names: LIST, formId:
STRING, source: STRING} Company {location: POINT, cusip:
STRING, names: LIST, companyAddress: STRING, companyName:
STRING, cusip6: STRING} Manager {location: POINT,
managerName: STRING, managerCik: STRING, managerAddress:
STRING} Address {location: POINT, country: STRING, city:
STRING, state: STRING} Relationship properties: SECTION
{f10kItem: STRING} OWNS_STOCK_IN {shares: INTEGER,
reportCalendarOrQuarter: STRING, value: FLOAT} The
relationships: (:Chunk)-[:NEXT]->(:Chunk)
(:Chunk)-[:PART_OF]->(:Form) (:Form)-[:SECTION]->(:Chunk)
(:Company)-[:FILED]->(:Form)
(:Company)-[:LOCATED_AT]->(:Address)
(:Manager)-[:LOCATED_AT]->(:Address)
(:Manager)-[:OWNS_STOCK_IN]->(:Company)


In [53]:
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to 
query a graph database.
Instructions:
Use only the provided relationship types and properties in the 
schema. Do not use any other relationship types or properties that 
are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than 
for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: Here are a few examples of generated Cypher 
statements for particular questions:

# What investment firms are in San Francisco?
MATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE mgrAddress.city = 'San Francisco'
RETURN mgr.managerName

# What investment firms are near Santa Clara?
MATCH (address: Address)
    WHERE address.city = 'Santa Clara'
MATCH (mgr: Manager)-[:LOCATED_AT]->(mgrAddress: Address)
    WHERE point.distance(address.location, mgrAddress.location) < 10000
RETURN mgr.managerName, mgr.managerAddress

# What does Palo Alto Networks do?
  CALL db.index.fulltext.queryNodes(
         "fullTextCompanyNames", 
         "Palo Alto Networks"
         ) YIELD node, score
  WITH node as com
  MATCH (com)-[:FILED]->(f:Form),
    (f)-[s:SECTION]->(c:Chunk)
  WHERE s.f10kItem = "item1"
RETURN c.text

The question is:
{question}"""

In [54]:
CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"],
    template=CYPHER_GENERATION_TEMPLATE
)

cypherChain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0),
    graph=kg,
    verbose=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT
)

In [55]:
prettyCypherChain("Determine which state has the most companies?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Company)-[:LOCATED_AT]->(a:Address)
WITH a.state AS state, COUNT(c) AS numCompanies
RETURN state, numCompanies
ORDER BY numCompanies DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'state': 'California', 'numCompanies': 7}][0m

[1m> Finished chain.[0m
California has the most companies with a total of 7.


In [56]:
prettyCypherChain("Determine total investment by Spyglass Capital Management?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (mgr:Manager {managerName: 'Spyglass Capital Management'})-[:OWNS_STOCK_IN]->(company:Company)
RETURN SUM(company.value) as total_investment[0m
Full Context:
[32;1m[1;3m[{'total_investment': 0}][0m

[1m> Finished chain.[0m
The total investment by Spyglass Capital Management is 0.


In [57]:
prettyCypherChain("Determine total investment by Spyglass Capital Management LLC?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (mgr:Manager {managerName: 'Spyglass Capital Management LLC'})-[:OWNS_STOCK_IN]->(company:Company)
RETURN SUM(company.value) as total_investment[0m
Full Context:
[32;1m[1;3m[{'total_investment': 0}][0m

[1m> Finished chain.[0m
The total investment by Spyglass Capital Management LLC is
0.


Observations
- The query is trying to compute investment value from a **non-existent** `value` field in `Company`. It should read from the `value` field in the relationship `OWNS_STOCK_IN`
- If the exact name of investment firm is not provided, the query should use full text search.

In [6]:
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to 
query a graph database.
Instructions:
Use only the provided relationship types and properties in the 
schema. Do not use any other relationship types or properties that 
are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than 
for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: Here are a few examples of generated Cypher 
statements for particular questions:

# What investment firms are in San Francisco?
MATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE mgrAddress.city = 'San Francisco'
RETURN mgr.managerName

# What investment firms are near Santa Clara?
MATCH (address: Address)
    WHERE address.city = 'Santa Clara'
MATCH (mgr: Manager)-[:LOCATED_AT]->(mgrAddress: Address)
    WHERE point.distance(address.location, mgrAddress.location) < 10000
RETURN mgr.managerName, mgr.managerAddress

# What does Palo Alto Networks do?
CALL db.index.fulltext.queryNodes(
        "fullTextCompanyNames", 
        "Palo Alto Networks"
        ) YIELD node, score
WITH node as com
MATCH (com)-[:FILED]->(f:Form),
(f)-[s:SECTION]->(c:Chunk)
WHERE s.f10kItem = "item1"
RETURN c.text

# What is the total investment by the management firm Nelson Capital Management
CALL db.index.fulltext.queryNodes(
    "fullTextManagerNames", "Nelson Capital Management"
    ) YIELD node, score
WITH node as mgr
MATCH (mgr)-[owns:OWNS_STOCK_IN]->(com: Company)
RETURN mgr.managerName, SUM(owns.value) as totalInvestment

The question is:
{question}"""

In [7]:
CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"],
    template=CYPHER_GENERATION_TEMPLATE
)

cypherChain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0),
    graph=kg,
    verbose=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT
)

In [10]:
prettyCypherChain("Determine total investment by Spyglass Capital Management LLC?")

  warn_deprecated(




[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mCALL db.index.fulltext.queryNodes(
    "fullTextManagerNames", "Spyglass Capital Management LLC"
    ) YIELD node, score
WITH node as mgr
MATCH (mgr)-[owns:OWNS_STOCK_IN]->(com: Company)
RETURN mgr.managerName, SUM(owns.value) as totalInvestment[0m
Full Context:
[32;1m[1;3m[{'mgr.managerName': 'Spyglass Capital Management LLC', 'totalInvestment': 98135259000.0}, {'mgr.managerName': 'SANDS CAPITAL MANAGEMENT, LLC', 'totalInvestment': 510049680000.0}, {'mgr.managerName': 'CHILTON CAPITAL MANAGEMENT LLC', 'totalInvestment': 22349034000.0}, {'mgr.managerName': 'BUCKHEAD CAPITAL MANAGEMENT LLC', 'totalInvestment': 240215000.0}, {'mgr.managerName': 'Windsor Capital Management, LLC', 'totalInvestment': 657163000.0}, {'mgr.managerName': 'GLENVIEW CAPITAL MANAGEMENT, LLC', 'totalInvestment': 23706181000.0}, {'mgr.managerName': 'Ionic Capital Management LLC', 'totalInvestment': 2047158000.0}, {'mgr.managerName

In [11]:
prettyCypherChain("What is the total shares owned by Spyglass Capital Management LLC?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mCALL db.index.fulltext.queryNodes(
    "fullTextManagerNames", "Spyglass Capital Management LLC"
    ) YIELD node, score
WITH node as mgr
MATCH (mgr)-[owns:OWNS_STOCK_IN]->(com: Company)
RETURN mgr.managerName, SUM(owns.shares) as totalSharesOwned[0m
Full Context:
[32;1m[1;3m[{'mgr.managerName': 'Spyglass Capital Management LLC', 'totalSharesOwned': 384076}, {'mgr.managerName': 'SANDS CAPITAL MANAGEMENT, LLC', 'totalSharesOwned': 4621271}, {'mgr.managerName': 'CHILTON CAPITAL MANAGEMENT LLC', 'totalSharesOwned': 88146}, {'mgr.managerName': 'BUCKHEAD CAPITAL MANAGEMENT LLC', 'totalSharesOwned': 969}, {'mgr.managerName': 'Windsor Capital Management, LLC', 'totalSharesOwned': 5954}, {'mgr.managerName': 'GLENVIEW CAPITAL MANAGEMENT, LLC', 'totalSharesOwned': 95628}, {'mgr.managerName': 'Ionic Capital Management LLC', 'totalSharesOwned': 8258}, {'mgr.managerName': 'JS Capital Management LLC', 'totalShares