In [2]:
import os

from dotenv import load_dotenv

# Load key/value pairs from a local .env file (if one exists) into the process
# environment so the os.getenv(...) lookups in later cells can see them.
# Without this call the load_dotenv import above is never used.
load_dotenv()



In [3]:
from langchain_openai import ChatOpenAI

# Chat model shared by the Cypher QA chain and the agent built in later cells.
CHAT_MODEL_NAME = "gpt-4o-mini"

llm = ChatOpenAI(model=CHAT_MODEL_NAME)


In [5]:
from langchain_core.tools import create_retriever_tool
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings

# Neo4j connection settings are read from the environment. The database name
# falls back to 'neo4j' when NEO4J_DATABASE is unset or empty.
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
NEO4J_DATABASE = os.getenv('NEO4J_DATABASE') or 'neo4j'


# Retrieval query handed to Neo4jVector. `node` and `score` are not defined in
# this query; they are expected to be supplied by the vector index search that
# runs before it. For each matched chunk the query walks
# chunk -> Form -> Company -> Manager, keeps the 10 rows with the most shares,
# and returns one text blob listing those investors followed by the chunk text.
company_info_cypher = """
MATCH (node)-[:PART_OF]->(f:Form),
    (f)<-[:FILED]-(com:Company),
    (com)<-[owns:OWNS_STOCK_IN]-(mgr:Manager)
WITH node, score, mgr, owns, com 
    ORDER BY owns.shares DESC LIMIT 10
WITH collect (
    mgr.name + 
    " owns " + owns.shares + " of " + com.name + 
    " at a value of $" + apoc.number.format(owns.value) + "." 
) AS investment_statements, com, node, score
RETURN 
    "Investors in " + com.name + " include...\n" +
    apoc.text.join(investment_statements, "\n") + 
    "\n" + 
    "Information about " + com.name + " that is relevant to the user question...\n" + node.text AS text,
    score,
    { 
      source: node.source
    } as metadata
"""

embeddings_api = OpenAIEmbeddings()

# Attach to the existing "form_10k_chunks" vector index over Chunk nodes.
# The database is passed explicitly so the vector store targets the same
# database as the Neo4jGraph connection used by the Cypher QA chain.
neo4j_vector_store = Neo4jVector.from_existing_graph(
    embedding=embeddings_api,
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
    index_name="form_10k_chunks",
    node_label="Chunk",
    text_node_properties=["text"],
    embedding_node_property="textEmbedding",
    retrieval_query=company_info_cypher,
)
# Create a retriever from the vector store
company_retriever = neo4j_vector_store.as_retriever()

# The description is what the agent reads when choosing a tool, so it states
# concretely what this retriever can answer.
company_tool = create_retriever_tool(
    name="company_retriever",
    retriever=company_retriever,
    description=(
        "Search and return information about companies from their Form 10-K "
        "filings, including the investment managers that hold the most "
        "shares in each company."
    ),
)


In [16]:
from langchain.prompts.prompt import PromptTemplate
from langchain.chains import GraphCypherQAChain
from langchain_community.graphs import Neo4jGraph
from langchain.tools import Tool

# Few-shot prompt for text-to-Cypher generation. {schema} and {question} are
# the two template variables filled in by the chain.
# The examples use the `name` property for Manager and Company nodes, matching
# the property the rest of this notebook queries (mgr.name / com.name).
# NOTE(review): `mgr.managerAddress` in the third example is kept from the
# original prompt; confirm that property exists in the graph schema.
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: Here are a few examples of generated Cypher statements for particular questions:

# What are the top investment firms in San Francisco?
MATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE mgrAddress.city = 'San Francisco'
RETURN mgr.name

# What companies are in Santa Clara?
MATCH (com:Company)-[:LOCATED_AT]->(comAddress:Address)
    WHERE comAddress.city = 'Santa Clara'
RETURN com.name

# What investment firms are near Santa Clara?
  MATCH (address:Address)
    WHERE address.city = "Santa Clara"
  MATCH (mgr:Manager)-[:LOCATED_AT]->(managerAddress:Address)
    WHERE point.distance(address.location, managerAddress.location) < 20 * 1000
  RETURN mgr.name, mgr.managerAddress

# Which investment firms are near Palo Aalto Networks?
  CALL db.index.fulltext.queryNodes(
         "fullTextCompanyNames", 
         "Palo Aalto Networks"
         ) YIELD node, score
  WITH node as com
  MATCH (com)-[:LOCATED_AT]->(comAddress:Address),
    (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE point.distance(comAddress.location, mgrAddress.location) < 20 * 1000
  RETURN mgr, 
    toInteger(point.distance(comAddress.location, mgrAddress.location) / 1000) as distanceKm
    ORDER BY distanceKm ASC
    LIMIT 10
  
The question is:
{question}"""

CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
)

kg = Neo4jGraph(
    url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD, database=NEO4J_DATABASE
)

# NOTE(review): allow_dangerous_requests=True lets the chain run LLM-generated
# Cypher against the database; consider connecting with a read-only user.
cypher_chain = GraphCypherQAChain.from_llm(
    llm,
    graph=kg,
    verbose=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    allow_dangerous_requests=True
)


def run_cypher_qa(question: str, callbacks=None) -> str:
    """Answer a natural-language question with the Cypher QA chain.

    Replaces the deprecated ``cypher_chain.run`` with ``invoke``, the call
    style used elsewhere in this notebook.

    Args:
        question: The user's question, passed to the chain under "query".
        callbacks: Optional callbacks forwarded to the chain so its run stays
            nested under the calling tool/agent run.

    Returns:
        The chain's "result" string.
    """
    response = cypher_chain.invoke({"query": question}, config={"callbacks": callbacks})
    return response["result"]


# return_direct=True hands the chain's answer back as the agent's final output.
cypher_tool = Tool.from_function(
    name="GraphCypherQAChain",
    description="Use Cypher to generate information about companies and investors",
    func=run_cypher_qa,
    return_direct=True
)


In [9]:
# Query the Cypher QA chain directly (bypassing the agent) as a smoke test.
sf_firms_request = {"query": "What are the top investment firms are in San Francisco"}
cypher_chain.invoke(sf_firms_request)




[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE mgrAddress.city = 'San Francisco'
RETURN mgr.name[0m
Full Context:
[32;1m[1;3m[{'mgr.name': 'OSBORNE PARTNERS CAPITAL MANAGEMENT, LLC'}, {'mgr.name': 'OSTERWEIS CAPITAL MANAGEMENT INC'}, {'mgr.name': 'JACOBS & CO/CA'}, {'mgr.name': 'VAN STRUM & TOWNE INC.'}, {'mgr.name': 'RBF Capital, LLC'}, {'mgr.name': 'ALGERT GLOBAL LLC'}, {'mgr.name': 'WETHERBY ASSET MANAGEMENT INC'}, {'mgr.name': 'Avalon Global Asset Management LLC'}, {'mgr.name': 'Pacific Heights Asset Management LLC'}, {'mgr.name': 'Violich Capital Management, Inc.'}][0m

[1m> Finished chain.[0m


{'query': 'What are the top investment firms are in San Francisco',
 'result': 'The top investment firms in San Francisco include OSBORNE PARTNERS CAPITAL MANAGEMENT, LLC, OSTERWEIS CAPITAL MANAGEMENT INC, JACOBS & CO/CA, VAN STRUM & TOWNE INC., RBF Capital, LLC, ALGERT GLOBAL LLC, WETHERBY ASSET MANAGEMENT INC, Avalon Global Asset Management LLC, Pacific Heights Asset Management LLC, and Violich Capital Management, Inc.'}

In [17]:
# Tools the agent can choose between: the vector-backed company retriever and
# the text-to-Cypher QA chain.
tools = [company_tool, cypher_tool]


In [18]:
from langchain.agents import initialize_agent, AgentType

# Build an agent over the two tools using the structured-chat zero-shot
# ReAct agent type, driven by the shared chat model.
agent_chain = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
)



In [None]:
# Off-topic question: neither tool covers arithmetic.
agent_chain.invoke({"input": "What is 2 +2 ?"})

In [19]:
# Open-ended question about which companies the agent can describe.
agent_chain.invoke({"input": "What companies do you know about?"})

{'input': 'What companies do you know about?',
 'output': 'I have information about the following companies:\n\n1. **NIKE INC**: A leading sportswear and athletic footwear company. Major investors include Bank Julius Baer & Co. Ltd, VANGUARD GROUP INC, BlackRock Inc., and STATE STREET CORP. Key challenges include competition and potential supply chain disruptions.\n\n2. **NEWS CORP NEW**: A global diversified media and information services company. Major investors include PRICE T ROWE ASSOCIATES INC, VANGUARD GROUP INC, and Independent Franchise Partners LLP. The company focuses on creating and distributing authoritative content across various media platforms.'}

In [20]:
# Aggregate question about where investment firms are located.
agent_chain.invoke({"input": "Where are most of the investment firms?"})



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
RETURN mgrAddress.city, COUNT(mgr) AS firmCount
ORDER BY firmCount DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'mgrAddress.city': 'New York', 'firmCount': 224}][0m

[1m> Finished chain.[0m


{'input': 'Where are most of the investment firms?',
 'output': 'Most investment firms are located in New York, with a total of 224 firms based there.'}

In [21]:
# Follow-up question about investment firms in New York.
agent_chain.invoke({"input": "Where are the top investment firms in New York?"})

{'input': 'Where are the top investment firms in New York?',
 'output': 'I could not find specific information regarding the top investment firms in New York. However, I can suggest some well-known investment firms that are generally recognized in the industry, such as Goldman Sachs, JPMorgan Chase, Morgan Stanley, BlackRock, and Citadel. If you need detailed information about any specific firm, please let me know.'}