In [None]:
import os
from dotenv import load_dotenv

import textwrap

from langchain_community.graphs import Neo4jGraph
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts.prompt import PromptTemplate
from langchain.chains import GraphCypherQAChain

# Load variables from a .env file (if one is present) into the process environment.
load_dotenv()

# Neo4j connection settings; each value is None when its variable is unset.
NEO4J_URI = os.environ.get('NEO4J_URI')
NEO4J_USERNAME = os.environ.get('NEO4J_USERNAME')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD')
NEO4J_DATABASE = os.environ.get('NEO4J_DATABASE')

# Key handed to the OpenAI chat model.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

# Names of the vector index and of the node label/properties it is built on.
VECTOR_INDEX_NAME = 'form_10k_chunks'
VECTOR_NODE_LABEL = 'Chunk'
VECTOR_SOURCE_PROPERTY = 'text'
VECTOR_EMBEDDING_PROPERTY = 'textEmbedding'

# Knowledge-graph connection used for every Cypher query in this notebook.
kg = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
)


In [None]:
# Create a langchain vector store from the existing Neo4j knowledge graph.
# The database is passed explicitly so the vector store targets the same
# database as the `kg` connection rather than an implicit default.
neo4j_vector_store = Neo4jVector.from_existing_graph(
    embedding=OpenAIEmbeddings(),
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
    index_name=VECTOR_INDEX_NAME,
    node_label=VECTOR_NODE_LABEL,
    text_node_properties=[VECTOR_SOURCE_PROPERTY],
    embedding_node_property=VECTOR_EMBEDDING_PROPERTY,
)

# Create a retriever from the vector store
retriever = neo4j_vector_store.as_retriever()

# Create a chatbot Question & Answer chain from the retriever.
# "stuff" places all retrieved documents into a single prompt for the model.
chain = RetrievalQAWithSourcesChain.from_chain_type(
    ChatOpenAI(temperature=0), chain_type="stuff", retriever=retriever
)

def prettychain(question: str) -> None:
    """Ask the retrieval QA chain a question and print the wrapped answer.

    Args:
        question: Natural-language question, passed to the chain under the
            "question" input key.

    Returns:
        None. The chain's "answer" output is printed, wrapped to 80 columns.
    """
    # Use `invoke`; calling the chain object directly is deprecated in LangChain.
    response = chain.invoke({"question": question}, return_only_outputs=True)
    print(textwrap.fill(response['answer'], 80))

# Chat model that prettyllm queries directly, without going through the retriever.
llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY)

def prettyllm(question: str) -> None:
    """Send a question straight to the chat model and print the wrapped reply.

    Unlike `prettychain`, no retrieval step is involved: the question is
    passed to `llm.invoke` as-is.

    Args:
        question: Prompt text sent to the chat model.

    Returns:
        None. The reply's `content` is printed, wrapped to 80 columns.
    """
    response = llm.invoke(question)
    print(textwrap.fill(response.content, 80))


In [None]:
# Baseline: ask the chat model directly, without any retrieval from the vector store.
prettyllm("What is the address of Blackrock?")


In [None]:
# Broad question answered through the retrieval QA chain.
prettychain("What companies do you know about?")

In [None]:
# Location-based question answered through the retrieval QA chain.
prettychain("What companies are in San Jose?")

In [None]:
# Same question as the direct-LLM cell, this time answered through the retrieval QA chain.
prettychain("What is the address of Blackrock?")

In [None]:
# Headquarters lookup for a single company through the retrieval QA chain.
prettychain("Where is Netapp headquartered?")

In [None]:
# Variation of the San Jose question that asks explicitly about headquarters.
prettychain("What companies are headquartered in San Jose?")

In [None]:
# The question itself constrains the answer format to a single sentence.
prettychain("In a single sentence, what is Netapp's industry segment?")

In [None]:
# Fetch the cusip6 identifier and name of every Company node.
company_rows = kg.query("""
  MATCH (com:Company)
  RETURN com { .cusip6, .companyName } as company
""")

# Unwrap the projected `company` map from each result row.
companies = [row['company'] for row in company_rows]

print(companies[0])

In [None]:
# Cypher that stores a company's formatted address and coordinates, then links the
# company to an Address node keyed by (city, state), creating the node and the
# LOCATED_AT relationship when they do not exist yet.
set_company_location_cypher = """
  MATCH (com:Company {cusip6: $cusip6})
  SET com.companyAddress = $companyAddress
  SET com.location = point({latitude: $latitude, longitude: $longitude})
  MERGE (addr:Address {city: $city, state: $state})
  MERGE (com)-[:LOCATED_AT]->(addr)
"""

# NOTE(review): `gmaps`, `get_location`, `get_city`, `get_state` and `get_country`
# are not defined in the visible cells of this notebook; they must be defined
# before this cell runs or it fails with NameError on a fresh kernel.
for company in companies:
    company_name = company['companyName']

    # Ask the retrieval QA chain for the headquarters, then geocode its free-text answer.
    # `invoke` with the "question" key matches how prettychain calls the chain.
    result = chain.invoke({"question": f"Where is {company_name} headquartered?"})
    address_statement = result['answer']
    address_geocodes = gmaps.geocode(address_statement)

    if not address_geocodes:
        print(f"no geocode found for {company_name} at {address_statement}")
        continue

    # Only the first geocoding candidate is used.
    address_geocode = address_geocodes[0]
    formatted_address = address_geocode['formatted_address']

    print(f"{company_name} is located at {formatted_address}")

    cusip6 = company['cusip6']

    location = get_location(address_geocode)
    city = get_city(address_geocode)
    state = get_state(address_geocode)
    country = get_country(address_geocode)

    # Fall back one administrative level when the finer-grained component is missing.
    cityOrState = city if city else state
    stateOrCountry = state if state else country

    if not (location and cityOrState and stateOrCountry):
        # Report the skip instead of silently leaving the company without a location.
        print(f"incomplete geocode for {company_name}; location not stored")
        continue

    kg.query(set_company_location_cypher, params={
        "companyAddress": formatted_address,
        "cusip6": cusip6,
        "latitude": location['lat'],
        "longitude": location['lng'],
        "city": cityOrState['long_name'],
        "state": stateOrCountry['long_name']
    })

In [None]:
# Which state has the most public companies listed?
# Counts LOCATED_AT links per Address.state, largest count first.
companies_per_state_cypher = """
  MATCH p=(:Company)-[:LOCATED_AT]->(address:Address)
  RETURN address.state as state, count(address.state) as numCompanies
    ORDER BY numCompanies DESC
"""
kg.query(companies_per_state_cypher)

In [None]:
# Which city in California has the most companies listed?
# Same aggregation as the per-state count, restricted to one state and grouped by city.
california_cities_cypher = """
  MATCH p=(:Company)-[:LOCATED_AT]->(address:Address)
         WHERE address.state = 'California'
  RETURN address.city as city, count(address.city) as numCompanies
    ORDER BY numCompanies DESC
"""
kg.query(california_cities_cypher)

In [None]:
# Search the `fullTextCompanyNames` index for "Fedex" and show the stored
# location and address of every matching node.
company_lookup_params = {"companyName": "Fedex"}
kg.query("""
  CALL db.index.fulltext.queryNodes("fullTextCompanyNames", $companyName) 
    YIELD node, score
  WITH node as com
  RETURN com.location, com.companyAddress
""", params=company_lookup_params)

In [None]:
# Which investment firms are near Fedex?
# Managers whose stored location lies within the radius of a matched company,
# reported with the distance truncated to whole kilometres.
search_radius_meters = 100 * 1000  # 100 km expressed in metres
nearby_managers_params = {
    "companyName": "Fedex",
    "withinMeters": search_radius_meters,
}
kg.query("""
  CALL db.index.fulltext.queryNodes("fullTextCompanyNames", $companyName) 
         YIELD node, score
  WITH node as com
  MATCH (mgr:Manager)
    WHERE point.distance(mgr.location, com.location) < $withinMeters
  WITH mgr, 
    toInteger(point.distance(mgr.location, com.location) / 1000) as distanceKm
  RETURN mgr.managerName, mgr.managerAddress, 
        apoc.number.format(distanceKm) + "km" as distance

""", params=nearby_managers_params)