In [35]:
# !pip install spacy

In [42]:
# !pip uninstall torch torchvision torchaudio -y  



In [43]:
# !pip install torch torchvision torchaudio  

In [44]:
# !python -m spacy download en_core_web_sm

## Load Libraries and API Configurations

In [88]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Neo4jVector
from langchain.document_loaders import TextLoader
from langchain.docstore.document import Document
import pandas as pd
from langchain.graphs import Neo4jGraph
from langchain.vectorstores import Neo4jVector
from langchain.embeddings.openai import OpenAIEmbeddings
import os
import openai
from neo4j import GraphDatabase
from graphdatascience import GraphDataScience

# Connection settings are taken from the environment so that real secrets never
# have to be typed into (or saved with) the notebook. Each fallback is the value
# this cell previously hard-coded, so behavior is unchanged when nothing is set.
url = os.environ.get("NEO4J_URI", "neo4j+s://483c47f7.databases.neo4j.io")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "")

# Graph handle used by the Cypher cells further down (graph.query(...)).
graph = Neo4jGraph(
    url=url,
    username=username,
    password=password
)

# Reuse an OPENAI_API_KEY that is already exported instead of overwriting it
# with a blank literal (which would break every later OpenAI call).
openai_api_key = os.environ.get("OPENAI_API_KEY", "")
openai.api_key = openai_api_key

# Kept so that code reading the key from the environment sees the same value.
os.environ['OPENAI_API_KEY'] = openai_api_key

Failed to write data to connection IPv4Address(('483c47f7.databases.neo4j.io', 7687)) (IPv4Address(('34.69.128.95', 7687)))
Failed to write data to connection ResolvedIPv4Address(('34.69.128.95', 7687)) (IPv4Address(('34.69.128.95', 7687)))


## Load the Text

In [89]:
# Reference Link:  https://sec-api.io/resources/extract-textual-data-from-edgar-10-k-filings-using-python
# Load 1 section

from sec_api import ExtractorApi

# Read the key from the environment rather than hard-coding it in the notebook;
# the fallback is the (blank) value that was here before.
SEC_API_KEY = os.environ.get('SEC_API_KEY', '')

# Client used below to pull individual sections out of a 10-K filing.
extractorApi = ExtractorApi(SEC_API_KEY)

# helper function to pretty print long, single-line text to multi-line text
def pprint(text, line_length=100):
    """Print `text` greedily wrapped so that lines hold at most `line_length` characters.

    The text is split on single spaces only, so newlines already present in
    `text` are preserved inside the words that carry them. A single word longer
    than `line_length` is never broken; it is printed on a line of its own.

    Args:
        text: The string to wrap and print.
        line_length: Maximum number of characters per printed line.

    Returns:
        None. The wrapped text is written to stdout.
    """
    lines = []
    current_line = ''
    for word in text.split(' '):
        if not current_line:
            # Start of a line: take the word as-is. This avoids both flushing an
            # empty line when the very first word is over-long and counting a
            # phantom leading space against the width of the first line.
            current_line = word
        elif len(current_line) + 1 + len(word) <= line_length:
            current_line += ' ' + word
        else:
            lines.append(current_line.strip())
            current_line = word
    if current_line:
        lines.append(current_line.strip())
    print('\n'.join(lines))

# URL of Tesla's 10-K filing
filing_10_k_url = 'https://www.sec.gov/Archives/edgar/data/1318605/000156459021004599/tsla-10k_20201231.htm'

# extract text section "Item 1 - Business" from 10-K
item_1_text = extractorApi.get_section(filing_10_k_url, '1', 'text')

# Preview the first 1000 characters. pprint() prints the wrapped text itself and
# returns None, so there is nothing useful to assign or display afterwards.
pprint(item_1_text[0:1000])

ITEM 1. 

BUSINESS

##TABLE_END

Overview 

We design, develop, manufacture, sell and lease
high-performance fully electric vehicles and energy generation and storage systems, and offer
services related to our sustainable energy products. We generally sell our products directly to
customers, including through our website and retail locations. We also continue to grow our
customer-facing infrastructure through a global network of vehicle service centers, Mobile Service
technicians, body shops, Supercharger stations and Destination Chargers to accelerate the widespread
adoption of our products. We emphasize performance, attractive styling and the safety of our users
and workforce in the design and manufacture of our products and are continuing to develop full
self-driving technology for improved safety. We also strive to lower the cost of ownership for our
customers through continuous efforts to reduce manufacturing costs and by offering financial
services tailored to our products. Our


In [90]:
# Load several sections: fetch items '1', '2' and '3' of the filing as text,
# in that order, and bind each result to its own name.
item_1_text, item_2_text, item_3_text = [
    extractorApi.get_section(filing_10_k_url, item_code, 'text')
    for item_code in ('1', '2', '3')
]

In [91]:
class Document:
    """Minimal container pairing a piece of text with a metadata dict.

    NOTE(review): defining this class rebinds the name `Document` that the
    import cell brings in from `langchain.docstore.document` -- confirm the
    shadowing is intentional.

    Attributes are set in `__init__`:
        page_content: the text passed by the caller, stored unchanged.
        metadata: the dict passed by the caller, or a new empty dict when
            `metadata` is None.
    """

    def __init__(self, page_content, metadata=None):
        # Create the default dict per instance so instances never share one.
        if metadata is None:
            metadata = {}
        self.page_content = page_content
        self.metadata = metadata

# Wrap each extracted section in a Document carrying an empty metadata dict.
doc1, doc2, doc3 = [
    Document(page_content=section_text, metadata={})
    for section_text in (item_1_text, item_2_text, item_3_text)
]

# Keep the three documents together, in section order.
combined_docs = [doc1, doc2, doc3]

# Split the documents into chunks (chunk_size=2000, chunk_overlap=20).
text_splitter = CharacterTextSplitter(chunk_overlap=20, chunk_size=2000)
docs = text_splitter.split_documents(combined_docs)



## Create a Vector Index in the Graph

In [31]:
# Create the vector index that the chunk embeddings are searched through:
# index 'TeslaEmbeddings' over the 'tesla_embedding' property of 'Chunk' nodes,
# 1536-dimensional vectors compared with cosine similarity.
# NOTE(review): presumably this call errors if the index already exists, which
# would make the cell fail on a second run -- confirm against the Neo4j docs.
graph.query("""
CALL db.index.vector.createNodeIndex(
    'TeslaEmbeddings', //index name
    'Chunk', //node label
    'tesla_embedding', //property name
    1536, //vector size
    'cosine' //similarity metric
)
""")

[]

## Use new index to store embeddings

In [29]:
# Exploration aid: print the signature and docstring of Neo4jVector.from_documents
# to see which arguments it accepts. Nothing later in the notebook depends on this.
help(Neo4jVector.from_documents)

Help on method from_documents in module langchain.vectorstores.neo4j_vector:

from_documents(documents: 'List[Document]', embedding: 'Embeddings', distance_strategy: 'DistanceStrategy' = <DistanceStrategy.COSINE: 'COSINE'>, ids: 'Optional[List[str]]' = None, **kwargs: 'Any') -> 'Neo4jVector' method of abc.ABCMeta instance
    Return Neo4jVector initialized from documents and embeddings.
    Neo4j credentials are required in the form of `url`, `username`,
    and `password` and optional `database` parameters.



In [32]:
# Embed the chunks in `docs` with OpenAI embeddings and build a Neo4jVector
# store on the same Neo4j instance, targeting the "TeslaEmbeddings" index.
# NOTE(review): search_type="hybrid" presumably adds keyword search on top of
# vector search -- confirm in the Neo4jVector documentation.
hybrid_db = Neo4jVector.from_documents(
    documents=docs,
    embedding=OpenAIEmbeddings(),
    index_name="TeslaEmbeddings",
    search_type="hybrid",
    url=url,
    username=username,
    password=password,
)

## Load the existing index and query it

In [47]:
# Name of the vector index to attach to.
index_name = "TeslaEmbeddings"

# Open a vector store on top of the already-populated index instead of
# embedding the documents again.
store = Neo4jVector.from_existing_index(
    embedding=OpenAIEmbeddings(),
    index_name=index_name,
    url=url,
    username=username,
    password=password,
)

In [52]:
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI

# Question-answering chain over the vector store, using the "stuff" chain type
# and a chat model at temperature 0.
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=ChatOpenAI(temperature=0),
    retriever=store.as_retriever(),
    chain_type="stuff",
)

In [55]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# Buffer that stores the running conversation under the "chat_history" key.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

# Conversational retrieval chain: chat model at temperature 0, the vector store
# as retriever, and the memory object above attached to it.
qa = ConversationalRetrievalChain.from_llm(
    llm=ChatOpenAI(temperature=0),
    retriever=store.as_retriever(),
    memory=memory,
)

In [58]:
# Ask the chain for a high-level list of topics and print only the answer text.
topics_response = qa({"question": "What are the topics covered in these documents?"})
print(topics_response["answer"])

The documents cover the following topics:

1. Legal proceedings and potential penalties related to environmental regulations.
2. Overview of Tesla's business, including the design, development, manufacturing, and sales of electric vehicles and energy generation and storage systems.
3. Segment information, including the automotive segment and the energy generation and storage segment.
4. The use, storage, and disposal of lithium-ion battery packs and ongoing regulatory changes.
5. Regulations applicable to solar and battery storage providers, including interconnection agreements with utilities.
6. Net metering and its availability to solar customers in most states in the U.S.
7. Competition in the automotive market.


In [61]:
# Ask about legal proceedings; the answer text is kept in `legal` because a
# later cell writes it to the graph.
legal_response = qa({"question": "What legal proceedings and potential penalties are discussed in these documents?"})
legal = legal_response["answer"]
print(legal)

The legal proceedings discussed in the document include:

1. Notices of violation issued by the Bay Area Air Quality Management District (BAAQMD) relating to air permitting and compliance for the Fremont Factory. Formal proceedings have not been initiated, and Tesla has disputed certain allegations. The potential penalties are not specified, but it is stated that any material proceeding is likely to have penalties exceeding $1 million.

2. The German Umweltbundesamt has issued a notice and fine of 12 million euros to Tesla's subsidiary in Germany for alleged non-compliance with market participation notifications and take-back obligations for end-of-life battery products. The outcome and final amount of penalties are uncertain, but Tesla has filed an objection, and it is not expected to have a material adverse impact on the business.

3. Challenges from automobile dealer trade associations regarding the legality of Tesla's operations and attempts to limit or prohibit the company's abili

In [62]:
# Ask for a business overview; the answer text is kept in `business_overview`
# because a later cell writes it to the graph.
business_overview_response = qa({"question": "Provide a business overview."})
business_overview = business_overview_response["answer"]
print(business_overview)

Tesla is a company that designs, develops, manufactures, sells, and leases high-performance fully electric vehicles and energy generation and storage systems. They offer a range of products and services related to sustainable energy. Tesla sells its products directly to customers through its website and retail locations. They also have a global network of vehicle service centers, Mobile Service technicians, body shops, Supercharger stations, and Destination Chargers to support their customers. 

In terms of their products, Tesla's automotive segment includes the design, development, manufacturing, sales, and leasing of electric vehicles. They offer models such as the Model 3, Model Y, Model S, and Model X. They also provide services like non-warranty after-sales vehicle services, sales of used vehicles, retail merchandise, and vehicle insurance revenue.

The energy generation and storage segment of Tesla's business involves the design, manufacture, installation, sales, and leasing of s

In [63]:
# Ask about regulatory impacts; the answer text is kept in `regulatory`
# because a later cell writes it to the graph.
regulatory_response = qa({"question": "Which regulatory impacts and changes are discussed?"})
regulatory = regulatory_response["answer"]
print(regulatory)

The documents discuss several regulatory impacts and changes, including:


2. Disclosure requirements: The U.S. Automobile Information and Disclosure Act requires manufacturers to disclose certain information regarding the manufacturer's suggested retail price, optional equipment and pricing. Fuel economy ratings and safety ratings are also required to be included.

3. Foreign regulations: Vehicles sold outside of the U.S. are subject to foreign safety, environmental, and other regulations. These regulations may differ from those in the U.S. and may require redesign and retesting of vehicles.

4. European Union regulations: The European Union has established new rules regarding additional compliance oversight, which commenced in 2020. There is also regulatory uncertainty related to the United Kingdom's withdrawal from the European Union.

5. Self-driving vehicle regulations: Laws pertaining to self-driving vehicles are evolving globally. While there are currently no federal U.S. regula

In [64]:
# Ask about competition; the answer text is kept in `competition` because a
# later cell writes it to the graph.
competition_response = qa({"question": "What is discussed regarding competition?  Who are the competitors?"})
competition = competition_response["answer"]
print(competition)

Tesla's competitors in the automotive market include established automobile manufacturers producing internal combustion vehicles, as well as new and established manufacturers entering the market for electric and alternative fuel vehicles. Many major automobile manufacturers have electric vehicles available today, and others are developing electric vehicles. In addition, several manufacturers offer hybrid vehicles.

In terms of energy storage systems, Tesla competes with both established and emerging companies that offer similar products or alternatives to its systems. Competition is based on factors such as price, energy density, and efficiency. Tesla believes its strong brand, product specifications, and modular, scalable nature of its energy storage products give it a competitive advantage.

In the solar energy business, Tesla competes with traditional utility companies that supply energy to potential customers. Competition is primarily based on price and the ease of switching to ele

## Update Neo4j

In [78]:
# Ensure a Company node with company_name 'Tesla' exists, then MERGE an
# EMBEDDING relationship from that company to every Chunk node in the database.
# The query returns how many such relationships it matched or created.
# Because both steps use MERGE, re-running the cell does not duplicate them.
q = """
MERGE (t:Company {company_name: 'Tesla'})
WITH t

MATCH (c:Chunk)
WITH c,t

MERGE (c)<-[l:EMBEDDING]-(t)
RETURN count(l)

"""
graph.query(q)

[{'count(l)': 59}]

In [81]:
# Attach the legal summary to the Tesla company node as a LEGAL_DESCRIPTION
# relationship pointing at a Legal node.
# The answer text is supplied as the $description query parameter rather than
# being interpolated into the Cypher string: free-form LLM output can contain
# double quotes or backslashes, which would break (or alter) the statement.
q = """
MATCH (t:Company)
WHERE t.company_name = 'Tesla'
WITH t

MERGE (legal:Legal)
WITH legal, t

MERGE (legal)<-[l:LEGAL_DESCRIPTION {description: $description}]-(t)
RETURN count(l)

"""
graph.query(q, params={"description": legal})

[{'count(l)': 1}]

In [83]:
# Attach the business overview to the Tesla company node as a relationship
# pointing at a BusinessOverview node.
# The answer text is supplied as the $description query parameter rather than
# being interpolated into the Cypher string: free-form LLM output can contain
# double quotes or backslashes, which would break (or alter) the statement.
# NOTE(review): the relationship type is spelled BUSIENSS_OVERVIEW. It is left
# unchanged here because renaming it changes the graph schema -- confirm whether
# BUSINESS_OVERVIEW was intended and migrate existing data if so.
q = """
MATCH (t:Company)
WHERE t.company_name = 'Tesla'
WITH t

MERGE (BusinessOverview:BusinessOverview)
WITH BusinessOverview, t

MERGE (BusinessOverview)<-[l:BUSIENSS_OVERVIEW {description: $description}]-(t)
RETURN count(l)

"""
graph.query(q, params={"description": business_overview})

[{'count(l)': 1}]

In [84]:
# Attach the regulatory summary to the Tesla company node as a
# REGULATORY_IMPACTS relationship pointing at a RegulatoryImpacts node.
# The answer text is supplied as the $description query parameter rather than
# being interpolated into the Cypher string: free-form LLM output can contain
# double quotes or backslashes, which would break (or alter) the statement.
q = """
MATCH (t:Company)
WHERE t.company_name = 'Tesla'
WITH t

MERGE (RegulatoryImpacts:RegulatoryImpacts)
WITH RegulatoryImpacts, t

MERGE (RegulatoryImpacts)<-[l:REGULATORY_IMPACTS {description: $description}]-(t)
RETURN count(l)

"""
graph.query(q, params={"description": regulatory})

[{'count(l)': 1}]

In [86]:
# Attach the competition summary to the Tesla company node as a
# COMPETITION_INFORMATION relationship pointing at a Competition node.
# The answer text is supplied as the $description query parameter rather than
# being interpolated into the Cypher string: free-form LLM output can contain
# double quotes or backslashes, which would break (or alter) the statement.
q = """
MATCH (t:Company)
WHERE t.company_name = 'Tesla'
WITH t

MERGE (Competition:Competition)
WITH Competition, t

MERGE (Competition)<-[l:COMPETITION_INFORMATION {description: $description}]-(t)
RETURN count(l)

"""
graph.query(q, params={"description": competition})

[{'count(l)': 1}]