In [1]:
# to reference envinronment variables
from dotenv import load_dotenv
# to split input data source into chuncks
from langchain.text_splitter import CharacterTextSplitter
# to load pdf
from PyPDF2 import PdfReader
# to convert text chunks into embeddings
from langchain.embeddings.openai import OpenAIEmbeddings
# for vector store of embeddings and chunks
from langchain.vectorstores import FAISS


In [3]:
load_dotenv()

# create corpus from input file
inputfilepath = '.\SPGlobal_Tesla,Inc._EventDetail_17-05-2023.pdf'
pdfObj = PdfReader(inputfilepath)

corpus = ''
   
for page in pdfObj.pages:
    corpus += page.extract_text()

# print(f'Corpus character length: {len(corpus)}') # 82748

# splitting corpus into chunks
text_splitter = CharacterTextSplitter(separator='\n\n'
                                      , chunk_size = 1000
                                      , chunk_overlap = 200
                                      , length_function = len
                                      )
 
chunks = text_splitter.split_text(corpus)
print(f'Data type of chunks: {type(chunks)}') # list
print(f'Count of chunks: {len(chunks)}') # list
   
embeddings = OpenAIEmbeddings()

# save just the embedding into a text file
saveEmbeddings = embeddings.embed_documents(chunks)
print(f'Length of saveEmbeddings: {len(saveEmbeddings)}')

with open('embeddings.txt', 'w') as file:
    for embedding in saveEmbeddings:
        file.write(' '.join(str(value) for value in embedding))
        file.write('\n')


        
# create the vector db of embeddings and corresponding chunks
knowledge_base = FAISS.from_texts(chunks, embeddings)

# make local copy of the knowledgeBase
knowledge_base.save_local('localFAISSkb', index_name='Tesla')
    

Data type of chunks: <class 'list'>
Count of chunks: 1
Length of saveEmbeddings: 1


In [4]:
saveEmbeddings, type(saveEmbeddings)

([[-3.3965230001104965e-05,
   -0.02692008181589453,
   0.002306177565936635,
   -0.007776909340004127,
   -0.002488176691202927,
   -0.001681758511885903,
   -0.02167714512762253,
   -0.012996476008678206,
   -0.025204772248920152,
   -0.019682322469736172,
   0.02043053816463264,
   0.032640207979942576,
   -0.01245324877340884,
   -0.006704735745816957,
   0.01974226140460205,
   0.009389173927615726,
   0.029144260845868453,
   -0.010927716786268441,
   0.03138225352574291,
   -0.007681104449322535,
   -0.0018645620869742678,
   0.0034986153527591653,
   -0.016229420497463114,
   0.0011731606118914064,
   -0.015150500540318176,
   -0.003503551597706615,
   0.0209843015677225,
   -0.004453885066509787,
   -0.0030245781047808207,
   0.013847303720860835,
   0.013687933780173406,
   0.0012517554920187276,
   -0.011482107041026177,
   0.01109927198936648,
   -0.025076957826262295,
   -0.032144289391462764,
   0.011457105430218585,
   -0.007962129049385232,
   0.03747826568667961,
   -0

In [5]:
knowledge_base.as_retriever()
# VectorStoreRetriever(vectorstore=<langchain.vectorstores.faiss.FAISS object 
# at 0x00000208CECDCD30>, search_type='similarity', search_kwargs={})
doc = knowledge_base.as_retriever()



In [6]:
# load knowledge base from local
current_kb = FAISS.load_local('localFAISSkb', embeddings, index_name='Tesla')

In [7]:
# create prompt template
from langchain import PromptTemplate


questionAreas = ['Financial results', 'Partnerships', 'Market conditions', 'Product announcement']

template = ''' For this Tesla transcript, what are the most interesting/important questions asked by analysts 
on following {topics}. Asssume unknown attendees also as analysts. 
If there are no questions for a topic, respond with 'None' for the corresponding topic list item.
'''

prompt = PromptTemplate(
    input_variables=["topics"],
    template=template
)


In [9]:
# docs = current_kb.similarity_search(prompt) fails
docs = current_kb.similarity_search (f'For this Tesla transcript, what are the most interesting/important questions asked by analysts \
on following {questionAreas}. Asssume unknown attendees also as analysts.')
docs


[Document(page_content='NASDAQGS:TSLA (MI KEY: 4574287; SPCIQ KEY: 27444752)\nDetail\nTesla, Inc. - Shareholder/Analyst Call\nAnnounced Date 06/04/2023 21:36:00\nPeriod Ended NA\nCompany Name Tesla Inc. NASDAQGS:TSLA\nSource SEC Form DEF 14A\nEvent Shareholder or Analyst Call\nAdvisors NA\nSituation 2023 Annual Meeting of StockholdersEvent Details\nLive Phone Number NA\nLive Passcode NA\nLive Other Phone Number NA\nLive Other Passcode NA\nLive Audio Details & Webcast URL http://www.meetnow.global/TESLA2023\nReplay Phone Number NA\nReplay Passcode NA\nReplay Begins NA\nReplay Ends NA\nReplay Audio Details & Webcast NA\nCall Description NA\nHost 1 NA\nHost 2 NA\nHost 3 NACall Details\n\xa0\nNo company details exists for the transcript.\n\xa0Company Details\n\xa0\nTesla, Inc. | Event Detail\nLicensed to martin.thomasmathews@ihsmarkit.com Powered by S&P Global | Page 1 of 20Transcript\nTesla, Inc. (NasdaqGS:TSLA)\nTuesday, May 16, 2023 9:00 PM\nExecutives\nElon R. Musk - Technoking of Tesl

In [11]:
# get response from OpenAI
from langchain.chains import LLMChain
from langchain.llms import OpenAI

llm = OpenAI()
chain = LLMChain(llm=llm, prompt=prompt)

# Run the chain only specifying the input variable.
print(chain.run('questionAreas'))



1. Battery technology and charging infrastructure: 
- What investments are you making to improve battery technology and charging infrastructure? 
- What new charging technologies are you developing to increase charging speed and convenience? 
- What is your strategy for building out the charging infrastructure to support the increasing demand for electric vehicles? 

2. Autonomous driving capabilities: 
- How close are you to having full autonomy for your vehicles? 
- What challenges have you faced in developing autonomous driving capabilities? 
- What are the safety considerations for autonomous driving? 

3. Production and delivery of vehicles: 
- What strategies are you using to increase production and delivery of vehicles? 
- What are the biggest challenges you are facing in ramping up production and delivery? 
- What systems and processes have you implemented to improve vehicle delivery times?


# Dont know yet
1. How to see a document in vectorstore
1. How to get count of all documents in vectorstore
1. How to get all the documents in a vectorstore
1. How to see a document and its embedding in a vectorstore
1. Can you put a prompt in faiss similarity search?

In [None]:
#############ROUGH#######################
# Initialize FAISS vector store and OpenAI embeddings
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_documents(chunks, embeddings)

# Get document at index 0 and its embedding
doc = vector_store.get_document_by_index(0)
embedding = embeddings.embed(doc.page_content)


# Initialize FAISS vector store
vector_store = FAISS.from_documents(chunks)

# Get all documents from vector store
docs = vector_store.get_all_documents()

# Create dictionary from document objects
doc_dict = {doc.id: doc.page_content for doc in docs}

# Initialize FAISS vector store
vector_store = FAISS.from_documents(chunks)

# Get total number of documents in vector store
doc_count = vector_store.get_document_count()