In [2]:
from langchain import OpenAI
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

import warnings
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is where OPENAI_API_KEY comes from — confirm.
load_dotenv()
# Silence every Python warning for the rest of the session. This also hides
# deprecation notices from the libraries imported above, so re-enable
# warnings when debugging.
warnings.filterwarnings('ignore')

## Load the document to retrieve information from

In [3]:
loader = TextLoader('documents/companies_info.txt', autodetect_encoding=True)
documents = loader.load()

In [4]:
# Break the loaded documents into overlapping chunks so each piece is small
# enough to embed and to fit into the QA prompt. Sizes use the splitter's
# default length function; the 200-unit overlap keeps text that straddles a
# chunk boundary retrievable from either side.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)

# Embed every chunk with OpenAI embeddings and index the vectors in Chroma.
# The collection is named after the data it actually holds (company records);
# the previous name, "state-of-the-union", was a leftover from sample code.
# NOTE(review): re-running this cell in the same kernel may append the same
# chunks to the existing collection again — confirm, and restart the kernel
# or drop the collection before re-indexing.
embeddings = OpenAIEmbeddings()
store = Chroma.from_documents(texts, embeddings, collection_name="companies-info")

In [5]:
# Factual lookups over the indexed file should not be sampled: with
# temperature=0.7 the same question gave contradictory answers between runs
# (one run named a top company, another refused and cited a score that
# appears nowhere in its context). temperature=0 makes the completion as
# deterministic as the API allows.
llm = OpenAI(temperature=0)

# Retrieval-augmented QA: for each query the retriever fetches the most
# similar chunks from `store`, and the LLM answers from those chunks only.
# NOTE(review): the retriever is used with its default settings, so only a
# handful of chunks reach the prompt. Questions that need the whole dataset
# (highest / lowest / "all companies that ...") cannot be answered reliably
# this way — consider a larger `search_kwargs={"k": ...}` or computing such
# aggregates directly on the structured data.
chain = RetrievalQA.from_chain_type(llm, retriever=store.as_retriever())

In [6]:
# Ask the QA chain for the company with the top RecommendationScore and print
# the generated answer.
# NOTE(review): the answer is derived only from the chunks the retriever
# returns, so it is the maximum among retrieved records, not necessarily the
# maximum in the whole file — verify against the raw data.
print(chain.run("Which company has the highest RecommendationScore?"))

 Company_509 has the highest RecommendationScore of 0.81.


In [24]:
# Ask which city has the most well-recommended companies and print the answer.
# NOTE(review): this is a counting question over the entire dataset, but the
# model sees only the retrieved chunks — treat the result as indicative.
print(chain.run("Which city has the most companies with good recommendations?"))

 San Francisco


In [25]:
# Request an investment recommendation restricted to New York and print the
# chain's free-text reply.
print(chain.run("Recommend me a company in New York to invest in"))

 Based on the given information, it would be recommended to invest in Company_203 as it has a high recommendation score of 0.51 and the owner has no intent to sell. Alternatively, Company_202 also has a high recommendation score of 0.54 and the owner intends to sell, making it a potentially profitable investment opportunity. 


In [26]:
# Ask for San Francisco companies worth investing in and print the reply.
# The question does not define "good", so the model decides how to rank the
# retrieved records.
print(chain.run("Which companies in San Francisco are good to invest in?"))

 Based on the given context, companies Company_192, Company_189, Company_190, and Company_191 are good to invest in as they have high recommendation scores and are located in San Francisco. Company_1000 may also be a good option as it is located in San Francisco and has a high recommendation score, but the owner's intent to sell is not specified. Additionally, as a sales rep, the role of the sales rep in Company_192 may provide valuable insight for investment opportunities.


In [27]:
# Same intent as the earlier "highest RecommendationScore" query, but phrased
# in plain words ("recommendation score") instead of the field name.
# The recorded output for this cell declines to answer, whereas the earlier
# phrasing returned a company — a sign that results depend on wording, on
# which chunks are retrieved, and on sampling.
print(chain.run("Which company has the highest recommendation score?"))

 I'm sorry, I cannot determine the highest recommendation score without more information. The question only provides context for four companies (Company_506, Company_507, Company_508, and Company_509) and none of them have a recommendation score of 0.74. The context for Company_611, Company_612, and Company_613 only shows their recommendation scores as 0.48, 0.44, and 0.41 respectively. Please provide more context or specify the company with the highest recommendation score.


In [28]:
# Ask for companies that have an Engineer role and a good recommendation,
# then print the answer.
# NOTE(review): only companies present in the retrieved chunks can be listed,
# so the result is likely a subset of the true matches.
print(chain.run("Which companies have an Engineer role and are recommended for good business?"))

 Company_970, Company_972, Company_146, and Company_148.


In [29]:
# Two-step question: find the company with the lowest funding amount, then
# report its owner's intent to sell. Print whatever the chain returns.
# The query string contains a typographic apostrophe (U+2019) and is sent
# to the chain exactly as written.
# NOTE(review): "lowest" needs a scan of the full dataset, which retrieval of
# a few chunks cannot provide — the recorded output shows the chain giving up.
print(chain.run("What is the owner’s intent to sell for the company with the lowest funding amount?"))

 It is not possible to determine the owner's intent to sell for the company with the lowest funding amount as the information provided does not include the company name or owner's intent to sell for that specific company.


In [30]:
# Ask which company has received the most funding and print the answer.
# NOTE(review): as with the other "highest" queries, the answer reflects only
# the retrieved chunks — confirm against the source file.
print(chain.run("Which company has received the highest amount of funding to date?"))

 Company_527.


In [31]:
# Ask for companies that have a Manager role and are recommended for
# investment, then print the answer.
# NOTE(review): the list can only include companies from the retrieved
# chunks, so it may be incomplete.
print(chain.run("Which companies have a Manager role and are recommended for investment?"))

 Company_46 and Company_47.


In [32]:
# Ask for every company whose owner's intent to sell is "yes" and print the
# answer. The query string contains a typographic apostrophe (U+2019) and is
# sent to the chain exactly as written.
# NOTE(review): an exhaustive "which companies" list cannot be produced from
# a few retrieved chunks — expect a partial result.
print(chain.run("Which companies have the owner’s intent to sell marked as yes?"))

 Company_493, Company_819
