# Trying different methods of using an LLM with embeddings

In [1]:
import os

from dotenv import load_dotenv, find_dotenv
# Read the local .env file located by find_dotenv().
# The result is bound to a throwaway name that no other visible cell reads;
# binding it also keeps the notebook from echoing the value as cell output.
asdf = load_dotenv(find_dotenv()) # read local .env file

In [2]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown


In [3]:
# CSV file to index; the relative path is resolved from the working directory.
file = "drivers.csv"

# Loader for that CSV, decoding it explicitly as UTF-8.
loader = CSVLoader(
    file_path=file,
    encoding="utf-8",
)

In [4]:
from langchain.indexes import VectorstoreIndexCreator

# Configure the creator to use DocArrayInMemorySearch as its vector store class,
# then build the index from the CSV loader.
index_creator = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch)
index = index_creator.from_loaders([loader])

In [5]:
# Question reused by the retrieval experiments in the following cells.
query = "Tell me the youngest driver."

# Ask the index directly; no llm argument is passed here, so whatever model
# index.query defaults to is used.
response = index.query(query)

In [6]:
# Render the answer text as Markdown in the cell output.
display(Markdown(response))

 The youngest driver is Nelson Piquet Jr. with a date of birth of 25/07/1985.

In [7]:
# Recreate the loader for the manual (load -> embed -> search) pipeline.
# The encoding is given explicitly, matching the first loader in this notebook,
# so decoding does not fall back to the platform's default encoding.
loader = CSVLoader(file_path=file, encoding='utf-8')

In [8]:
# Load the CSV through the loader. Judging by the printed output further down,
# each resulting Document holds one row's fields in page_content and the
# source file / row number in metadata.
docs = loader.load()

In [9]:
from langchain.embeddings import OpenAIEmbeddings
# Embeddings object shared by the stores built later: it is passed to
# DocArrayInMemorySearch.from_documents and to VectorstoreIndexCreator.
embeddings = OpenAIEmbeddings()

In [10]:
# Build an in-memory vector store from the loaded documents and the embeddings object.
db = DocArrayInMemorySearch.from_documents(docs, embeddings)

In [11]:
# Retrieve the documents most similar to the query.
# NOTE(review): this rebinds `docs`, which previously held the full result of
# loader.load(); re-running the cell that builds `db` after this one would
# index only these retrieved documents. Consider using a separate name.
docs = db.similarity_search(query) 
# Echo how many documents were returned.
len(docs)

4

In [12]:
# Dump the raw Document objects (page_content and metadata) for inspection.
print(docs)

[Document(page_content='driverId: 12\nname: piquet_jr\nnumber: \ncode: PIQ\nforename: Nelson\nsurname: Piquet Jr.\ndate of birth: 25/07/1985\nnationality: Brazilian\nurl: http://en.wikipedia.org/wiki/Nelson_Piquet,_Jr.', metadata={'source': 'drivers.csv', 'row': 3}), Document(page_content='driverId: 5\nname: kovalainen\nnumber: \ncode: KOV\nforename: Heikki\nsurname: Kovalainen\ndate of birth: 19/10/1981\nnationality: Finnish\nurl: http://en.wikipedia.org/wiki/Heikki_Kovalainen', metadata={'source': 'drivers.csv', 'row': 5}), Document(page_content='driverId: 8\nname: raikkonen\nnumber: 7\ncode: RAI\nforename: Kimi\nsurname: Rï¿½_ikkÌ¦nen\ndate of birth: 17/10/1979\nnationality: Finnish\nurl: http://en.wikipedia.org/wiki/Kimi_R%C3%A4ikk%C3%B6nen', metadata={'source': 'drivers.csv', 'row': 8}), Document(page_content='driverId: 7\nname: bourdais\nnumber: \ncode: BOU\nforename: SÌ©bastien\nsurname: Bourdais\ndate of birth: 28/02/1979\nnationality: French\nurl: http://en.wikipedia.org/wiki/

In [13]:
# Wrap the vector store in a retriever; it is handed to RetrievalQA further down.
retriever = db.as_retriever()

In [14]:
# Chat model used for the remaining experiments; temperature is pinned to 0.0
# (presumably to keep answers as repeatable as possible).
llm = ChatOpenAI(temperature=0.0)

In [15]:
# Concatenate the text of the retrieved documents into one context string.
# Records are separated by a blank line: each page_content ends with the row's
# url field and the next begins with "driverId:", so joining with "" would
# fuse the two into a single token.
qdocs = "\n\n".join(doc.page_content for doc in docs)

In [16]:
# Manual "stuffing": the context text first, then the question, in one prompt string.
prompt = f"{qdocs} Question: {query}"
response = llm.call_as_llm(prompt)


In [17]:
# Render the model's reply to the hand-built prompt as Markdown.
display(Markdown(response))

I'm sorry, I cannot answer this question as the birth dates of all the drivers have not been provided.

In [18]:
# Question-answering chain that pairs the retriever with the chat model.
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    # LangChain's "stuff" strategy — presumably all retrieved documents are
    # placed into a single prompt; confirm against the LangChain docs.
    chain_type="stuff",
    # Log chain entry/exit while running.
    verbose=True,
)

In [19]:
# Run the retrieval chain on the same question; because the chain was built
# with verbose=True, it logs when the chain is entered and finished.
response = qa_stuff.run(query)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [20]:
# Render the retrieval chain's answer as Markdown.
display(Markdown(response))

I'm sorry, but I don't have enough information to answer your question. The context provided only includes the birth dates of the drivers, but not their ages.

In [21]:
# Ask the index again, this time passing the chat model explicitly.
# NOTE(review): this answer is never displayed — `index` is rebuilt and
# `response` is overwritten in the following cells.
response = index.query(query, llm=llm)

In [22]:
# Rebuild the index, this time supplying the embeddings object explicitly
# alongside the DocArrayInMemorySearch vector store class.
index_creator = VectorstoreIndexCreator(
    embedding=embeddings,
    vectorstore_cls=DocArrayInMemorySearch,
)
index = index_creator.from_loaders([loader])

In [23]:
# A filter-style question, answered through the rebuilt index with the chat model.
born_1985_question = "tell me every driver that was born in 1985"
response = index.query(born_1985_question, llm=llm)

# Plain-text dump of the answer.
print(response)

There are two drivers that were born in 1985: Nico Rosberg and Lewis Hamilton.
