# Trying different methods of using an LLM with embeddings

In [16]:
import os

from dotenv import load_dotenv, find_dotenv
# Locate the local .env file with find_dotenv() and read it.
# `asdf` keeps whatever load_dotenv returns; nothing in the visible cells reads it.
dotenv_path = find_dotenv()
asdf = load_dotenv(dotenv_path)

In [17]:
from IPython.display import display, Markdown
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch


In [18]:
# CSV file that every experiment in this notebook reads through `loader`.
file = 'drivers.csv'
loader = CSVLoader(
    file_path=file,
    encoding='utf-8',
)

In [19]:
# Method 1: let VectorstoreIndexCreator build an in-memory index straight from
# the CSV loader. Only the vector store class is passed, so the creator's
# default embedding settings apply here.
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])

In [20]:
# Questions reused by every experiment in the cells that follow.
queries = [
    "Tell me the youngest driver.",
    "Tell me the drivers that are German",
    "Tell me the oldest driver.",
    "which driver uses the number 14?",
]

# Ask the index each question; no llm is passed, so index.query uses its default.
responses = [index.query(query) for query in queries]

# A plain loop instead of a list comprehension: display() returns None, so a
# comprehension as the cell's last expression echoes "[None, None, None, None]".
for response in responses:
    display(Markdown(response))

 The youngest driver is Nelson Piquet Jr. with a date of birth of 25/07/1985.

 Nick Heidfeld, Adrian Sutil, Timo Glock, and Nico Rosberg.

 Fernando Alonso, born on 29/07/1981.

 Fernando Alonso.

[None, None, None, None]

In [21]:
# Method 2 starts here: load the CSV into Document objects ourselves instead of
# going through VectorstoreIndexCreator. `docs` is later measured with len(),
# indexed, and handed to DocArrayInMemorySearch.from_documents.
docs = loader.load()

In [22]:
# Explicit embedding model, shared by the hand-built vector store and by the
# second VectorstoreIndexCreator further down.
embeddings = OpenAIEmbeddings()

In [23]:
# Embed the loaded documents and keep them in an in-memory vector store.
db = DocArrayInMemorySearch.from_documents(docs, embeddings)

In [24]:
# Show the raw retrieval step on its own: for each question, print the
# Document objects the vector store returns, with no LLM involved.
for query in queries:
    print(query)
    matches = db.similarity_search(query)
    print(matches, end="\n\n")


Tell me the youngest driver.
[Document(page_content='driverId: 12\nname: piquet_jr\nnumber: \ncode: PIQ\nforename: Nelson\nsurname: Piquet Jr.\ndate of birth: 25/07/1985\nnationality: Brazilian\nurl: http://en.wikipedia.org/wiki/Nelson_Piquet,_Jr.', metadata={'source': 'drivers.csv', 'row': 3}), Document(page_content='driverId: 5\nname: kovalainen\nnumber: \ncode: KOV\nforename: Heikki\nsurname: Kovalainen\ndate of birth: 19/10/1981\nnationality: Finnish\nurl: http://en.wikipedia.org/wiki/Heikki_Kovalainen', metadata={'source': 'drivers.csv', 'row': 5}), Document(page_content='driverId: 8\nname: raikkonen\nnumber: 7\ncode: RAI\nforename: Kimi\nsurname: R�_ikk̦nen\ndate of birth: 17/10/1979\nnationality: Finnish\nurl: http://en.wikipedia.org/wiki/Kimi_R%C3%A4ikk%C3%B6nen', metadata={'source': 'drivers.csv', 'row': 8}), Document(page_content='driverId: 7\nname: bourdais\nnumber: \ncode: BOU\nforename: S̩bastien\nsurname: Bourdais\ndate of birth: 28/02/1979\nnationality: French\nurl: http

In [25]:
# Wrap the vector store as a retriever; it is passed to RetrievalQA as
# `retriever=` in a later cell.
retriever = db.as_retriever()

In [26]:
# Chat model shared by the remaining experiments.
# temperature 0.0 is presumably chosen to keep answers repeatable - TODO confirm.
llm = ChatOpenAI(temperature=0.0)

In [27]:
# Method 3: skip retrieval and put every document into the prompt.
# Each page_content is a block of "field: value" lines (see the
# similarity_search output), so records are separated by a blank line.
# Joining with "" would run the last field of one row straight into
# "driverId:" of the next.
qdocs = "\n\n".join(doc.page_content for doc in docs)

for query in queries:
    print(query)
    # The question goes in its own paragraph so it cannot be read as part of
    # the final record.
    response = llm.call_as_llm(f"{qdocs}\n\nQuestion: {query}")
    display(Markdown(response))


Tell me the youngest driver.


The youngest driver is Lewis Hamilton with a date of birth of 07/01/1985.

Tell me the drivers that are German


The drivers that are German are:

- Nick Heidfeld
- Nico Rosberg
- Adrian Sutil
- Timo Glock

Tell me the oldest driver.


The oldest driver is David Coulthard, born on March 27, 1971.

which driver uses the number 14?


Fernando Alonso uses the number 14.

In [28]:
# Method 4: a RetrievalQA chain that combines the retriever with the chat model.
# verbose=True appears to be what prints the "Entering new RetrievalQA chain..."
# trace lines seen in this cell's output.
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    verbose=True,
)

# Run every question through the chain and render each answer as Markdown.
for query in queries:
    print(query)
    response = qa_stuff.run(query)
    answer_md = Markdown(response)
    display(answer_md)


Tell me the youngest driver.


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


I'm sorry, but I don't have enough information to answer your question. The context provided only includes the birth dates of the drivers, but not their ages.

Tell me the drivers that are German


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


The drivers that are German are Heidfeld, Sutil, Glock, and Rosberg.

Tell me the oldest driver.


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


I'm sorry, but I don't have enough information to answer that question. The context provided only includes the birth dates of the drivers, but not their ages.

which driver uses the number 14?


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


The driver who uses the number 14 is Alonso.

In [29]:
# Method 5: rebuild the index, this time passing the explicit `embeddings`
# object. This rebinds `index`, replacing the one built earlier in the notebook.
index_creator = VectorstoreIndexCreator(
    embedding=embeddings,
    vectorstore_cls=DocArrayInMemorySearch,
)
index = index_creator.from_loaders([loader])

In [30]:
# Ask the same questions through the rebuilt index, passing the explicit
# chat model via `llm=` instead of relying on index.query's default.
# Answers are printed as plain text (earlier cells render them as Markdown).
for query in queries:
    print(query)
    response = index.query(query, llm=llm)
    print(response)



Tell me the youngest driver.
I'm sorry, but I don't have enough information to answer your question. The context provided only includes the birth dates of the drivers, but not their ages.
Tell me the drivers that are German
The drivers that are German are: Nick Heidfeld, Adrian Sutil, Timo Glock, and Nico Rosberg.
Tell me the oldest driver.
I'm sorry, but I don't have enough information to answer that question. The context provided only includes the birth dates of the drivers, but not their ages.
which driver uses the number 14?
The driver who uses the number 14 is Fernando Alonso.
