# trying different methods of using LLM with Embeddings

In [1]:
import os

from dotenv import load_dotenv, find_dotenv
asdf = load_dotenv(find_dotenv()) # read local .env file

In [2]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown


In [3]:
file = 'drivers.csv'
loader = CSVLoader(file_path=file,encoding='utf-8')

In [4]:
from langchain.indexes import VectorstoreIndexCreator
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])

In [5]:
queries = ["Tell me the youngest driver.","Tell me every driver that is German","how many german drivers are?", "Tell me the oldest driver.", "which driver uses the number 14?", "which driver has the oldest birthdate?"]
responses = list(map(lambda query:index.query(query),queries))
[display(Markdown(r)) for r in responses]

 The youngest driver is Nelson Piquet Jr. with a date of birth of 25/07/1985.

 Nick Heidfeld, Timo Glock, Adrian Sutil, and Nico Rosberg are all German drivers.

 Four.

 The oldest driver is Takuma Sato with a date of birth of 28/01/1977.

 Fernando Alonso.

 Fernando Alonso, with a birthdate of 29/07/1981.

[None, None, None, None, None, None]

In [6]:
docs = loader.load()

In [7]:
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [8]:
db = DocArrayInMemorySearch.from_documents(
    docs, 
    embeddings
)

In [9]:
for query in queries:
    print(query)
    print(db.similarity_search(query))
    print()


Tell me the youngest driver.
[Document(page_content='driverId: 12\nname: piquet_jr\nnumber: \ncode: PIQ\nforename: Nelson\nsurname: Piquet Jr.\ndate of birth: 25/07/1985\nnationality: Brazilian', metadata={'source': 'drivers.csv', 'row': 3}), Document(page_content='driverId: 5\nname: kovalainen\nnumber: \ncode: KOV\nforename: Heikki\nsurname: Kovalainen\ndate of birth: 19/10/1981\nnationality: Finnish', metadata={'source': 'drivers.csv', 'row': 5}), Document(page_content='driverId: 8\nname: raikkonen\nnumber: 7\ncode: RAI\nforename: Kimi\nsurname: R�_ikk̦nen\ndate of birth: 17/10/1979\nnationality: Finnish', metadata={'source': 'drivers.csv', 'row': 8}), Document(page_content='driverId: 3\nname: rosberg\nnumber: 6\ncode: ROS\nforename: Nico\nsurname: Rosberg\ndate of birth: 27/06/1985\nnationality: German', metadata={'source': 'drivers.csv', 'row': 2})]

Tell me every driver that is German
[Document(page_content='driverId: 2\nname: heidfeld\nnumber: \ncode: HEI\nforename: Nick\nsurname

In [10]:
retriever = db.as_retriever()

In [11]:
llm = ChatOpenAI(temperature = 0.0)

In [12]:
if file == 'drivers.csv':
    qdocs = "".join([docs[i].page_content for i in range(len(docs))])

    for query in queries:
        print(query)
        response = llm.call_as_llm(f"{qdocs} Question: {query}") 
        display(Markdown(response))


Tell me the youngest driver.


The youngest driver is Nico Rosberg with a date of birth of 27/06/1985.

Tell me every driver that is German


heidfeld, rosberg, nakajima, glock, sutil

how many german drivers are?


There are three German drivers.

Tell me the oldest driver.


The oldest driver is David Coulthard with a date of birth of 27/03/1971.

which driver uses the number 14?


Fernando Alonso uses the number 14.

which driver has the oldest birthdate?


David Coulthard has the oldest birthdate, which is 27/03/1971.

In [13]:
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=retriever, 
    verbose=True
)

for query in queries:
    print(query)
    response = qa_stuff.run(query)
    display(Markdown(response))


Tell me the youngest driver.


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


I'm sorry, but I don't have enough information to answer your question. The context only provides the birth dates of the drivers, but not their ages.

Tell me every driver that is German


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


The drivers that are German are:
- Nick Heidfeld
- Timo Glock
- Adrian Sutil
- Nico Rosberg

how many german drivers are?


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


There are four German drivers listed in the given context.

Tell me the oldest driver.


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


I'm sorry, but I don't have enough information to answer that question. The context only provides the birth dates of the drivers, but not their ages.

which driver uses the number 14?


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


The driver with the number 14 is Alonso.

which driver has the oldest birthdate?


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


The driver with the oldest birthdate is Alonso, with a birthdate of 29/07/1981.

In [14]:
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="map_reduce", 
    retriever=retriever, 
    verbose=True
)

for query in queries:
    print(query)
    response = qa_stuff.run(query)
    display(Markdown(response))

Tell me the youngest driver.


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


Sorry, I cannot determine the youngest driver as the given portion of the document does not provide information about the age or birthdate of any driver.

Tell me every driver that is German


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


I'm sorry, but I cannot provide a complete answer as I only have access to the provided text which only mentions one driver that is German, Nico Rosberg.

how many german drivers are?


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


Based on the given portions of the document, it is not possible to determine the total number of German drivers. Each portion only provides information about one German driver.

Tell me the oldest driver.


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


I'm sorry, I cannot answer that question as the age or birth year of the drivers is not provided in the given text.

which driver uses the number 14?


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


There is no information provided in the given portion of the document about which driver uses the number 14.

which driver has the oldest birthdate?


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


Sorry, the given portion of the document does not provide information about the birthdates of any drivers, so it cannot be determined which driver has the oldest birthdate.

In [15]:
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="refine", 
    retriever=retriever, 
    verbose=True
)

for query in queries:
    print(query)
    response = qa_stuff.run(query)
    display(Markdown(response))

Tell me the youngest driver.


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


Based on the new context, the youngest driver is Nico Rosberg with the driverId 3. He was born on June 27, 1985, and is a German national.

Tell me every driver that is German


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


The original answer does not need to be refined as it already includes the German driver Nico Rosberg.

how many german drivers are?


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


Based on the additional context, we know that at least two drivers, Adrian Sutil and Nico Rosberg, are German. However, we still cannot determine the total number of German drivers in the context provided.

Tell me the oldest driver.


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


The original answer needs to be updated based on the new context. The oldest driver is not Takuma Sato, but rather Sebastien Bourdais, with a date of birth of 28/02/1979.

which driver uses the number 14?


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


The context provided is not useful in refining the original answer as it pertains to a different driver with a different number. The original answer remains correct - the driver who uses the number 14 is Fernando Alonso.

which driver has the oldest birthdate?


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


The new context is not useful in determining which driver has the oldest birthdate among the drivers mentioned. Therefore, the original answer of Sebastien Bourdais remains unchanged.

In [16]:
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type='map_rerank', 
    retriever=retriever, 
    verbose=True
)

for query in queries:
    print(query)
    response = qa_stuff.run(query)
    display(Markdown(response))

Tell me the youngest driver.


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


The context does not provide enough information to determine the youngest driver.

Tell me every driver that is German


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


Adrian Sutil is a German driver.

how many german drivers are?


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


It is unclear how many German drivers there are based on this context. The information only provides details about one German driver named Adrian Sutil. 

Tell me the oldest driver.


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


Fernando Alonso, born on 29/07/1981

which driver uses the number 14?


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


Alonso

which driver has the oldest birthdate?


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


Fernando Alonso

In [17]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings,
).from_loaders([loader])

In [18]:
for query in queries:
    print(query)
    response = index.query(query, llm=llm)
    print(response)

Tell me the youngest driver.
I'm sorry, but I don't have enough information to answer your question. The context only provides the birth dates of the drivers, but not their ages.
Tell me every driver that is German
The drivers that are German are:
- Nick Heidfeld
- Timo Glock
- Adrian Sutil
- Nico Rosberg
how many german drivers are?
There are four German drivers listed in the given context.
Tell me the oldest driver.
I'm sorry, but I don't have enough information to answer that question. The context only provides the birth dates of the drivers, but not their ages.
which driver uses the number 14?
The driver with the number 14 is Alonso.
which driver has the oldest birthdate?
The driver with the oldest birthdate is Alonso, with a birthdate of 29/07/1981.
