# Trying different methods of using an LLM with embeddings

In [16]:
# Natural-language questions that every retrieval/QA approach below is run against.
queries = [
    "Tell me the youngest driver.",
    "Tell me every driver who is German",
    "how many german drivers are?",
    "Tell me the oldest driver.",
    "which driver uses the number 14?",
    "which driver has the oldest birthdate?",
]

In [17]:
import os

from dotenv import load_dotenv, find_dotenv
# Read the local .env file located by find_dotenv(). The return value is kept
# in `asdf`; none of the visible cells read it afterwards.
asdf = load_dotenv(dotenv_path=find_dotenv())

In [18]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown


In [19]:
from langchain.indexes import VectorstoreIndexCreator

# Source CSV; each row is loaded as one Document (see the sample output below).
file = 'drivers_full.csv'
loader = CSVLoader(file_path=file, encoding='utf-8')
docs = loader.load()

# Build an index with the creator's default settings straight from the loader.
index_creator = VectorstoreIndexCreator()
docsearch = index_creator.from_loaders([loader])

# Show the first loaded document as the cell output.
docs[0]

Document(page_content='driverId: 1\nname: hamilton\nnumber: 44\ncode: HAM\nforename: Lewis\nsurname: Hamilton\ndate of birth: 1985-01-07\nnationality: British', metadata={'source': 'drivers_full.csv', 'row': 0})

In [23]:
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.indexes.vectorstore import VectorstoreIndexCreator

# Build the retrieval chain once. Neither the LLM nor the retriever depends on
# the individual query, so re-creating them on every iteration (as this cell
# did before) was wasted work.
chain = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=docsearch.vectorstore.as_retriever(),
    input_key="question",
)

for query in queries:
    # The chain was created with input_key="question", so the query is passed
    # under that key; the returned dict is printed as-is.
    response = chain({"question": query})
    print(response)

{'question': 'Tell me the youngest driver.', 'result': ' The youngest driver is Pierre Gasly, with a date of birth of 1996-02-07.'}
{'question': 'Tell me every driver that is German', 'result': ' Gerhard Mitter (driverId: 423), Hans Herrmann (driverId: 478), Kurt Kuhnke (driverId: 442), and Hans Klenk (driverId: 753) are all German.'}
{'question': 'how many german drivers are?', 'result': ' Four.'}
{'question': 'Tell me the oldest driver.', 'result': ' The oldest driver is Danny Kladis with a date of birth of 1917-02-10.'}
{'question': 'which driver uses the number 14?', 'result': ' Fernando Alonso'}
{'question': 'which driver has the oldest birthdate?', 'result': ' Bruce McLaren has the oldest birthdate, 1937-08-30.'}


In [None]:
from langchain.indexes import VectorstoreIndexCreator
# Same loader as before, but the index is backed by DocArrayInMemorySearch.
index = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch).from_loaders([loader])

In [None]:

# Ask every question through the index's query helper and keep the answers.
responses = [index.query(query) for query in queries]

# Render each answer as Markdown. A plain loop is used because display() is
# called only for its side effect; the previous list comprehension built a
# throwaway list of None values that the notebook then echoed as cell output.
for r in responses:
    display(Markdown(r))

In [None]:
from langchain.embeddings import OpenAIEmbeddings
# Embeddings object with default settings; passed explicitly to the vector
# store and to the index creator in the cells below.
embeddings = OpenAIEmbeddings()

In [None]:
# Build the in-memory vector store directly from the already-loaded documents.
db = DocArrayInMemorySearch.from_documents(docs, embeddings)

In [None]:
# Inspect the raw similarity-search results for each question (no LLM involved).
for query in queries:
    print(query)
    hits = db.similarity_search(query)
    # Trailing blank line separates one query's results from the next.
    print(hits, end="\n\n")


In [None]:
# Expose the in-memory vector store as a retriever for the RetrievalQA chains below.
retriever = db.as_retriever()

In [None]:
# Chat model shared by the remaining cells; temperature is pinned to 0.0
# (presumably to keep answers as repeatable as possible -- confirm intent).
llm = ChatOpenAI(temperature=0.0)

In [None]:
# Baseline without retrieval: put every document into the prompt and ask the
# chat model directly. Guarded so it only runs for 'drivers.csv'; with `file`
# set to 'drivers_full.csv' earlier in the notebook this cell is skipped
# (presumably because the full file is too large for a single prompt).
if file == 'drivers.csv':
    # Separate records with a blank line. Joining with "" glued the last field
    # of one row to the first field of the next ("...BritishdriverId: 2..."),
    # which made the row boundaries ambiguous in the prompt.
    qdocs = "\n\n".join(doc.page_content for doc in docs)

    for query in queries:
        print(query)
        response = llm.call_as_llm(f"{qdocs} Question: {query}")
        display(Markdown(response))


In [None]:
# RetrievalQA over the in-memory retriever using the "stuff" chain type.
# verbose=True makes the chain log its intermediate steps.
qa_stuff = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    llm=llm,
    verbose=True,
)

# Run every question through the chain and render each answer as Markdown.
for query in queries:
    print(query)
    response = qa_stuff.run(query)
    display(Markdown(response))


In [None]:
# Same experiment with the "map_reduce" chain type.
# NOTE: the variable keeps the name `qa_stuff` even though this chain is not
# a "stuff" chain.
qa_stuff = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="map_reduce",
    llm=llm,
    verbose=True,
)

# Print each question, then show the chain's answer as Markdown.
for query in queries:
    print(query)
    response = qa_stuff.run(query)
    display(Markdown(response))

In [None]:
# Same experiment with the "refine" chain type.
# NOTE: the variable keeps the name `qa_stuff` even though this chain is not
# a "stuff" chain.
qa_stuff = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="refine",
    llm=llm,
    verbose=True,
)

# Print each question, then show the chain's answer as Markdown.
for query in queries:
    print(query)
    response = qa_stuff.run(query)
    display(Markdown(response))

In [None]:
# Same experiment with the "map_rerank" chain type.
# NOTE: the variable keeps the name `qa_stuff` even though this chain is not
# a "stuff" chain.
qa_stuff = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type='map_rerank',
    llm=llm,
    verbose=True,
)

# Print each question, then show the chain's answer as Markdown.
for query in queries:
    print(query)
    response = qa_stuff.run(query)
    display(Markdown(response))

In [None]:
# Rebuild the in-memory index, this time passing the embeddings object
# explicitly instead of relying on the creator's default.
index = VectorstoreIndexCreator(
    embedding=embeddings,
    vectorstore_cls=DocArrayInMemorySearch,
).from_loaders([loader])

In [None]:
# Query the rebuilt index with the shared chat model and print the plain-text answers.
for query in queries:
    print(query)
    response = index.query(query, llm=llm)
    print(response)