# trying different methods of using LLM with Embeddings

In [37]:
queries = ["Tell me the name of every driver who is German","how many german drivers are?",  "which driver uses the number 14?", "which driver has the oldest birthdate?"]

In [38]:
import os

from dotenv import load_dotenv, find_dotenv
asdf = load_dotenv(find_dotenv()) # read local .env file

In [39]:
from langchain.document_loaders import CSVLoader
from langchain.callbacks import get_openai_callback
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.vectorstores import Chroma


In [40]:
files = ['drivers.csv','drivers_full.csv']

for file in files:
    print("=====================================")
    print(file)
    print("=====================================")
    with get_openai_callback() as cb:

        loader = CSVLoader(file_path=file,encoding='utf-8')
        docs = loader.load()

        from langchain.embeddings.openai import OpenAIEmbeddings

        embeddings = OpenAIEmbeddings()

        # create the vectorestore to use as the index
        db = Chroma.from_documents(docs, embeddings)
        # expose this index in a retriever interface
        retriever = db.as_retriever(search_type="similarity", search_kwargs={"k":1000, "score_threshold":"0.2"})

        for query in queries:
            qa_stuff = RetrievalQA.from_chain_type(
                llm=OpenAI(temperature=0,batch_size=20), 
                chain_type="map_reduce", 
                retriever=retriever,
                verbose=True
            )

            print(query)
            result = qa_stuff.run(query)

            print(result)
            
        print(cb)




drivers.csv


Number of requested results 1000 is greater than number of elements in index 18, updating n_results = 18


Tell me the name of every driver who is German


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
 Nick Heidfeld, Nico Rosberg, Timo Glock, and Adrian Sutil.
how many german drivers are?


[1m> Entering new RetrievalQA chain...[0m


Number of requested results 1000 is greater than number of elements in index 18, updating n_results = 18



[1m> Finished chain.[0m
 4 German drivers.
which driver uses the number 14?


[1m> Entering new RetrievalQA chain...[0m


Number of requested results 1000 is greater than number of elements in index 18, updating n_results = 18



[1m> Finished chain.[0m
 The driver using the number 14 is David Coulthard.
which driver has the oldest birthdate?


[1m> Entering new RetrievalQA chain...[0m


Number of requested results 1000 is greater than number of elements in index 18, updating n_results = 18



[1m> Finished chain.[0m
 The driver with the oldest birthdate is 27/03/1971.
Tokens Used: 13866
	Prompt Tokens: 13339
	Completion Tokens: 527
Successful Requests: 8
Total Cost (USD): $0.27732
drivers_full.csv


Number of requested results 1000 is greater than number of elements in index 857, updating n_results = 857


Tell me the name of every driver who is German


[1m> Entering new RetrievalQA chain...[0m


Number of requested results 1000 is greater than number of elements in index 857, updating n_results = 857



[1m> Finished chain.[0m
 Kurt Kuhnke, Hans Herrmann, Hans Heyer, Joachim Winkelhock, Hans Klenk, Volker Weidler, Manfred Winkelhock, Pascal Wehrlein, Ralf Schumacher, Bernd Schneider, Heinz-Harald Frentzen, Michael Schumacher, Ernst Klodwig, Kurt Ahrens, Michael Bartels, Theo Helfrich, Gerhard Mitter, Rolf Stommelen, Markus Winkelhock, Paul Pietsch, Wolfgang Seidel, Rudolf Krause, Jochen Mass, Toni Ulmen, Wolfgang von Trips, Josef Peters, Hubert Hahne, Karl Kling, Hermann Lang, Helmut Niedermayr, Hans-Joachim Stuck, Günther Seiffert, Nick Heidfeld, Fritz Riess, Günther Bechem, Kurt Adolff, Mick Schumacher, Sebastian Vettel, Theo Fitzau, Erwin Bauer, Siegfried Stohr, Willi Heeks, Adolf Brudes, Christian Danner, Hans von Stuck, Edgar Barth, Brausch Niemann, Nico Hülkenberg, Nico Ros
how many german drivers are?


[1m> Entering new RetrievalQA chain...[0m


Number of requested results 1000 is greater than number of elements in index 857, updating n_results = 857



[1m> Finished chain.[0m
 I don't know.
which driver uses the number 14?


[1m> Entering new RetrievalQA chain...[0m


Number of requested results 1000 is greater than number of elements in index 857, updating n_results = 857



[1m> Finished chain.[0m
 No driver uses the number 14.
which driver has the oldest birthdate?


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
 The driver with the oldest birthdate is Frank Armi, born on 1918-10-12.
Tokens Used: 405259
	Prompt Tokens: 385674
	Completion Tokens: 19585
Successful Requests: 25
Total Cost (USD): $8.105180000000002
