In [3]:
from nltk.tokenize import sent_tokenize

from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import Qdrant
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.document_loaders import TextLoader

In [13]:
# Sentence-embedding model shared by every vector store built in this notebook.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

In [14]:
# Smoke test: embed one short sentence so the vector can be inspected below.
embed = embeddings.embed_query("Hi my name is Yash")

In [15]:
# Length of the embedding vector returned for the sample sentence.
print(len(embed))

768


In [16]:
# Peek at the first five components rather than dumping the whole vector.
print(embed[:5])

[0.05506213381886482, 0.005890875123441219, 0.009370914660394192, -0.01350666768848896, 0.05912426486611366]


In [17]:
# Split the corpus into sentences; each sentence becomes one entry in the
# vector store built further down.
# The file is read inside a context manager so the handle is closed
# deterministically (a bare open(...).read() leaves that to the garbage
# collector), and the encoding is pinned so decoding does not depend on the
# platform default.
# NOTE(review): assumes cricket_wiki.txt is UTF-8 encoded — confirm.
with open('./cricket_wiki.txt', encoding='utf-8') as corpus_file:
    sentences = sent_tokenize(corpus_file.read())
print(len(sentences))

288


In [19]:
# Embed every sentence and index it in an in-memory Qdrant collection
# named "texts".
doc_store = Qdrant.from_texts(
    sentences,
    embeddings,
    location=":memory:",
    collection_name="texts",
)

In [20]:
# Question reused by the similarity search, the manual prompt, and the QA chain.
query = "when did cricket became an international sport?"

In [21]:
# Retrieve the indexed sentences most similar to the query; the result is a
# sequence of Document objects (the first one is inspected in the next cell).
response = doc_store.similarity_search(query)

In [22]:
# Show the first retrieved Document.
response[0]

Document(page_content='[45]\n\nCricket becomes an international sport\n\nThe first English team to tour overseas, on board ship to North America, 1859\nIn 1844, the first-ever international match took place between what were essentially club teams, from the United States and Canada, in Toronto; Canada won.', metadata={})

In [None]:
# Chat model used for answering; temperature 0.0 asks for the least random output.
# NOTE(review): presumably needs OpenAI credentials configured outside this notebook — confirm.
llm = ChatOpenAI(temperature=0.0)

In [63]:
# Build the LLM context from (at most) the first three retrieved passages.
# Iterate over the slice directly instead of indexing through
# range(len(...)), and separate passages with a blank line so they are not
# glued together mid-sentence in the prompt.
doc_subset = "\n\n".join(doc.page_content for doc in response[:3])

In [None]:
# Manual retrieval-augmented prompt: retrieved context first, then the question.
# Note: this rebinds `response` from the similarity-search hits to the
# model's answer, so the doc_subset cell cannot be re-run after this one
# without re-running the search.
prompt = f"{doc_subset} Question: {query}"
response = llm.call_as_llm(prompt)

In [None]:
# Display the answer produced from the hand-built prompt.
print(response)

In [None]:
# Expose the Qdrant store through a retriever object so it can be passed to
# the RetrievalQA chain below.
retriever = doc_store.as_retriever()

In [None]:
# Retrieval + answering in one chain: the retriever supplies the context and
# the chat model answers; verbose=True turns on the chain's verbose mode.
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriever, verbose=True
)

In [None]:
# Ask the same question through the RetrievalQA chain; `response` is rebound
# to the chain's result.
response = qa_stuff.run(query)

In [None]:
# Display the answer produced by the RetrievalQA chain.
print(response)

In [None]:
# `loader` was not defined in any visible cell, so this cell raised NameError
# on a fresh kernel. Load the same corpus file that is sentence-tokenized
# earlier in the notebook.
# NOTE(review): CSVLoader is imported but never used; if a CSV source was
# intended here, swap in the matching loader and path — confirm.
# NOTE(review): assumes cricket_wiki.txt is UTF-8 encoded — confirm.
loader = TextLoader("./cricket_wiki.txt", encoding="utf-8")

# Build the index in one step. The Qdrant location is passed through
# vectorstore_kwargs so this store is kept in memory, matching how
# `doc_store` is created.
index = VectorstoreIndexCreator(
    vectorstore_cls=Qdrant,
    embedding=embeddings,
    vectorstore_kwargs={"location": ":memory:"},
).from_loaders([loader])