In [None]:
import faiss

In [None]:
from retriever import RetrievalModel

In [None]:
# sentence-transformer retrieval model; the same model encodes both the documents
# (encode_corpus) and the queries (encode_queries) as vectors
retriever = RetrievalModel()


In [None]:
# efficient vector search library
from faiss import IndexFlatL2


In [None]:
# initialize the faiss index with the dimension of the embeddings, e.g. 768, 1024, 2048
# NOTE(review): 1024 has to equal the width of the vectors produced by `retriever`;
# faiss rejects vectors of any other dimension — confirm against the model in use
index = IndexFlatL2(1024)


In [None]:
# read the file as a single string
# the encoding is pinned because open() otherwise falls back to the platform locale,
# which makes the decoded text differ between machines
# assumes freud.txt is UTF-8 encoded — TODO confirm
with open('freud.txt', encoding='utf-8') as f:
    freud = f.read()

In [None]:
# split the text into 2000-character windows that start every 1800 characters,
# so each passage shares its last 200 characters with the next one
passages = [
    {'passage_text': freud[start: start + 2000]}
    for start in range(0, len(freud), 1800)
]


In [None]:
# embed the documents as vectors
# assumes encode_corpus returns one embedding row per passage, in the same order as
# `passages`, because row positions are later used to index back into `passages` — TODO confirm
corpus = retriever.encode_corpus(passages)


In [None]:
# add the vector embeddings to the faiss index
# reset first: index.add() appends, so re-running this cell would otherwise store every
# passage twice and searches could return ids past the end of `passages`
index.reset()
index.add(corpus)


In [None]:
# example question used to try out the retrieval steps by hand in the following cells
question = "Where was Ice Spice born?"


In [None]:
# encode the question as a vector
# the question is wrapped in a list, so the encoder receives a batch containing one query
query = retriever.encode_queries([question])


In [None]:
# retrieve the 3 nearest neighbors of the query vector from the document vectors
# the distances are stored in D, the indices of the documents are stored in I
# D and I hold one row per query; only row 0 is read afterwards because a single query was encoded
# note: faiss fills unused slots of I with -1 when fewer than k vectors are indexed
D, I = index.search(query, k=3)


In [None]:
# get the associated passage dicts (each one has a 'passage_text' key)
# faiss pads I with -1 when fewer than k vectors are indexed; those slots are skipped,
# otherwise passages[-1] would silently return the last passage as if it were a match
relevant_documents = [passages[i] for i in I[0] if i >= 0]


In [None]:
import os

# read the key from the environment instead of hardcoding it, so the secret is never
# saved inside the notebook or committed together with it
# the value is None when OPENAI_API_KEY is not set; export the variable before starting the kernel
openai_api_key = os.environ.get("OPENAI_API_KEY")

In [None]:
import openai


In [None]:
# hand the key to the openai module; it is stored as a module-level setting,
# so the openai calls made later in this notebook pick it up
openai.api_key = openai_api_key


In [None]:
# conversation history for the chat completion API: a list of {'role', 'content'} dicts,
# seeded with the system prompt that sets the assistant's behaviour
messages = [
    {
        'role': 'system',
        'content': """
                You are an intelligent conversational agent, who can chat with the user in a friendly manner, and also answer questions using the provided context.
                If you answer a question use in-line citations (e.g. [1]).
                """,
    },
]


def ask(question, k=3):
    """Answer `question` with the chat model, grounded in the `k` nearest passages.

    The question is embedded with `retriever`, the nearest passages are looked up in
    `index`, and they are sent to the chat model as an extra system message after the
    running conversation. The answer and the numbered passages are printed.

    Args:
        question: the user's message as a string.
        k: how many passages to retrieve (defaults to 3, the original behaviour).

    Returns:
        The assistant's reply text.

    Side effects:
        On success, appends the user turn and the assistant turn to the module-level
        `messages` history. The retrieved passages are not stored in the history.
    """
    query = retriever.encode_queries([question])
    _, neighbor_ids = index.search(query, k=k)
    # faiss pads the id row with -1 when fewer than k vectors are indexed; skipping them
    # avoids passages[-1] being returned as if it were a real match
    relevant_documents = [passages[i] for i in neighbor_ids[0] if i >= 0]

    # number the passages from 1 so they line up with the "[1]"-style citations
    # requested in the system prompt
    numbered = " \n ".join(
        f"{rank}. {doc['passage_text']}"
        for rank, doc in enumerate(relevant_documents, start=1)
    )
    user_message = {'role': 'user', 'content': question}
    context_message = {'role': 'system', 'content': f"Relevant documents: {numbered} \n"}

    # specify the chat model and the messages
    completion = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        messages=messages + [user_message, context_message],
    )
    response = completion.choices[0]['message']['content']

    # record the exchange only after the request succeeded, so a failed call does not
    # leave an unanswered user turn in the history
    messages.append(user_message)
    messages.append({'role': 'assistant', 'content': response})

    print(response + '\n')
    # same 1-based numbering as in the prompt, so a citation "[2]" points at the passage printed as [2]
    for rank, doc in enumerate(relevant_documents, start=1):
        print(f"[{rank}]   {doc['passage_text']} \n")
    return response


In [None]:
# NOTE(review): no earlier ask(...) call is visible in this notebook, so on a fresh run
# `messages` holds only the system prompt here and "she" has no antecedent —
# confirm whether a preceding ask(question) call is missing
ask('what year was she born')


In [None]:
# show the conversation history accumulated so far
messages
