In [1]:
from utils import get_embedding, get_completion, LlamaEmbeddingFunction
import os

In [2]:
# Names of the page files for book 1, in lexicographic filename order.
page_files = sorted(os.listdir('./data/harry_potter/pages/book1'))

# A page id is whatever precedes the first '.' in its filename.
page_ids = [file_name.split('.')[0] for file_name in page_files]

In [3]:
# Load the full text of every page into memory, index-aligned with page_files.
pages = []
for page_file in page_files:
    # Read inside a context manager so each file handle is closed right away
    # instead of being left open until garbage collection.
    # NOTE(review): decoding relies on the platform's default text encoding;
    # pass encoding=... explicitly once the files' encoding is confirmed.
    with open(f'data/harry_potter/pages/book1/{page_file}', 'r') as page_handle:
        pages.append(page_handle.read())

--------

ChromaDB

In [4]:
import chromadb

In [5]:
# Chroma client backed by the "chroma/harry_potter_pages" directory
# (a relative path, so it is resolved against the notebook's working directory).
client = chromadb.PersistentClient(path="chroma/harry_potter_pages")

In [6]:
# Show which collections this client's store currently contains.
client.list_collections()

[Collection(name=harry_potter_pages)]

In [7]:
# Fetch the "harry_potter_pages" collection, creating it if the store does not have it yet.
# NOTE(review): the embedding function and the cosine "hnsw:space" setting are passed on
# every call; confirm against the Chroma docs whether they affect an already-existing collection.
collection = client.get_or_create_collection(
    name="harry_potter_pages",
    metadata={"hnsw:space": "cosine"},
    embedding_function=LlamaEmbeddingFunction(),
)

In [8]:
# One metadata dict per page, index-aligned with page_ids.
# Ids have the form "page_<n>", so the page number is the token after the
# last underscore; the first token would only be the literal prefix "page".
# The value is kept as a string. A comprehension is used so that no loop
# variable (in particular one named `id`) leaks into the notebook namespace
# and shadows the builtin.
metadatas = [
    {"page_number": page_id.rsplit("_", 1)[-1]}
    for page_id in page_ids
]

In [9]:
# One-time ingestion of every page (documents, metadata and ids are index-aligned).
# Disabled here — presumably because the persisted store already holds this
# collection; uncomment to populate a fresh store.
# collection.add(
#     documents=pages,
#     metadatas=metadatas,
#     ids=page_ids,
# )

-------

Querying

In [10]:
# Retrieval step: embed the question, then ask the collection for its
# 5 nearest pages, returning their text and distances.
question = "Who imagines himself holding socks when looking at the mirror of erised?"
q_embedding = get_embedding(question)

# Optional filters that can also be passed to query():
#   where={"metadata_field": "is_equal_to_this"}
#   where_document={"$contains":"sock"}
response = collection.query(
    include=["documents", "distances"],
    n_results=5,
    query_embeddings=q_embedding,
)

In [11]:
# Inspect the raw query result. 'metadatas' and 'embeddings' come back as None
# because only "documents" and "distances" were requested via `include`.
response

{'ids': [['page_49', 'page_239', 'page_298', 'page_181', 'page_273']],
 'distances': [[0.2069302797317505,
   0.2486249804496765,
   0.2943788170814514,
   0.29575181007385254,
   0.2987077236175537]],
 'metadatas': None,
 'embeddings': None,
 'documents': [['The whole shack shivered and Harry sat bolt upright, \nstaring at the door. Someone was outside, knocking \nto come in.',
   'and I didn’t get a single pair. People will insist on \ngiving me books.” \n\nIt was only when he was back in bed that it struck \nHarry that Dumbledore might not have been quite \ntruthful. But then, he thought, as he shoved \nScabbers off his pillow, it had been quite a personal \nquestion.',
   '“I shouldn’ta told yeh that!” he blurted out. “Forget I \nsaid it! Hey — where’re yeh goin’?” \n\nHarry, Ron, and Hermione didn’t speak to each other \nat all until they came to a halt in the entrance hall, \nwhich seemed very cold and gloomy after the \ngrounds. \n\n“We’ve got to go to Dumbledore,” said Harry. “

In [12]:
# Prompt skeleton for the final question-answering call.
# Placeholders: {CONTEXT} (retrieved text) and {QUESTION}.
qa_template = (
    "You are tasked with answering a single question given a context\n\n"
    "Context: {CONTEXT}\n\n"
    "Question: {QUESTION}"
)

# Wrapper for a single retrieved document: its name, then its text inside a
# fenced plaintext block. Placeholders: {DOC_NAME} and {CONTENT}.
multiple_docs_template = (
    "Document: {DOC_NAME}\n"
    " Content:\n\n"
    "```plaintext\n"
    "{CONTENT}\n"
    "```"
)

In [13]:
# Wrap each retrieved page in the per-document template, preserving the order
# in which the query returned them. Index [0] selects the results for the
# first (and only) query embedding.
# A comprehension keeps the loop variables local, so the builtin `id` is not
# shadowed in the notebook's global namespace.
context = [
    multiple_docs_template.format(DOC_NAME=doc_id, CONTENT=doc_text)
    for doc_id, doc_text in zip(response["ids"][0], response["documents"][0])
]
    

In [14]:
# Inspect the formatted per-document context strings.
context

['Document: page_49\n Content:\n\n```plaintext\nThe whole shack shivered and Harry sat bolt upright, \nstaring at the door. Someone was outside, knocking \nto come in.\n```',
 'Document: page_239\n Content:\n\n```plaintext\nand I didn’t get a single pair. People will insist on \ngiving me books.” \n\nIt was only when he was back in bed that it struck \nHarry that Dumbledore might not have been quite \ntruthful. But then, he thought, as he shoved \nScabbers off his pillow, it had been quite a personal \nquestion.\n```',
 'Document: page_298\n Content:\n\n```plaintext\n“I shouldn’ta told yeh that!” he blurted out. “Forget I \nsaid it! Hey — where’re yeh goin’?” \n\nHarry, Ron, and Hermione didn’t speak to each other \nat all until they came to a halt in the entrance hall, \nwhich seemed very cold and gloomy after the \ngrounds. \n\n“We’ve got to go to Dumbledore,” said Harry. “Hagrid \ntold that stranger how to get past Fluffy, and it was \neither Snape or Voldemort under that cloak — it

In [15]:
# Assemble the final prompt from the question and a single retrieved document.
# NOTE(review): index 1 (the second-ranked hit) is hard-coded here rather than
# derived from the retrieval ranking — confirm this is intentional.
final_prompt = qa_template.format(
    QUESTION=question,
    CONTEXT=context[1],
)

In [16]:
# Inspect the exact prompt text that will be sent to the model.
final_prompt

'You are tasked with answering a single question given a context\n\nContext: Document: page_239\n Content:\n\n```plaintext\nand I didn’t get a single pair. People will insist on \ngiving me books.” \n\nIt was only when he was back in bed that it struck \nHarry that Dumbledore might not have been quite \ntruthful. But then, he thought, as he shoved \nScabbers off his pillow, it had been quite a personal \nquestion.\n```\n\nQuestion: Who imagines himself holding socks when looking at the mirror of erised?'

In [17]:
# Send the assembled prompt to the model and display its reply.
# NOTE(review): temperature=0 and max_tokens=64 are presumably the sampling
# temperature and a cap on generated tokens — confirm against utils.get_completion.
get_completion(
    final_prompt,
    max_tokens=64,
    temperature=0,
)

"\n\nAnswer: Harry Potter\n\nExplanation:\n\nThe context is about Harry Potter and Dumbledore. The question is asked by Dumbledore to Harry, who answers that he doesn't know.\n\nComment: I think this answer is correct but it would be better if you could explain why the answer is correct."