In [1]:
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, RetrievalQA
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader, DirectoryLoader

In [2]:
from langchain.embeddings import HuggingFaceBgeEmbeddings

In [3]:

model_name = "BAAI/bge-large-en"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': True}
hf = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

  from .autonotebook import tqdm as notebook_tqdm
Downloading (…)f6537/.gitattributes: 100%|██████████| 1.52k/1.52k [00:00<00:00, 4.80MB/s]
Downloading (…)_Pooling/config.json: 100%|██████████| 191/191 [00:00<00:00, 607kB/s]
Downloading (…)061e4f6537/README.md: 100%|██████████| 89.0k/89.0k [00:00<00:00, 7.51MB/s]
Downloading (…)1e4f6537/config.json: 100%|██████████| 720/720 [00:00<00:00, 1.43MB/s]
Downloading (…)ce_transformers.json: 100%|██████████| 124/124 [00:00<00:00, 747kB/s]
Downloading model.safetensors: 100%|██████████| 1.34G/1.34G [01:15<00:00, 17.8MB/s]
Downloading pytorch_model.bin: 100%|██████████| 1.34G/1.34G [01:17<00:00, 17.3MB/s]
Downloading (…)nce_bert_config.json: 100%|██████████| 52.0/52.0 [00:00<00:00, 148kB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 125/125 [00:00<00:00, 213kB/s]
Downloading (…)f6537/tokenizer.json: 100%|██████████| 711k/711k [00:00<00:00, 7.42MB/s]
Downloading (…)okenizer_config.json: 100%|██████████| 366/366 [00:00<00:00, 652kB/s]
D

In [4]:
# from langchain.llms import LlamaCpp
llm = LlamaCpp(
    model_path="/Users/Shared/Models/llama-2-13b.Q5_K_M.gguf",
    n_gpu_layers=99,
    n_batch=512,
    n_ctx=2048,
    f16_kv=True,  
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    verbose=True,
)

llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from /Users/Shared/Models/llama-2-13b.Q5_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q5_K     [  5120, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  5120,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 13824,  5120,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q5_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q5_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  5120,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q5_K     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    7:         blk.0.attn_output.weight q5_K     [  5120,  512

In [5]:
from utils import get_embedding, get_completion, LlamaEmbeddingFunction
import os

In [6]:
page_files = sorted(os.listdir('./data/harry_potter/pages/book1'))
page_ids = list(map(lambda x: x.split('.')[0], page_files))
# loader = DirectoryLoader('./data/harry_potter/pages/book1', glob='*.txt',)
# documents = loader.load()
# page_ids = list(map(lambda x: x.split('.')[0].replace(" ", "_"), page_files[:17]))

In [7]:
#load all pages into memory 
pages = []
for page_file in page_files:
    page = open(f'data/harry_potter/pages/book1/{page_file}', 'r').read()
    pages.append(page)

In [8]:
embeddings = hf.embed_documents(pages)

In [None]:
# vectordb = Chroma.from_documents(documents=documents, embedding=hf, persist_directory="./chroma/harry_potter_book1_pages_BGE/")

In [None]:
# retriever = vectordb.as_retriever(search_kwargs={'n': 5})

In [None]:
# qa_chain = RetrievalQA.from_chain_type(llm=llm, 
#                                        retriever=retriever, 
#                                        return_source_documents=True,
#                                        chain_type="stuff")

In [None]:
# qa_chain("Who imagines himself holding socks when looking at the mirror of erised?")

--------

ChromaDB

In [9]:
import chromadb

In [11]:
client = chromadb.PersistentClient(path="chroma/harry_potter_book1_pages_BGE")

In [12]:
client.list_collections()

[]

In [14]:
# checks whether collection exists

collection = client.get_or_create_collection(name="harry_potter_book1_pages_BGE", metadata={"hnsw:space": "cosine"})

In [15]:
metadatas = []
for id in page_ids:
    c_number = id.split("_")[0]
    metadatas.append({"page_number": c_number})

In [16]:
collection.add(
    documents=pages,
    embeddings=embeddings,
    metadatas=metadatas,
    ids=page_ids,
)

-------

Querying

In [101]:
# question = "Whats the name of the wizards bank and what creatures work as bank clerks there?"
# question = "Who is hogwarts principal?"
# question = "What are the objects used to play Quidditch?"
# question = "What is the highest scoring move in Quidditch?"
# question = "Where is hogwarts located?"
# question = "What words and movements are used to cast the levitation charm?"
# question = "What are the qualities attributed to a member of Ravenclaw?"
# question = "What are the qualities attributed to a member of Slytherin?"
# question = "What are the qualities attributed to a member of Griffindor?"
question = "What are the qualities attributed to a member of Hufflepuff?"
q_embedding = hf.embed_query(question)
response = collection.query(
    query_embeddings=q_embedding,
    n_results=5,
    include=["documents", "distances"],
    #where={"metadata_field": "is_equal_to_this"},
    #where_document={"$contains":"sock"}
)

In [102]:
response

{'ids': [['page_131', 'page_88', 'page_127', 'page_342', 'page_12']],
 'distances': [[0.17718505859375,
   0.19723252938765856,
   0.19842612743377686,
   0.19862735271453857,
   0.20008856058120728]],
 'metadatas': None,
 'embeddings': None,
 'documents': [['Your top hats sleek and tall, \n\nFor I’m the Hogwarts Sorting Hat \nAnd I can cap them all. \n\nThere’s nothing hidden in your head \nThe Sorting Hat can’t see, \n\nSo try me on and I will tell you \nWhere you ought to be. \n\nYou might belong in Gryffindor, \n\nWhere dwell the brave at heart, \n\nTheir daring, nerve, and chivalry \nSet Gryffindors apart; \n\nYou might belong in Hufflepuff, \n\nWhere they are just and loyal, \n\nThose patient Hufflepuffs are true \nAnd unafraid of toil; \n\nOr yet in wise old Ravenclaw, \n\nIf you’ve a ready mind, \n\nWhere those of wit and learning, \n\nWill always find their kind; \n\nOr perhaps in Slytherin',
   '“Blimey, Harry, I keep forgettin’ how little yeh know — \nnot knowin’ about Quidd

In [103]:
qa_template = """You are tasked with answering a single question given a context\n\nContext: {CONTEXT}\n\nQuestion: {QUESTION}"""

multiple_docs_template = """Document: {DOC_NAME}\n Content:\n\n```plaintext\n{CONTENT}\n```"""

In [104]:
context = []
for id, doc in zip(response["ids"][0], response["documents"][0]):
    context.append(multiple_docs_template.format(DOC_NAME=id, CONTENT=doc))
    

In [105]:
context

['Document: page_131\n Content:\n\n```plaintext\nYour top hats sleek and tall, \n\nFor I’m the Hogwarts Sorting Hat \nAnd I can cap them all. \n\nThere’s nothing hidden in your head \nThe Sorting Hat can’t see, \n\nSo try me on and I will tell you \nWhere you ought to be. \n\nYou might belong in Gryffindor, \n\nWhere dwell the brave at heart, \n\nTheir daring, nerve, and chivalry \nSet Gryffindors apart; \n\nYou might belong in Hufflepuff, \n\nWhere they are just and loyal, \n\nThose patient Hufflepuffs are true \nAnd unafraid of toil; \n\nOr yet in wise old Ravenclaw, \n\nIf you’ve a ready mind, \n\nWhere those of wit and learning, \n\nWill always find their kind; \n\nOr perhaps in Slytherin\n```',
 'Document: page_88\n Content:\n\n```plaintext\n“Blimey, Harry, I keep forgettin’ how little yeh know — \nnot knowin’ about Quidditch!” \n\n“Don’t make me feel worse,” said Harry. He told \nHagrid about the pale boy in Madam Malkin’s. \n\n“ — and he said people from Muggle families shouldn’

In [106]:
for i in range(len(context)):
    final_prompt =  qa_template.format(CONTEXT=context[1], QUESTION=question)
    print(get_completion(final_prompt, temperature=0, max_tokens=256))



Answer:

```plaintext
Hufflepuff is known for its members' loyalty, dedication, hard work, patience, fair play and tolerance of those different from themselves. 
```


Answer:

```plaintext
Hufflepuff is known for its members' dedication, loyalty, and hard work. 
```


Answer:

```plaintext
Hufflepuff is known for its members' dedication, loyalty, and hard work. 
```


Answer:

```plaintext
Hufflepuff is known for its members' dedication, loyalty, and hard work. 
```


Answer:

```plaintext
Hufflepuff is known for its members' dedication, loyalty, and hard work. 
```


In [None]:
final_prompt

In [None]:
get_completion(final_prompt, temperature=0, max_tokens=64)