In [1]:
import os
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
import chromadb
from langchain.text_splitter import RecursiveCharacterTextSplitter


In [2]:
model_name = "BAAI/bge-base-en"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': True}
hf = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

  from .autonotebook import tqdm as notebook_tqdm
Downloading (…)c98f4/.gitattributes: 100%|██████████| 1.52k/1.52k [00:00<00:00, 4.28MB/s]
Downloading (…)_Pooling/config.json: 100%|██████████| 190/190 [00:00<00:00, 1.71MB/s]
Downloading (…)ac29ac98f4/README.md: 100%|██████████| 88.8k/88.8k [00:00<00:00, 8.46MB/s]
Downloading (…)29ac98f4/config.json: 100%|██████████| 719/719 [00:00<00:00, 4.20MB/s]
Downloading (…)ce_transformers.json: 100%|██████████| 124/124 [00:00<00:00, 876kB/s]
Downloading model.safetensors: 100%|██████████| 438M/438M [00:25<00:00, 17.3MB/s] 
Downloading pytorch_model.bin: 100%|██████████| 438M/438M [00:24<00:00, 17.5MB/s] 
Downloading (…)nce_bert_config.json: 100%|██████████| 52.0/52.0 [00:00<00:00, 310kB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 125/125 [00:00<00:00, 1.00MB/s]
Downloading (…)c98f4/tokenizer.json: 100%|██████████| 711k/711k [00:00<00:00, 3.46MB/s]
Downloading (…)okenizer_config.json: 100%|██████████| 366/366 [00:00<00:00, 3.03MB/s]


In [26]:
# from langchain.llms import LlamaCpp
llm = LlamaCpp(
    model_path="/Users/Shared/Models/llama-2-13b.Q5_K_M.gguf",
    n_gpu_layers=99,
    n_batch=512,
    n_ctx=2048,
    # f16_kv=True,  
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    verbose=True,
)

llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from /Users/Shared/Models/llama-2-13b.Q5_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q5_K     [  5120, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  5120,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 13824,  5120,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q5_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q5_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  5120,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q5_K     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    7:         blk.0.attn_output.weight q5_K     [  5120,  512

In [4]:
page_files = sorted(os.listdir('../data/1_html_to_md/'))
# page_ids = list(map(lambda x: x.split('.')[0], page_files))

pages = []
for page_file in page_files:
    page = open(f'../data/1_html_to_md/{page_file}', 'r').read()
    pages.append(page)

whole_dataset = "\n".join(pages)

In [27]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=128)
texts = text_splitter.split_text(whole_dataset)

In [28]:
len(texts)

5511

In [29]:
embeddings = hf.embed_documents(texts)

In [8]:
client = chromadb.PersistentClient(path="chroma/cibc_public_pages_BGE")
# checks whether collection exists

collection = client.get_or_create_collection(name="cibc_public_split-1024-128_BGE", metadata={"hnsw:space": "cosine"})

In [None]:
metadatas = []
for page_name in page_files:
    tags = page_name.split(".")[0].split("_")
    tags = list(filter(lambda x: len(x) > 1, tags))
    tags_dict = {f"tag_{i}":tags[i] for i in range(len(tags))}
    metadatas.append(tags_dict)

In [14]:
page_ids = [f"doc_{i}" for i in range(len(texts))]

In [17]:
collection.add(
    documents=texts,
    embeddings=embeddings,
    # metadatas=metadatas,
    ids=page_ids,
)

In [20]:
question = "What are the credit cards offered by CIBC and what are some questions that I should ask my client to decide which one is the best fit for them?"
q_embedding = hf.embed_query(question)
response = collection.query(
    query_embeddings=q_embedding,
    n_results=10,
    include=["documents", "distances"],
    #where={"metadata_field": "is_equal_to_this"},
    #where_document={"$contains":"sock"}
)

In [21]:
response

{'ids': [['doc_1806',
   'doc_1535',
   'doc_699',
   'doc_1587',
   'doc_1537',
   'doc_1543',
   'doc_10611',
   'doc_1566',
   'doc_1900',
   'doc_1682']],
 'distances': [[0.10316681861877441,
   0.111957848072052,
   0.11583435535430908,
   0.1209336519241333,
   0.121803879737854,
   0.12239348888397217,
   0.12528443336486816,
   0.12539398670196533,
   0.1263878345489502,
   0.12898635864257812]],
 'metadatas': None,
 'embeddings': None,
 'documents': [['CIBC has several types of credit cards, including those with cash back and travel rewards programs. Find the cards that best fit your needs by using the\n \n credit card selector tool\n \n . If you have questions about CIBC credit cards or need further help choosing one, you can reach a customer care representative at\xa01-800-465-4653.\n \n\n More articles\n \n More articles about C I B C credit cards\n \n\n---',
   'Applying for a credit card is a great first step in building your credit history and improving your financial\n 

In [22]:
qa_template = """You are tasked with answering a single question given a context. If you are uncertain about the answer, simply respond 'I do not know.'\n\nContext: {CONTEXT}\n\nQuestion: {QUESTION}"""

multiple_docs_template = """Document: {DOC_NAME}\n Content:\n\n```plaintext\n{CONTENT}\n```"""

In [23]:
context = []
for id, doc in zip(response["ids"][0], response["documents"][0]):
    context.append(multiple_docs_template.format(DOC_NAME=id, CONTENT=doc))

In [24]:
context

['Document: doc_1806\n Content:\n\n```plaintext\nCIBC has several types of credit cards, including those with cash back and travel rewards programs. Find the cards that best fit your needs by using the\n \n credit card selector tool\n \n . If you have questions about CIBC credit cards or need further help choosing one, you can reach a customer care representative at\xa01-800-465-4653.\n \n\n More articles\n \n More articles about C I B C credit cards\n \n\n---\n```',
 'Document: doc_1535\n Content:\n\n```plaintext\nApplying for a credit card is a great first step in building your credit history and improving your financial\n \n well-being — if\n \n you use your card responsibly. You can speed up the credit card application process by having the right information ready before you apply. If you’re\xa0 not sure which card to pick, we’ll help you choose the best CIBC credit card for you.\n \n\n---\n\n What information do I need to apply?\n---------------------------------------\n```',
 'Do

In [25]:
for i in range(len(context)):
    final_prompt =  qa_template.format(CONTEXT=context[1], QUESTION=question)
    #print(get_completion(final_prompt, temperature=0, max_tokens=256))
    llm(final_prompt, temperature=0.5, max_tokens=256)







llama_print_timings:        load time =  1411.17 ms
llama_print_timings:      sample time =     1.33 ms /     2 runs   (    0.67 ms per token,  1499.25 tokens per second)
llama_print_timings: prompt eval time =  1411.10 ms /   193 tokens (    7.31 ms per token,   136.77 tokens per second)
llama_print_timings:        eval time =    43.44 ms /     1 runs   (   43.44 ms per token,    23.02 tokens per second)
llama_print_timings:       total time =  1459.40 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1411.17 ms
llama_print_timings:      sample time =     1.39 ms /     2 runs   (    0.70 ms per token,  1435.75 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    86.49 ms /     2 runs   (   43.25 ms per token,    23.12 tokens per second)
llama_print_timings:       total time =    90.35 ms
Llama.generate: prefix-match hit

llama_pri







llama_print_timings:        load time =  1411.17 ms
llama_print_timings:      sample time =     1.82 ms /     2 runs   (    0.91 ms per token,  1097.09 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    86.62 ms /     2 runs   (   43.31 ms per token,    23.09 tokens per second)
llama_print_timings:       total time =    91.85 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1411.17 ms
llama_print_timings:      sample time =     2.15 ms /     2 runs   (    1.08 ms per token,   928.51 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    87.34 ms /     2 runs   (   43.67 ms per token,    22.90 tokens per second)
llama_print_timings:       total time =    93.51 ms
Llama.generate: prefix-match hit

llama_pri



Answer:

* [CIBC Dividend® Visa Infinite* Card](https://www.cibc.com/en/personal-banking/credit-cards/dividend-visa-infinite.html)
* [CIBC Aventura® Visa Infinite* Card](https://www.cibc.com/en/personal-banking/credit-cards/aventura-visa-infinite.html)
* [CIBC Dividend® Visa* Card](https://www.cibc.com/en/personal-banking/credit-cards/dividend-visa.html)
* [CIBC Aventura® World Mastercard®](https://www.cibc.com/en/personal-banking/credit-cards/aventura-world-mastercard.html)
* [CIBC Dividend® World Elite® Mastercard®](https://www.cibc.com/en/personal-banking/credit-cards/dividend-world-elite-mastercard.html)

* Questions:




llama_print_timings:        load time =  1411.17 ms
llama_print_timings:      sample time =   303.08 ms /   256 runs   (    1.18 ms per token,   844.67 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time = 13648.78 ms /   256 runs   (   53.32 ms per token,    18.76 tokens per second)
llama_print_timings:       total time = 14679.71 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1411.17 ms
llama_print_timings:      sample time =     2.49 ms /     2 runs   (    1.25 ms per token,   801.92 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   113.08 ms /     2 runs   (   56.54 ms per token,    17.69 tokens per second)
llama_print_timings:       total time =   119.47 ms
Llama.generate: prefix-match hit






llama_print_timings:        load time =  1411.17 ms
llama_print_timings:      sample time =     2.64 ms /     2 runs   (    1.32 ms per token,   758.44 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   112.07 ms /     2 runs   (   56.04 ms per token,    17.85 tokens per second)
llama_print_timings:       total time =   119.52 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  1411.17 ms
llama_print_timings:      sample time =     2.75 ms /     2 runs   (    1.38 ms per token,   726.48 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   112.39 ms /     2 runs   (   56.20 ms per token,    17.79 tokens per second)
llama_print_timings:       total time =   120.12 ms
