In [1]:
import os
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
import chromadb
from langchain.text_splitter import RecursiveCharacterTextSplitter


In [2]:
# BGE embedding model, run on CPU; `normalize_embeddings=True` is forwarded
# to the encoder as an encode-time option.
model_name = "BAAI/bge-base-en"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=True)

# `hf` is the embedder used for both the document chunks and the query.
hf = HuggingFaceBgeEmbeddings(model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Local CodeLlama model served through llama.cpp.
# The GGUF weights path can be overridden with the LLAMA_MODEL_PATH
# environment variable so the notebook is not tied to one machine; the
# default below is the original location.
llm = LlamaCpp(
    model_path=os.environ.get(
        "LLAMA_MODEL_PATH",
        "/Users/admin/Developer/models/meta/codellama/codellama-7b-instruct.Q5_K_M.gguf",
    ),
    # GPU/batch options (n_gpu_layers, n_batch, f16_kv) are not passed here,
    # so the library defaults apply.
    n_ctx=4096,
    # Generated tokens are streamed to stdout as they are produced.
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    verbose=True,
)

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /Users/admin/Developer/models/meta/codellama/codellama-7b-instruct.Q5_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = codellama_codellama-7b-instruct-hf
llama_model_loader: - kv   2:                       llama.context_length u32              = 16384
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_lo

In [4]:
# Read every converted page from the markdown directory, in sorted filename
# order, and concatenate them into one big string for chunking.
page_files = sorted(os.listdir('../data/1_html_to_md/'))

pages = []
for page_file in page_files:
    # `with` closes each file handle deterministically; the explicit encoding
    # keeps decoding independent of the platform's locale default.
    with open(f'../data/1_html_to_md/{page_file}', 'r', encoding='utf-8') as page_fh:
        page = page_fh.read()
    pages.append(page)

whole_dataset = "\n".join(pages)

In [5]:
# Chunking parameters: chunk size `cs` and overlap `co` between
# consecutive chunks, as passed to the splitter below.
cs, co = 1000, 0

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=cs,
    chunk_overlap=co,
)

# Split the concatenated dataset into a flat list of text chunks.
texts = text_splitter.split_text(whole_dataset)

In [6]:
# Number of chunks produced by the text splitter.
len(texts)

5220

In [8]:
# Embed every chunk in `texts` with the `hf` BGE embedder.
embeddings = hf.embed_documents(texts)

In [9]:
# Chroma client whose data is persisted under the given path.
client = chromadb.PersistentClient(path="chroma/cibc_public_pages_personal_banking_BGE")

# Fetch the collection if it already exists, otherwise create it; the
# metadata requests the cosine space for the HNSW index.
collection = client.get_or_create_collection(
    name="cibc_public_pages_personal_banking_split_BGE",
    metadata={"hnsw:space": "cosine"},
)

In [10]:
# Build one metadata dict per page file from its filename: the part before
# the first "." is split on "_", pieces of length <= 1 are dropped, and the
# remaining pieces are stored as tag_0, tag_1, ...
metadatas = []
for page_name in page_files:
    stem = page_name.split(".")[0]
    tags = [piece for piece in stem.split("_") if len(piece) > 1]
    tags_dict = {f"tag_{i}": tag for i, tag in enumerate(tags)}
    metadatas.append(tags_dict)

In [11]:
# One id per chunk in `texts` ("doc_0", "doc_1", ...), despite the name.
page_ids = [f"doc_{i}" for i, _ in enumerate(texts)]

In [12]:
# Store the chunks, their precomputed embeddings and their ids.
# `upsert` is used instead of `add` because the collection is persistent and
# the ids are deterministic ("doc_<i>"): re-running the notebook then
# overwrites the existing records instead of re-adding the same ids.
collection.upsert(
    ids=page_ids,
    documents=texts,
    embeddings=embeddings,
    # `metadatas` is deliberately not passed: it holds one entry per page
    # file, whereas documents/ids hold one entry per chunk.
)

In [13]:
question = (
    "What are the credit cards offered by CIBC and what are some questions "
    "that I should ask my client to decide which one is the best fit for them?"
)

# Embed the question with the same embedder used for the chunks, then ask
# the collection for the 10 closest chunks, returning their text and
# distances. No `where` / `where_document` filter is applied.
q_embedding = hf.embed_query(question)
response = collection.query(
    query_embeddings=q_embedding,
    n_results=10,
    include=["documents", "distances"],
)

In [14]:
# Inspect the raw query result returned by the collection.
response

{'ids': [['doc_4718',
   'doc_4456',
   'doc_749',
   'doc_665',
   'doc_624',
   'doc_664',
   'doc_632',
   'doc_305',
   'doc_2',
   'doc_517']],
 'distances': [[0.11698174476623535,
   0.1271049976348877,
   0.12733739614486694,
   0.1276116967201233,
   0.12782293558120728,
   0.12910884618759155,
   0.12940311431884766,
   0.13134276866912842,
   0.1316523551940918,
   0.13515597581863403]],
 'metadatas': None,
 'embeddings': None,
 'documents': [['Need help choosing a bank account, credit card or learning about credit card rewards? These tools make it easy.\n \n\n Bank account selector\n \n\n Chequing, savings or both? Find out which CIBC bank account is right for you.\n \n\n Credit card selector\n \n\n Answer key questions to find the right credit card for\n \n your needs.\n \n\n Credit card rewards calculator\n \n\n Find out how many reward points you could be earning with a CIBC\n \n credit card.\n \n\n Smart Advice virtual events\n------------------------------\n\n Join us f

In [15]:
# Prompt asking the model to answer one question from a supplied context.
# Placeholders: {CONTEXT}, {QUESTION}.
qa_template = (
    "You are tasked with answering a single question given a context. "
    "If you are uncertain about the answer, simply respond 'I do not know.'"
    "\n\nContext: {CONTEXT}"
    "\n\nQuestion: {QUESTION}"
)

# Wrapper that labels one retrieved document and fences its content.
# Placeholders: {DOC_NAME}, {CONTENT}.
multiple_docs_template = (
    "Document: {DOC_NAME}\n"
    " Content:\n\n"
    "```plaintext\n"
    "{CONTENT}\n"
    "```"
)

In [16]:
# Render each retrieved (id, document) pair of the first (only) query into
# the per-document template. The loop variable is named `doc_id` so the
# builtin `id` is not shadowed at notebook scope.
context = [
    multiple_docs_template.format(DOC_NAME=doc_id, CONTENT=doc)
    for doc_id, doc in zip(response["ids"][0], response["documents"][0])
]

In [17]:
# Inspect the formatted per-document context strings.
context

['Document: doc_4718\n Content:\n\n```plaintext\nNeed help choosing a bank account, credit card or learning about credit card rewards? These tools make it easy.\n \n\n Bank account selector\n \n\n Chequing, savings or both? Find out which CIBC bank account is right for you.\n \n\n Credit card selector\n \n\n Answer key questions to find the right credit card for\n \n your needs.\n \n\n Credit card rewards calculator\n \n\n Find out how many reward points you could be earning with a CIBC\n \n credit card.\n \n\n Smart Advice virtual events\n------------------------------\n\n Join us for our Smart Advice virtual events to learn from our experts about investing, estate planning, goal-setting and more.\n \n\n View events\n \n\n Ready to take charge of\n \n your finances?\n--------------------------------------------\n\n Get in touch. We’re here\n \n to help.\n \n\n Meet with us\n \n\n Opens in a new window.\n \n\n Call us\n \n\n Opens in a new window.\n \n\n Find a CIBC\n \n Banking Centre

In [None]:
# Ask the question once per retrieved document, using that document as the
# context. The loop previously always used context[1], so every iteration
# sent the same document to the model.
for doc_context in context:
    final_prompt = qa_template.format(CONTEXT=doc_context, QUESTION=question)
    # The completion is streamed to stdout by the LLM's callback handler.
    llm(final_prompt, temperature=0.5, max_tokens=256)