In [1]:
from datasets import load_dataset
import ollama
import pandas as pd

# Both configurations are pulled from the same Hugging Face dataset repository.
DATASET_REPO = "rag-datasets/rag-mini-wikipedia"

train = load_dataset(DATASET_REPO, "text-corpus")      # passages to retrieve from
test = load_dataset(DATASET_REPO, "question-answer")   # question/answer configuration

### Run the cell below if you don't use embed.csv

In [None]:
# Optional cell (kept commented out): embeds the first 3200 passages with Ollama
# and collects (passage, embedding) tuples in the in-memory list `vector_db`.
# NOTE(review): the cells that follow reassign `vector_db` from
# pd.read_csv('embed.csv') and index it by columns '0' / '1', so running this
# cell alone does not replace embed.csv. Presumably a step that wrote the list
# to embed.csv was removed -- TODO confirm before relying on this path.
# vector_db = []
# import tqdm.notebook as tn

# def store_embed():
#     for i in tn.tqdm(range(3200)):
#         phrase = train['passages'][i]['passage']
#         embed = ollama.embed('llama3.2:1B', phrase)['embeddings'][0]
#         vector_db.append((phrase, embed))
#     return

# store_embed()

  0%|          | 0/3200 [00:00<?, ?it/s]

In [2]:
# Load the precomputed embeddings table. The next cell reads column '0' as the
# passage text and column '1' as the embedding stored as a stringified list.
vector_db = pd.read_csv('embed.csv')

In [3]:
import ast


def _parse_embedding(value):
    """Return the embedding as a Python list, parsing it only if it is still a string.

    Values read from the CSV are strings such as "[0.1, 0.2]"; values that were
    already parsed by a previous run of this cell are returned unchanged.
    """
    return ast.literal_eval(value) if isinstance(value, str) else value


# Convert the stringified lists into real lists. Going through _parse_embedding
# keeps the cell idempotent: re-running it no longer feeds already-parsed lists
# to ast.literal_eval (which raises on non-string, non-AST input).
vector_db['1'] = vector_db['1'].apply(_parse_embedding)

# In-memory "vector database": one (passage_text, embedding) tuple per CSV row.
# Every row is used instead of a hard-coded range(3200), so a shorter file does
# not raise KeyError and a longer one is not silently truncated.
db = list(zip(vector_db['0'], vector_db['1']))

In [None]:
import torch

def cosine_sim(a, b):
    """Return the cosine similarity between two 1-D vectors as a Python float.

    Args:
        a: Sequence of numbers (e.g. a list of floats) or a 1-D torch tensor.
        b: Vector of the same length as ``a``, in any of the same forms.

    Returns:
        float: cosine similarity computed in float32 along dimension 0.
    """
    # No device is requested here, so list inputs are not moved to the GPU.
    # The original author measured CUDA as slower for this function; presumably
    # the per-call transfer of such small vectors outweighs the gain (not verified).
    #
    # as_tensor() accepts lists and existing tensors alike; unlike torch.tensor()
    # it does not emit the copy-construct UserWarning when handed a tensor.
    a_tensor = torch.as_tensor(a, dtype=torch.float32)
    b_tensor = torch.as_tensor(b, dtype=torch.float32)

    return torch.nn.functional.cosine_similarity(a_tensor, b_tensor, dim=0).item()


def retrieve(query, top_n=3):
    """Return the stored passages most similar to ``query``.

    The query is embedded with the 'llama3.2:1B' Ollama model and compared
    against every (passage, embedding) pair in the module-level ``db`` list
    using ``cosine_sim``.

    Args:
        query: Text to search for.
        top_n: Upper bound of the slice taken from the ranked results.

    Returns:
        list of (passage, similarity) tuples, highest similarity first,
        sliced to ``[:top_n]``.
    """
    query_vector = ollama.embed('llama3.2:1B', query)['embeddings'][0]

    # Score every stored passage against the query embedding.
    scored = [(passage, cosine_sim(query_vector, vector)) for passage, vector in db]

    # Rank by the score (second tuple element), best match first.
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
    return ranked[:top_n]

In [15]:
import time

input_query = 'Was Abraham Lincoln the sixteenth President of the United States?'

# perf_counter() is the monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() is a wall clock that can be adjusted mid-measurement.
start = time.perf_counter()
retrieve_knowledge = retrieve(input_query)
end = time.perf_counter()

print(f'Time took to retrieve: {end - start}')

print('Retrieved knowledge:')
for chunk, similarity in retrieve_knowledge:
    print(f' - (similarity: {similarity:.2f}) {chunk}')

# Keep only the passage texts (scores dropped), in ranked order, for the prompt
# built in the next cell. Built in one step so that displaying the results and
# collecting them are independent.
top_chunk = [passage for passage, _ in retrieve_knowledge]

Time took to retrieve: 1.7051129341125488
Retrieved knowledge:
 - (similarity: 0.72) Sixteen months before his death, his son, John Quincy Adams, became the sixth President of the United States (1825 1829), the only son of a former President to hold the office until George W. Bush in 2001.
 - (similarity: 0.72) Lincoln closely supervised the victorious war effort, especially the selection of top generals, including Ulysses S. Grant. Historians have concluded that he handled the factions of the Republican Party well, bringing leaders of each faction into his cabinet and forcing them to cooperate. Lincoln successfully defused a war scare with the United Kingdom in 1861. Under his leadership, the Union took control of the border slave states at the start of the war. Additionally, he managed his own reelection in the 1864 presidential election.
 - (similarity: 0.71) John Adams remains the longest-lived person ever elected to both of the highest offices in the United States.


In [25]:
# Join the retrieved passages into a single space-separated context string.
context = ' '.join(top_chunk)

# System prompt: restricts the model to the retrieved context.
instruction_prompt = f'''
You are a helpful chatbot that gives a concise and short answer.
Use only the following pieces of context to answer the question. Don't make up any new information:
{context}
'''

messages = [
    {'role': 'system', 'content': instruction_prompt},
    {'role': 'user', 'content': input_query},
]

# stream=True makes the call return an iterable of partial responses.
stream = ollama.chat(model='llama3.2:1B', messages=messages, stream=True)

# Echo each streamed fragment as soon as it arrives.
print('Chatbot response:')
for part in stream:
    print(part['message']['content'], end='', flush=True)


Chatbot response:
No, Abraham Lincoln was not the sixteenth President of the United States. He was the 16th President, serving from 1861 until his assassination in 1865.