In [47]:
import chromadb
from datasets import load_dataset
import pandas as pd

# Two configurations of the same Hub dataset: the passages to retrieve from
# and the question/answer pairs used for evaluation.
train = load_dataset(path="rag-datasets/rag-mini-wikipedia", name="text-corpus")
test = load_dataset(path="rag-datasets/rag-mini-wikipedia", name="question-answer")

# Chroma client plus the collection that will hold the embeddings
# (fetched if it already exists on this client, created otherwise).
client = chromadb.Client()
collection = client.get_or_create_collection(name='Test')

In [55]:
# Reference answer of the first question/answer pair (row first, then field).
test['test'][0]['answer']

'yes'

## Test info

In [3]:
import ast

# Read the chunk/embedding pairs from disk: column '0' is the chunk text and
# column '1' is the embedding serialised as the string form of a Python list.
vector_db = pd.read_csv('embed.csv')

# Convert string representation of list to actual list
vector_db['1'] = vector_db['1'].apply(ast.literal_eval)

# Keep at most the first 3200 rows. A positional slice cannot raise the way
# `vector_db['0'][i]` did when the CSV holds fewer rows, and `.tolist()`
# replaces the row-by-row Python loop with a single column extraction.
indexed_rows = vector_db.iloc[:3200]
text_db = indexed_rows['0'].tolist()
embed_db = indexed_rows['1'].tolist()

In [4]:
# Insert every loaded chunk into the collection. The ids and metadata are
# derived from the lists themselves instead of a repeated literal 3200, so the
# three arguments always have matching lengths.
# The chunk text is stored under the 'text' metadata key.
collection.add(
    ids=[str(doc_idx) for doc_idx in range(len(embed_db))],
    embeddings=embed_db,
    metadatas=[
        {"source": f'Document: {doc_idx}', 'text': chunk}
        for doc_idx, chunk in enumerate(text_db)
    ],
)

In [38]:
from chromadb.utils.embedding_functions import OllamaEmbeddingFunction

# Embedding function used to turn each question into a query vector.
llama_embeder = OllamaEmbeddingFunction(model_name='llama3.2:1B')
res_list = []
test_num = 100  # number of test questions to run retrieval for

# Slice the rows once up front. Indexing test['test']['question'][i] inside the
# loop can materialise the whole column on every iteration, and a slice clamps
# cleanly if the split holds fewer than test_num rows instead of raising.
questions = test['test'][:test_num]['question']

for input_query in questions:
    q_embed = llama_embeder([input_query])

    # Keep the raw query result (top 2 hits) for the generation step.
    res_list.append(collection.query(
        query_embeddings=q_embed,
        n_results=2
    ))

res_list[0]

{'ids': [['697', '279']],
 'embeddings': None,
 'documents': [[None, None]],
 'uris': None,
 'included': ['metadatas', 'documents', 'distances'],
 'data': None,
 'metadatas': [[{'source': 'Document: 697',
    'text': 'Sixteen months before his death, his son, John Quincy Adams, became the sixth President of the United States (1825 1829), the only son of a former President to hold the office until George W. Bush in 2001.'},
   {'source': 'Document: 279',
    'text': 'Lincoln closely supervised the victorious war effort, especially the selection of top generals, including Ulysses S. Grant. Historians have concluded that he handled the factions of the Republican Party well, bringing leaders of each faction into his cabinet and forcing them to cooperate. Lincoln successfully defused a war scare with the United Kingdom in 1861. Under his leadership, the Union took control of the border slave states at the start of the war. Additionally, he managed his own reelection in the 1864 presidential

In [68]:
import re

import ollama
# tqdm.auto is the supported entry point; tqdm._tqdm_notebook is a deprecated
# private module.
from tqdm.auto import tqdm


def _parse_score(raw_verdict):
    """Extract the grader's 0/1 verdict from its reply.

    The grader is asked to output only "1" or "0", but small models often add
    quotes, punctuation or extra words. The first standalone 0 or 1 is used;
    a reply containing neither is counted as incorrect (0) instead of
    aborting the whole run with a ValueError.
    """
    verdict = re.search(r'\b[01]\b', raw_verdict)
    return int(verdict.group()) if verdict else 0


# The grading prompt is identical for every question, so build it once.
eval_prompt = '''
  You are an answer evaluator. Compare the correct answer with the predicted answer and determine if they match.
  The predicted answer must be CONSISTENT and CLEAR throughout - contradictory statements should be marked as incorrect.

  You can ONLY output:
  - "1" if the predicted answer is correct, consistent, and substantially matches the correct answer
  - "0" if the predicted answer is incorrect, contradictory, unclear, or does not match the correct answer

  Do not provide any explanation, just output the number.
  '''

# Fetch the questions and reference answers once, sized to the retrieval
# results actually available rather than a second hard-coded 100.
eval_rows = test['test'][:len(res_list)]
questions = eval_rows['question']
correct_answers = eval_rows['answer']

eval_list = []

for i in tqdm(range(len(questions))):
    # Join the retrieved passages as plain text. Interpolating a tuple into the
    # f-string would hand the model a Python tuple repr (parentheses, quotes
    # and backslash escapes) instead of the passages themselves.
    context = '\n'.join(hit['text'] for hit in res_list[i]['metadatas'][0])

    instruction_prompt = f'''
  You are a helpful chatbot that gives a concise and short answer.
  Use only the following pieces of context to answer the question. Don't make up any new information:
  {context}
  '''

    # Step 1: answer the question using only the retrieved context.
    answer = ollama.chat(
        model='llama3.2:1B',
        messages=[
            {'role': 'system', 'content': instruction_prompt},
            {'role': 'user', 'content': questions[i]},
        ]
    )

    correct_answer = correct_answers[i]
    predicted_answer = answer['message']['content']

    evaluation_input = f"Correct answer: {correct_answer}\nPredicted answer: {predicted_answer}"

    # Step 2: have the model grade its own answer against the reference.
    evaluation = ollama.chat(
        model='llama3.2:1B',
        messages=[
            {'role': 'system', 'content': eval_prompt},
            {'role': 'user', 'content': evaluation_input},
        ]
    )

    eval_list.append(_parse_score(evaluation['message']['content']))

  0%|          | 0/100 [00:00<?, ?it/s]

In [69]:
# Number of evaluations the grader scored as 1 (correct).
correct = sum(1 for score in eval_list if score == 1)
correct

97

In [62]:
# Reference answer of the fourth question/answer pair (row first, then field).
test['test'][3]['answer']

'18 months'