In [4]:
pip install ollama chromadb transformers torch ragas


Collecting transformers
  Downloading transformers-4.48.0-py3-none-any.whl.metadata (44 kB)
Collecting torch
  Downloading torch-2.5.1-cp310-cp310-win_amd64.whl.metadata (28 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Downloading safetensors-0.5.2-cp38-abi3-win_amd64.whl.metadata (3.9 kB)
Collecting networkx (from torch)
  Using cached networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting jinja2 (from torch)
  Downloading jinja2-3.1.5-py3-none-any.whl.metadata (2.6 kB)
Collecting sympy==1.13.1 (from torch)
  Using cached sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Collecting MarkupSafe>=2.0 (from jinja2->torch)
  Using cached MarkupSafe-3.0.2-cp310-cp310-win_amd64.whl.metadata (4.1 kB)
Downloading transformers-4.48.0-py3-none-any.whl (9.7 MB)
   ---------------------------------------- 0.0/9.7 MB ? eta -:--:--
   ------ --------------------------------- 1.6/9.7 MB 9.3 MB/s eta 0:00:01
   ----------------- ---------------------- 4.2/9.7 MB 10.5 MB/s eta 0:00:01
   

In [8]:
from chromadb import Client
from chromadb.config import Settings
from transformers import BertTokenizer, BertModel
import torch
from datasets import load_dataset

In [10]:
chroma_client = Client()

In [11]:
collection = chroma_client.create_collection(name="my_collection")

In [12]:
# Load BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

In [39]:
dataset = load_dataset('squad', split='train[:1%]')  # Using a subset of the SQuAD dataset for demonstration purposes
dataset

Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 876
})

In [14]:
# Function to compute embeddings
def get_embedding(text):
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).squeeze().tolist()  

In [15]:
# Populate ChromaDB with embeddings
for idx, item in enumerate(dataset):
    context = item['context']
    embedding = get_embedding(context)
    collection.add(
        ids=[str(idx)],  # Unique document ID
        embeddings=[embedding],
        metadatas=[{"context": context}]
    )

In [19]:
from ollama import chat
# Define a function to generate an answer for a given question and context
def generate_answer(question, context):
    prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
    response = chat(model='llama3.2:1b', messages=[{'role': 'user', 'content': prompt}])
    return response.message.content


In [20]:
# Define a function to run the RAG pipeline
def rag_pipeline(question):
    # Retrieve relevant context
    question_embedding = get_embedding(question)
    results = collection.query(query_embeddings=[question_embedding], n_results=1)
    context = results['documents'][0][0]  # Access the top result's context

    # Generate answer
    answer = generate_answer(question, context)
    return answer


In [21]:
question = "What is the capital of France?"
answer = rag_pipeline(question)

answer

'Paris.'

In [24]:
dataset = load_dataset('squad', split='validation[:10]')

In [25]:
questions = [item['question'] for item in dataset]
contexts = [item['context'] for item in dataset]
answers = [item['answers']['text'][0] for item in dataset]  

In [26]:
# Generate answers for the dataset
generated_answers = [rag_pipeline(question) for question in questions]

In [27]:
generated_answers

['The Carolina Panthers represented the NFC at Super Bowl 50.',
 'The Denver Broncos represented the NFC at Super Bowl 50.',
 "Super Bowl 50 took place at Levi's Stadium in Santa Clara, California.",
 'The Denver Broncos won Super Bowl 50.',
 "I'm not aware of any information about a 50th anniversary of the Super Bowl. The Super Bowl has been an annual event since its inception in 1967, and I couldn't find any record of a significant milestone like a 50th anniversary being celebrated. Can you provide more context or clarify what you are referring to?",
 "I'm not aware of any information about a Super Bowl called Super Bowl 50. The most recent Super Bowls were Super Bowl LIV (54) in 2020, between the Kansas City Chiefs and the San Francisco 49ers, and Super Bowl LV (55) in 2021, between the Tampa Bay Buccaneers and the Kansas City Chiefs. If you could provide more context or clarify which Super Bowl you are referring to, I'd be happy to try and assist you further.",
 "I don't have any i

In [45]:
# Prepare evaluation data
evaluation_data = [
    {
        'user_input': question,
        'retrieved_contexts': [context],
        'reference':ground_truth,
        'response': generated,
        'ground_truth_answer': ground_truth
    }
    for question, context, generated, ground_truth in zip(questions, contexts, generated_answers, answers)
]

In [32]:
!pip install -U langchain-ollama

Collecting langchain-ollama
  Downloading langchain_ollama-0.2.2-py3-none-any.whl.metadata (1.9 kB)
Downloading langchain_ollama-0.2.2-py3-none-any.whl (18 kB)
Installing collected packages: langchain-ollama
Successfully installed langchain-ollama-0.2.2


In [48]:
from ragas import evaluate
from ragas import EvaluationDataset
from ragas.llms import LangchainLLMWrapper
from langchain_ollama.llms import OllamaLLM

llm = OllamaLLM(model="llama3.2:1b")
ragas_dataset = EvaluationDataset.from_dict(evaluation_data)
evaluator_llm = LangchainLLMWrapper(llm)
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness

result = evaluate(dataset=ragas_dataset,metrics=[LLMContextRecall(), Faithfulness(), FactualCorrectness()],llm=evaluator_llm)
result

Evaluating:   0%|          | 0/30 [00:00<?, ?it/s]Exception raised in Job[5]: TimeoutError()
Exception raised in Job[13]: TimeoutError()
Exception raised in Job[9]: TimeoutError()
Exception raised in Job[12]: TimeoutError()
Exception raised in Job[11]: TimeoutError()
Evaluating:   3%|▎         | 1/30 [03:00<1:27:01, 180.05s/it]Exception raised in Job[3]: TimeoutError()
Exception raised in Job[6]: TimeoutError()
Exception raised in Job[2]: TimeoutError()
Exception raised in Job[1]: TimeoutError()
Exception raised in Job[4]: TimeoutError()
Exception raised in Job[8]: TimeoutError()
Exception raised in Job[10]: TimeoutError()
Exception raised in Job[7]: TimeoutError()
Exception raised in Job[14]: TimeoutError()
Exception raised in Job[15]: TimeoutError()
Exception raised in Job[0]: TimeoutError()
Exception raised in Job[16]: TimeoutError()
Exception raised in Job[17]: TimeoutError()
Evaluating:  57%|█████▋    | 17/30 [06:00<03:58, 18.33s/it]  Exception raised in Job[18]: TimeoutError()
Ex

{'context_recall': nan, 'faithfulness': nan, 'factual_correctness': nan}