In [None]:
from chat_solution.create_db import create_db

# Build the database object once; later cells call `db.retrieve(...)` on it.
db = create_db()

In [None]:
# Quick manual check: show what the database returns for one sample question.
print(db.retrieve("How do you pick a green?"))

In [None]:
# User input and response handling
from chat_solution.rag import QuizRag

# Instantiate the RAG pipeline before preparing the prompts.
rag = QuizRag()

# Two sample prompts; only `query2` is sent to the pipeline in this cell.
query1 = "what is up?"
query2 = "How do you pick a green?"

# Ask the pipeline and show its answer.
response = rag.query(query2)
print(response)

In [32]:
# Convert each text chunk to a LangChain Document
from langchain.schema import Document
from chat_solution.create_db import create_text_chunks_from_workshop_data

# Fetch the text chunks built from the workshop data.
text_chunks = create_text_chunks_from_workshop_data()

# Wrap each chunk in a LangChain Document, labelling it with a 1-based
# "chunk_<n>" source id in its metadata.
langchain_docs = [
    Document(page_content=chunk, metadata={"source": f"chunk_{position}"})
    for position, chunk in enumerate(text_chunks, start=1)
]

# Report how many documents were produced.
print(len(langchain_docs))

Created 6 chunks of size 700 with overlap 200
6


In [28]:
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness
from langchain_mistralai import ChatMistralAI
from ragas.llms import LangchainLLMWrapper

# Mistral chat model wrapped for use as a RAGAS evaluator LLM.
evaluator_llm = LangchainLLMWrapper(
    ChatMistralAI(model="mistral-large-latest")
)

# Metric instances collected in one list.
metrics = [
    LLMContextRecall(),
    FactualCorrectness(),
    Faithfulness(),
]


In [24]:
from langchain_huggingface import HuggingFaceEmbeddings
from ragas.embeddings import LangchainEmbeddingsWrapper

# Sentence-transformer embeddings, wrapped so RAGAS can consume them.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
generator_embeddings = LangchainEmbeddingsWrapper(embedding_model)

# Mistral chat model wrapped for RAGAS; used by the test-set generator.
generator_llm = LangchainLLMWrapper(
    ChatMistralAI(model="mistral-large-latest")
)

In [34]:
from ragas.testset import TestsetGenerator

# Ask RAGAS to synthesise a 10-question test set from the chunk documents.
# NOTE(review): the output recorded for this cell ends in
# "ValueError: 'a' cannot be empty unless no samples are taken", i.e. the call
# did not complete as written — root cause still to be confirmed.
generator = TestsetGenerator(llm=generator_llm)
dataset = generator.generate_with_langchain_docs(
    langchain_docs,
    transforms_embedding_model=generator_embeddings,
    testset_size=10,
)



Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  25%|██▌       | 3/12 [00:28<01:03,  7.10s/it]Property 'themes' already exists in node '2b5dc9'. Skipping!
Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  42%|████▏     | 5/12 [00:37<00:35,  5.02s/it]Property 'themes' already exists in node 'fd4011'. Skipping!
Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  58%|█████▊    | 7/12 [00:55<00:37,  7.44s/it]Property 'themes' already exists in node 'cbb6db'. Skipping!
Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  75%|███████▌  | 9/12 [01:13<00:27,  9.02s/it]Property 'themes' already exists in node '8ae330'. Skipping!
Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  83%|████████▎ | 10/12 [01:18<00:15,  7.77s/it]Property 'themes' already exists in node '9ecbc5'. Skipping!
Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  92%|█████████▏| 11/12 [01:20<00:06,  6.14s/it]Property 'themes' already exists in node 'ced

ValueError: 'a' cannot be empty unless no samples are taken

In [None]:
## Prepared questions dataset from huggingface
from datasets import load_dataset
# Pull the "train" split of the prepared Q&A dataset and show its row count.
dataset = load_dataset(
    path="atitaarora/food_lab_green_qna",
    split="train",
)
len(dataset)

In [None]:
## Sample question data: display the entry at index 2 to inspect its fields
dataset[2]

In [None]:
#from ragas import evaluate
#results = evaluate(dataset=dataset, metrics=metrics, llm=evaluator_llm)

In [155]:
## Prepare the evaluation dataset for RAGAS (https://docs.ragas.io/en/stable/concepts/components/eval_sample/?h=singleturnsample#example)
## RAGAS requires each sample to be in its designated format (SingleTurnSample).
from ragas import EvaluationDataset, SingleTurnSample
from ragas.metrics import Faithfulness
from datasets import load_dataset
from ragas import evaluate
import time

# Build one RAGAS SingleTurnSample per benchmark question: run the question
# through the RAG pipeline and record the answer together with the contexts
# the database retrieves for the same question.
samples = []
eval_size = 5
request_delay_seconds = 2  # pause between questions; we are limited by the Mistral API

# Never index past the end of the dataset when it holds fewer than
# `eval_size` rows (the original `range(eval_size)` would raise IndexError).
num_samples = min(eval_size, len(dataset))

for i in range(num_samples):
    entry = dataset[i]

    user_query = entry['query']
    response = rag.query(user_query)

    # NOTE(review): SingleTurnSample expects `retrieved_contexts` to be a list
    # of strings — confirm that `db.retrieve` returns that shape.
    sample = SingleTurnSample(
        user_input=user_query,
        reference=entry['reference_answer'],
        response=response,
        retrieved_contexts=db.retrieve(user_query),
    )
    samples.append(sample)

    # Throttle: wait before the next iteration to stay under the API rate limit.
    time.sleep(request_delay_seconds)


In [None]:
import pandas as pd
# Convert each sample to a plain dict first so the frame gets one named column
# per sample field; passing the sample objects straight to pd.DataFrame would
# unpack them into positional columns of (field, value) tuples.
df = pd.DataFrame([sample.to_dict() for sample in samples])

# Display the DataFrame as a table (rich notebook rendering)
df

In [None]:
## Actual Evaluation
from ragas.metrics import LLMContextPrecisionWithReference
from ragas.metrics import NonLLMContextRecall
from ragas.metrics import LLMContextRecall
from ragas.metrics import Faithfulness
from ragas.metrics import ResponseRelevancy

# Wrap the collected samples in the container that `evaluate` consumes.
eval_dataset = EvaluationDataset(samples=samples)

# One instance per metric.
faithfulness = Faithfulness()
answer_relevancy = ResponseRelevancy()
llm_context_recall = LLMContextRecall()
context_precision = LLMContextPrecisionWithReference()
# Instantiated but deliberately left out of the run below: the metric
# [non_llm_context_recall] requires an additional 'reference_contexts' column
# to be present in the dataset.
context_recall = NonLLMContextRecall()

# NOTE(review): no `llm=` argument is passed, so the `evaluator_llm` configured
# earlier in the notebook is not used for this run — confirm that is intended.
eval_results = evaluate(
    dataset=eval_dataset,
    metrics=[
        faithfulness,
        answer_relevancy,
        llm_context_recall,
        context_precision,
    ],
    raise_exceptions=False,
)

In [None]:
# Turn the evaluation results into a DataFrame and preview its first five rows.
evaluation_result_df = eval_results.to_pandas()
evaluation_result_df.head(5)