In [3]:
!pip install ragas

Collecting ragas
  Using cached ragas-0.1.20-py3-none-any.whl.metadata (5.5 kB)
Collecting numpy (from ragas)
  Using cached numpy-2.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Collecting datasets (from ragas)
  Using cached datasets-3.0.1-py3-none-any.whl.metadata (20 kB)
Collecting tiktoken (from ragas)
  Using cached tiktoken-0.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting langchain (from ragas)
  Using cached langchain-0.3.1-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.3 (from ragas)
  Using cached langchain_core-0.2.41-py3-none-any.whl.metadata (6.2 kB)
Collecting langchain-community (from ragas)
  Using cached langchain_community-0.3.1-py3-none-any.whl.metadata (2.8 kB)
Collecting langchain-openai (from ragas)
  Using cached langchain_openai-0.2.1-py3-none-any.whl.metadata (2.6 kB)
Collecting openai>1 (from ragas)
  Using cached openai-1.50.2-py3-none-any.whl.metadata (24 kB)
Colle

In [5]:
import os
import getpass

# Prompt only when the key is not already set, so re-running this cell does
# not overwrite a working key (for example with an empty string).
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API Key:")

In [6]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Embedding model and the two chat models handed to the test-set generator.
embeddings = OpenAIEmbeddings()
critic_llm = ChatOpenAI(model="gpt-4o-mini")
generator_llm = ChatOpenAI(model="gpt-3.5-turbo")

# Argument order is (generator LLM, critic LLM, embeddings).
generator = TestsetGenerator.from_langchain(generator_llm, critic_llm, embeddings)

# Weight given to each question evolution type; the weights sum to 1.0.
distributions = {simple: 0.5, multi_context: 0.4, reasoning: 0.1}

Get the documents

In [7]:
!wget https://raw.githubusercontent.com/AI-Maker-Space/DataRepository/main/jw1.csv -O john_wick_1.csv
!wget https://raw.githubusercontent.com/AI-Maker-Space/DataRepository/main/jw2.csv -O john_wick_2.csv
!wget https://raw.githubusercontent.com/AI-Maker-Space/DataRepository/main/jw3.csv -O john_wick_3.csv
!wget https://raw.githubusercontent.com/AI-Maker-Space/DataRepository/main/jw4.csv -O john_wick_4.csv

--2024-09-29 09:47:07--  https://raw.githubusercontent.com/AI-Maker-Space/DataRepository/main/jw1.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.110.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 19628 (19K) [text/plain]
Saving to: ‘john_wick_1.csv’


2024-09-29 09:47:07 (8.06 MB/s) - ‘john_wick_1.csv’ saved [19628/19628]

--2024-09-29 09:47:07--  https://raw.githubusercontent.com/AI-Maker-Space/DataRepository/main/jw2.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 14747 (14K) [text/plain]
Saving to: ‘john_wick_2.csv’


2024-09-29 09:47:08 (7.60 MB/s) - ‘john_wick_2.csv’

Prepare the documents

In [8]:
from langchain_community.document_loaders.csv_loader import CSVLoader
from datetime import datetime, timedelta

# All review documents from the four CSV files, in movie order.
documents = []

# Read the clock once so every timestamp below is derived from the same
# instant instead of drifting by a few microseconds per document.
loaded_at = datetime.now()

for i in range(1, 5):
    loader = CSVLoader(
        file_path=f"john_wick_{i}.csv",
        # These columns are stored as metadata on each document.
        metadata_columns=["Review_Date", "Review_Title", "Review_Url", "Author", "Rating"],
    )

    movie_docs = loader.load()

    # Newer movies get a more recent "last_accessed_at":
    # movie 4 -> loaded_at, movie 1 -> three days earlier.
    last_accessed_at = loaded_at - timedelta(days=4 - i)

    for doc in movie_docs:
        # Add the "Movie Title" (John Wick 1, 2, ...)
        doc.metadata["Movie_Title"] = f"John Wick {i}"

        # Convert "Rating" to an `int`; if no rating is provided, assume 0.
        doc.metadata["Rating"] = int(doc.metadata["Rating"]) if doc.metadata["Rating"] else 0

        doc.metadata["last_accessed_at"] = last_accessed_at

    documents.extend(movie_docs)

In [18]:
# Sanity statistics over the loaded documents. Every figure falls back to 0
# when `documents` is empty, so the cell never raises on an empty load.
print(len(documents))

# Text of the first document (empty string when nothing was loaded).
content_1 = documents[0].page_content if documents else ""
print(len(content_1))

# Character count of every document's text.
lengths = [len(doc.page_content) for doc in documents]
average_length = sum(lengths) / len(lengths) if lengths else 0
min_length = min(lengths, default=0)
max_length = max(lengths, default=0)

print(f"Average Content Length: {average_length}")
print(f"Minimum Content Length: {min_length}")
print(f"Maximum Content Length: {max_length}")

100
599
Average Content Length: 533.9
Minimum Content Length: 29
Maximum Content Length: 2440


Create the questions for evaluation

We will create them once, then store them for reuse.

In [38]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
import pickle

# Chunking parameters. NOTE(review): not referenced by the cells visible here;
# presumably consumed by a text splitter elsewhere — confirm.
chunk_size, chunk_overlap = 1000, 100

# Pickle file used to cache the generated RAGAS test set between runs.
file_path = "ragas_testset.pkl"

# Load an existing ragas testset
def load_ragas_testset_if_exists(path=None):
    """Load a previously pickled RAGAS test set from disk, if present.

    Args:
        path: Pickle file to read. When omitted, the module-level
            ``file_path`` is used.

    Returns:
        The unpickled object, or ``None`` when the file does not exist or
        cannot be unpickled. Failures are printed rather than raised.
    """
    target = file_path if path is None else path

    if not os.path.exists(target):
        print(f"No existing ragas testset found at {target}")
        return None

    try:
        # SECURITY: unpickling can execute arbitrary code. Only load cache
        # files that were written by this notebook.
        with open(target, 'rb') as f:
            ragas_state = pickle.load(f)
    except Exception as e:
        # Best-effort cache: report the problem and let the caller regenerate.
        print(f"Error loading ragas testset: {e}")
        return None

    print(f"Ragas testset loaded from {target}")
    return ragas_state

# Save the ragas testset
def save_ragas_testset(testset, path=None):
    """Pickle ``testset`` to disk without clobbering an existing cache on failure.

    The data is first written to a sibling ``<path>.tmp`` file and then moved
    into place with ``os.replace``. If pickling fails part-way, the previously
    saved file is left intact.

    Args:
        testset: Object to pickle.
        path: Destination file. When omitted, the module-level ``file_path``
            is used.

    Returns:
        None. Success or failure is reported via ``print``; errors are not
        raised.
    """
    target = file_path if path is None else path
    tmp_target = f"{target}.tmp"

    try:
        with open(tmp_target, 'wb') as f:
            pickle.dump(testset, f)
        # Swap the finished file into place only after a successful dump.
        os.replace(tmp_target, target)
        print(f"Ragas testset saved to {target}")
    except Exception as e:
        print(f"Error saving ragas testset: {e}")
        # Do not leave a partial temp file behind.
        try:
            if os.path.exists(tmp_target):
                os.remove(tmp_target)
        except OSError:
            pass


# Create questions
def create_questions_for_ragas(documents, num_questions=1):
    """Generate a RAGAS test set from ``documents`` and cache it on disk.

    Args:
        documents: Documents passed to ``generate_with_langchain_docs``.
        num_questions: Number of questions requested from the generator.

    Returns:
        The generated test set, which is also passed to
        ``save_ragas_testset``.
    """
    testset_generator = TestsetGenerator.from_langchain(
        ChatOpenAI(model="gpt-3.5-turbo"),  # generator LLM
        ChatOpenAI(model="gpt-4o-mini"),    # critic LLM
        OpenAIEmbeddings(),
    )

    # Share of each evolution type in the generated questions.
    evolution_mix = {simple: 0.5, multi_context: 0.4, reasoning: 0.1}

    generated = testset_generator.generate_with_langchain_docs(
        documents,
        num_questions,
        evolution_mix,
        with_debugging_logs=False,
    )
    save_ragas_testset(generated)
    return generated




In [45]:
# Set to True to regenerate the questions (calls the OpenAI-backed generator);
# leave False to reuse the pickled test set from a previous run.
create_questions = False
num_questions = 40

if create_questions:
    ragas_testset = create_questions_for_ragas(documents, num_questions)
else:
    ragas_testset = load_ragas_testset_if_exists()

# The loader returns None when there is no usable cache file.
if ragas_testset is None:
    print("No RAGAS testset found - need to create questions")

Ragas testset loaded from ragas_testset.pkl


In [47]:
# Preview the first rows of the test set as a pandas DataFrame.
ragas_testset.to_pandas().head()

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What makes John Wick stand out as a favorite r...,[: 22\nReview: John Wick is one of my favourit...,John Wick stands out as a favorite recent year...,simple,"[{'source': 'john_wick_2.csv', 'row': 22, 'Rev...",True
1,What are some examples of classic internationa...,[: 11\nReview: JOHN WICK is a rare example of ...,Some examples of classic international films s...,simple,"[{'source': 'john_wick_1.csv', 'row': 11, 'Rev...",True
2,What was the surprise hit movie starring Keanu...,"[: 6\nReview: In 2014, a Keanu Reeves revenge ...",John Wick,simple,"[{'source': 'john_wick_2.csv', 'row': 6, 'Revi...",True
3,Who does John Wick face as he is called upon b...,[: 5\nReview: Iosef's uncle still has John Wic...,"John Wick faces deadly assassins, numerous kil...",simple,"[{'source': 'john_wick_2.csv', 'row': 5, 'Revi...",True
4,"What is the genre and main actor of the film ""...",[: 21\nReview: John Wick is an action film wit...,"The genre of the film 'John Wick' is action, a...",simple,"[{'source': 'john_wick_1.csv', 'row': 21, 'Rev...",True


Set up a RAG chain for evaluation

In [48]:
from datasets import Dataset
def generate_answers(chain, testset):
    """Run every test-set question through ``chain`` and collect the results.

    Args:
        chain: Object whose ``invoke({"question": ...})`` returns a mapping
            with a ``"response"`` entry and a ``"context"`` entry; each
            context item must expose ``page_content``.
        testset: Object whose ``to_pandas()`` returns a frame with
            ``"question"`` and ``"ground_truth"`` columns.

    Returns:
        A ``Dataset`` with the columns ``question``, ``answer``, ``contexts``
        and ``ground_truth``, one row per question.
    """
    # Convert once; both columns come from the same frame.
    testset_df = testset.to_pandas()
    questions = testset_df["question"].values.tolist()
    ground_truths = testset_df["ground_truth"].values.tolist()

    answers = []
    contexts = []
    for question in questions:
        result = chain.invoke({"question": question})
        answers.append(result["response"])
        # Keep only the text of each retrieved context document.
        contexts.append([context.page_content for context in result["context"]])

    return Dataset.from_dict({
        "question": questions,
        "answer": answers,
        "contexts": contexts,
        "ground_truth": ground_truths,
    })
