In [1]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell executes, so edits to local code (e.g. the
# `scripts` package imported below) take effect without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import os

# Work from the experiment root so the relative "./datasets" and "./results"
# paths used in later cells resolve. Set the ALIGNMENT_EXP_DIR environment
# variable to run from a different checkout; the original location stays the
# default so existing behaviour is unchanged when the variable is unset.
os.chdir(os.environ.get("ALIGNMENT_EXP_DIR", "/Users/shahules/ragas/alingment-exp/"))

## Load Model

In [3]:
from langchain_openai.chat_models import ChatOpenAI
from ragas.llms import LangchainLLMWrapper

# Build both OpenAI chat models the same way: construct the LangChain client,
# then wrap it in ragas' LangChain adapter. Order matters: gpt-4o first.
llm_4o, llm_4o_mini = (
    LangchainLLMWrapper(ChatOpenAI(model=model_name))
    for model_name in ("gpt-4o", "gpt-4o-mini")
)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from langchain_openai.embeddings import OpenAIEmbeddings
from ragas.embeddings import LangchainEmbeddingsWrapper


# `embeddings` is the raw LangChain client; `langchain_embeddings` is the same
# client wrapped in ragas' adapter and is what the evaluation config uses.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
langchain_embeddings = LangchainEmbeddingsWrapper(embeddings=embeddings) # the wrapper accepts any LangChain Embeddings instance


In [5]:
from langchain_aws import ChatBedrockConverse
from langchain_aws import BedrockEmbeddings
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

# Connection settings for the Bedrock-hosted chat model.
config = {
    # AWS credentials profile, e.g. "default".
    "credentials_profile_name": "default",
    # AWS region, e.g. "us-east-1"; also used to build the runtime endpoint URL.
    "region_name": "us-east-1",
    # Bedrock model id, e.g. "anthropic.claude-3-5-sonnet-20240620-v1:0".
    "llm": "anthropic.claude-3-haiku-20240307-v1:0",
}

# Construct the Bedrock chat client and hand it straight to the ragas adapter,
# so `bedrock_llm` only ever names the wrapped model.
bedrock_llm = LangchainLLMWrapper(
    ChatBedrockConverse(
        credentials_profile_name=config["credentials_profile_name"],
        region_name=config["region_name"],
        base_url=f"https://bedrock-runtime.{config['region_name']}.amazonaws.com",
        model=config["llm"],
    )
)

## Dataset

In [6]:
from datasets import Dataset

In [7]:
# Test and training datasets, loaded from JSON files under ./datasets
# (resolved against the current working directory).
test_dataset = Dataset.from_json("./datasets/dataset_v4_test.json")
train_dataset = Dataset.from_json("./datasets/dataset_v4_training_annotated_full.json")

## Comparison

In [8]:
import json
import os


def save_result(result, path="./results/aspect_critic/result.json"):
    """Append one evaluation result to a JSON file that holds a list of results.

    Args:
        result: JSON-serialisable object to append (the notebook passes the
            value returned by ``run_evaluation``).
        path: File to update. Defaults to the aspect-critic results file this
            notebook has always written to.

    Raises:
        TypeError: If ``result`` cannot be serialised to JSON. The existing
            file is left untouched in that case.
        json.JSONDecodeError: If the existing file does not contain valid JSON.
    """
    try:
        # Context manager so the read handle is closed deterministically.
        with open(path) as file:
            data = json.load(file)
    except FileNotFoundError:
        # First run (file or directory missing): start a fresh history.
        data = []

    data.append(result)

    # Serialise BEFORE opening for write. Opening with "w" truncates the file,
    # so a non-serialisable result would otherwise wipe every saved result.
    payload = json.dumps(data, indent=4)

    os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
    with open(path, "w") as file:
        file.write(payload)
        

In [9]:
from scripts import run_evaluation

In [11]:
config ={
    
    "test_data":"./datasets/dataset_v4_test.json",
    "train_data":"./datasets/dataset_v4_training_annotated_full.json",
    "llm":llm_4o,
    "embedding_model":langchain_embeddings,
    "prompt_optimisation":False,
    "dynamic_retrieval": True,
    "search_type": "similarity",
    "num_train_samples":20,
    "batch_size":12,
    "seed":42,
    }

result = await run_evaluation(**config)
print(result)

Evaluating:   0%|                                                | 0/50 [00:00<?, ?it/s]
Generating train split: 20 examples [00:00, 940.25 examples/s]
Evaluating: 100%|███████████████████████████████████████| 50/50 [00:12<00:00,  3.89it/s]

{'score': 0.7857142857142857, 'prompt': 'Evaluate the response based on its alignment with the reference text and determine if it is correct. Consider the accuracy of the information, the relevance of the details provided, and any factual inaccuracies. Ensure that specific dates or timeframes mentioned in the reference are present in the response, as their absence can lead to misalignment. Emphasize the importance of verifying that all critical details, such as dates, are included to ensure full alignment with the reference. Provide a reason for your evaluation and assign a verdict: 1 if the response is correct and aligns well with the reference, and 0 if there are significant inaccuracies, missing critical details like dates, or misalignments.', 'metric': 'answer_correctness', 'initial_prompt': 'Evaluate the response based on its alignment with the reference text and determine if it is correct. Consider the accuracy of the information, the relevance of the details provided, and any fa




In [12]:
save_result(result)

Evaluate the provided response based on its accuracy and relevance to the reference text. Given the user_input, reference, and response, assign a verdict of 1 for correct and relevant responses, and 0 for those that contain factual inaccuracies or significant deviations. Clearly define the criteria for accuracy and relevance, emphasizing the importance of specific factual details, such as dates. Identify and highlight any factual inaccuracies in the response, even if the overall content appears relevant and accurate. Additionally, provide a nuanced evaluation by considering the completeness of the information presented, rather than solely focusing on alignment with the reference.

In [11]:
result

{'score': 0.6567164179104478,
 'prompt': 'Given the user_input, reference and response. Is the response correct compared with the reference',
 'metric': 'answer_correctness',
 'initial_prompt': 'Given the user_input, reference and response. Is the response correct compared with the reference',
 'final_prompt': 'Given the user_input, reference and response. Is the response correct compared with the reference',
 'llm': 'text-embedding-3-small',
 'embedding_model': None,
 'dynamic_retrieval': False,
 'search_type': None}

In [82]:
save_result(result)

## Analysis

In [13]:
import json

# Read train_vectors.json from the current working directory. The context
# manager closes the file handle, which a bare json.load(open(...)) leaves open.
with open("train_vectors.json") as vectors_file:
    data = json.load(vectors_file)

FileNotFoundError: [Errno 2] No such file or directory: 'train_vectors.json'

In [None]:
train_data = Dataset.from_list(data)

In [14]:
len(train_data)

NameError: name 'train_data' is not defined

In [17]:
from scripts.utils import stratified_sample_no_duplicates

In [18]:
# Sample from train_data with the project helper, using a fixed seed.
# NOTE(review): presumably 20 is the sample size and 'qdrant' is the column
# the sample is stratified on — confirm against scripts.utils.
batch_data = stratified_sample_no_duplicates(train_data,20,target_column='qdrant',seed=42)


In [None]:
[1, 4, 6, 7, 10, 13]


In [27]:
batch_data[13]

{'input': {'reference': "The fall of the Qing Dynasty was influenced by a combination of internal and external factors. Internally, the dynasty faced significant corruption, inefficiency, and a lack of modernization within its government and military. The population was discontented due to heavy taxation, land shortages, and famines, which led to widespread unrest and uprisings such as the Taiping Rebellion. Externally, the Qing Dynasty was weakened by foreign invasions and unequal treaties imposed by Western powers and Japan, which eroded its sovereignty and economic stability. The Opium Wars and the subsequent Treaty of Nanking, for example, forced China to cede Hong Kong to Britain and open up several ports to foreign trade, undermining its control over its own economy. Additionally, the failure to successfully reform and modernize in response to these challenges further contributed to the dynasty's decline, culminating in the 1911 Revolution, which ultimately led to the abdication 

In [19]:
from collections import Counter

In [20]:
# Tally how often each question appears in the sampled batch, most frequent first.
question_counts = Counter(row['input']['user_input'] for row in batch_data)
question_counts.most_common()

[('What were the main causes and outcomes of the Wars of the Roses?', 2),
 ('What is the historical significance of the Domesday Book?', 2),
 ('What were the key outcomes and significance of the Signing of the Maastricht Treaty?',
  1),
 ('How was penicillin discovered and what impact did it have on medicine?', 1),
 ('What were the key events and outcomes of the Battle of Waterloo?', 1),
 ('What were the significant impacts of the Hiroshima and Nagasaki bombings during World War II?',
  1),
 ('What were the key events and motivations that led to the formation of the World Trade Organization (WTO)?',
  1),
 ('What was the Berlin Airlift and why was it significant in post-World War II history?',
  1),
 ('What were the causes and consequences of the Chernobyl Disaster?', 1),
 ('What were the key factors and outcomes of the Battle of Stalingrad during World War II?',
  1),
 ('What were the key factors that contributed to the success of the D-Day Invasion during World War II?',
  1),
 ('Wha

In [28]:
# NOTE: this rebinds `train_dataset`, which an earlier cell loaded from
# dataset_v4_training_annotated_full.json; from here on the name refers to
# dataset_v4_training.json instead.
train_dataset = Dataset.from_json("./datasets/dataset_v4_training.json")


In [29]:
search_input = 'What were the main factors that led to the fall of the Qing Dynasty?'

In [30]:
# Find the training row whose question equals `search_input` and leave it bound
# to `data`. The for/else fails loudly when nothing matches; without it, `data`
# would silently end up as the last row scanned, or keep a stale value from an
# earlier cell if the dataset is empty.
for data in train_dataset:
    if data['user_input'] == search_input:
        break
else:
    raise LookupError(f"No training row with user_input == {search_input!r}")

In [31]:
data

{'user_input': 'What were the main factors that led to the fall of the Qing Dynasty?',
 'reference': "The fall of the Qing Dynasty was influenced by a combination of internal and external factors. Internally, the dynasty faced significant corruption, inefficiency, and a lack of modernization within its government and military. The population was discontented due to heavy taxation, land shortages, and famines, which led to widespread unrest and uprisings such as the Taiping Rebellion. Externally, the Qing Dynasty was weakened by foreign invasions and unequal treaties imposed by Western powers and Japan, which eroded its sovereignty and economic stability. The Opium Wars and the subsequent Treaty of Nanking, for example, forced China to cede Hong Kong to Britain and open up several ports to foreign trade, undermining its control over its own economy. Additionally, the failure to successfully reform and modernize in response to these challenges further contributed to the dynasty's decline