In [1]:
import os
import dspy
import json

In [2]:
# Path of the JSON dataset to validate: <current working directory>/processed/data.json
DATA_TO_VALIDATE_PATH = os.path.join(os.getcwd(), os.path.join('processed', 'data.json'))

In [3]:
# Read the dataset. The encoding is pinned to UTF-8 (the standard encoding for JSON)
# so decoding does not depend on the platform's default locale encoding.
with open(DATA_TO_VALIDATE_PATH, 'r', encoding='utf-8') as f:
    dataset = json.load(f)

In [4]:
# Show the top-level keys of the loaded dataset.
dataset.keys()

dict_keys(['search_terms', 'responses', 'data_bank', 'pertinent_documents'])

In [5]:
# Show the first entry stored under every top-level key of the dataset.
for section, entries in dataset.items():
    print(f"{section}:")
    # Peek at the first (id, value) pair only; an empty section prints just its header.
    first_entry = next(iter(entries.items()), None)
    if first_entry is not None:
        entry_id, entry_value = first_entry
        print(f"\t{entry_id}: {entry_value}")
    print()

search_terms:
	ffb4f3dc-193a-409d-b8e6-a44ded9b5216: What was the effect of oropharyngeal anesthesia on obstructive sleep apnea in the study subjects?

responses:
	ffb4f3dc-193a-409d-b8e6-a44ded9b5216: Oropharyngeal anesthesia led to an increase in obstructive apneas and hypopneas, as well as a higher frequency of oxyhemoglobin desaturations during sleep.

data_bank:
	0: Obstructive sleep apnea following topical oropharyngeal anesthesia in loud snorers. Previous studies support the presence of an upper airway reflex mechanism that contributes to the maintenance of upper airway patency during sleep. We investigated the possibility that interference with this reflex mechanism contributes to the development of obstructive sleep apnea. Eight otherwise asymptomatic snorers (seven male and one female), age 39 +/- 5.3 yr (mean +/- SEM), underwent overnight sleep studies on three successive nights. An acclimatization night was followed by two study nights randomly assigned to control (C) and o

## RAGAS evaluation starts here

In [6]:
# !pip install ragas

In [7]:
import pandas as pd
# Creating the DataFrame
# One record per (question, pertinent document) pair. The stored response is used
# both as the ground truth and as the answer; a missing response or document text
# ends up as None because the lookups use .get().
data = [
    {
        "question": question_text,
        "ground_truths": [dataset['responses'].get(question_id)],
        "answer": dataset['responses'].get(question_id),
        "contexts": [dataset['data_bank'].get(document_id)],
    }
    for question_id, question_text in dataset['search_terms'].items()
    for document_id in dataset['pertinent_documents'].get(question_id, [])
]

df = pd.DataFrame(data)
df.head()

Unnamed: 0,question,ground_truths,answer,contexts
0,What was the effect of oropharyngeal anesthesi...,[Oropharyngeal anesthesia led to an increase i...,Oropharyngeal anesthesia led to an increase in...,[Obstructive sleep apnea following topical oro...
1,What was the prognostic value of low neutrophi...,"[Low neutrophil function, particularly defecti...","Low neutrophil function, particularly defectiv...",[Neutrophil function and pyogenic infections i...
2,What was the treatment that resulted in both c...,[],,[Paraneoplastic vasculitic neuropathy: a treat...
3,What was the conclusion regarding the role of ...,[The conclusion was that CNS prophylaxis with ...,The conclusion was that CNS prophylaxis with r...,[Questionable role of CNS radioprophylaxis in ...
4,What are the advantages of using duplex Dopple...,"[The advantages include absence of toxicity, p...","The advantages include absence of toxicity, pr...",[Current status of duplex Doppler ultrasound i...


In [8]:
# Write the synthetic evaluation frame to <cwd>/processed/synthetic_dataset.csv,
# leaving out the DataFrame index column.
csv_synthetic_data = os.path.join(
    os.getcwd(),
    os.path.join('processed', 'synthetic_dataset.csv'),
)
df.to_csv(csv_synthetic_data, index=False)

In [9]:
from datasets import Dataset
# Wrap the pandas DataFrame in a `datasets.Dataset`; `ds` is what is passed to `evaluate` later on.
ds = Dataset.from_pandas(df)

In [10]:
from ragas import evaluate
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
    answer_similarity,
    context_relevancy
)

# Metrics to compute, in the order they are handed to ragas.
selected_metrics = [
    context_precision,
    faithfulness,
    answer_relevancy,
    context_recall,
    answer_similarity,
    context_relevancy,
]

# Score the prepared dataset with every selected metric.
result = evaluate(ds, metrics=selected_metrics)

  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


Evaluating:   0%|          | 0/132 [00:00<?, ?it/s]

In [11]:
# Preview the first rows of the evaluation result converted to a pandas DataFrame.
result.to_pandas().head()

  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


Unnamed: 0,question,ground_truths,answer,contexts,ground_truth,context_precision,faithfulness,answer_relevancy,context_recall,answer_similarity,context_relevancy
0,What was the effect of oropharyngeal anesthesi...,[Oropharyngeal anesthesia led to an increase i...,Oropharyngeal anesthesia led to an increase in...,[Obstructive sleep apnea following topical oro...,Oropharyngeal anesthesia led to an increase in...,1.0,1.0,0.940725,1.0,1.0,0.083333
1,What was the prognostic value of low neutrophi...,"[Low neutrophil function, particularly defecti...","Low neutrophil function, particularly defectiv...",[Neutrophil function and pyogenic infections i...,"Low neutrophil function, particularly defectiv...",1.0,1.0,0.89694,1.0,1.0,0.071429
2,What was the treatment that resulted in both c...,[],,[Paraneoplastic vasculitic neuropathy: a treat...,,1.0,,0.94074,1.0,1.0,0.8
3,What was the conclusion regarding the role of ...,[The conclusion was that CNS prophylaxis with ...,The conclusion was that CNS prophylaxis with r...,[Questionable role of CNS radioprophylaxis in ...,The conclusion was that CNS prophylaxis with r...,1.0,1.0,0.968017,1.0,0.999998,0.142857
4,What are the advantages of using duplex Dopple...,"[The advantages include absence of toxicity, p...","The advantages include absence of toxicity, pr...",[Current status of duplex Doppler ultrasound i...,"The advantages include absence of toxicity, pr...",1.0,1.0,0.952222,1.0,1.0,1.0


## Logging to wandb — we really need a nice dashboard

In [12]:
# !pip install wandb

In [13]:
import wandb

In [14]:
# ok lets login to wandb
# wandb.login(key="your - api - key - here")

[34m[1mwandb[0m: Currently logged in as: [33mshramanpadhalni[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\Hem Chandra\.netrc


True

In [15]:

def start_wandb_run(dataset, result, project_name="medical_abstract-rag-synthetic-data-eval",
                     chunk_size=128, sentence_chunk_overlap=16):
    """
    Log evaluation results to a new Weights & Biases run, best-effort.

    A run is initialised, `result` is logged to it, and the run is finished. Any
    exception raised along the way is printed rather than propagated, so a logging
    failure never aborts the notebook. Once a run has been started it is always
    finished, even when logging fails, so no run is left open in the kernel.

    Parameters:
    - dataset: The evaluated dataset; only `len(dataset)` is used, recorded in the
      run config as "number_of_questions".
    - result: The metrics to log; passed unchanged to `wandb.log`.
    - project_name (str): The name of the Weights & Biases project where the run will be logged.
    - chunk_size (int): Recorded in the run config as "chunk_size". This function
      does not chunk anything itself.
    - sentence_chunk_overlap (int): Recorded in the run config as
      "sentence_chunk_overlap". Not otherwise used here.

    Returns:
    None
    """
    error_template = "An error occurred during the Weights & Biases operation: {}"
    run_started = False
    try:
        # Start a new Weights & Biases run
        wandb.init(project=project_name, config={
            "chunk_size": chunk_size,
            "sentence_chunk_overlap": sentence_chunk_overlap,
            "number_of_questions": len(dataset),
            "comments": "Synthetic dataset where ground truth and the answer are the same.",
        })
        run_started = True

        # Log the result to the current run
        wandb.log(result)
    except Exception as e:
        print(error_template.format(e))
        # Optionally, handle exceptions such as retrying the operation or logging the error to a file
    finally:
        # Finish the run whenever one was started, so a failure in wandb.log does not
        # leave it open. A failure while finishing is reported the same best-effort way.
        if run_started:
            try:
                wandb.finish()
            except Exception as e:
                print(error_template.format(e))

# Log the evaluation result for the prepared dataset to a fresh wandb run.
start_wandb_run(dataset=ds, result=result)


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.13007912028966073, max=1.…

0,1
answer_relevancy,▁
answer_similarity,▁
context_precision,▁
context_recall,▁
context_relevancy,▁
faithfulness,▁

0,1
answer_relevancy,0.93351
answer_similarity,1.0
context_precision,1.0
context_recall,1.0
context_relevancy,0.24975
faithfulness,0.97778
