# Instruction fine-tuning of a Phi-3.5-mini model on privacy-preserving conversation summarization using LoRA via Hugging Face Hub

## Installing and loading the libraries

In [1]:
# import pandas as pd
# from datasets import Dataset, concatenate_datasets
# from sklearn.model_selection import train_test_split
# import os

# # Create a directory to store the datasets if it doesn't exist
# os.makedirs('./splits', exist_ok=True)

# # Load refined dataset
# file_path = './datasets/refined_dataset.csv'  # Path to the refined dataset
# df = pd.read_csv(file_path)

# # Group by 'setting' and split into train (90%) and test (10%) per setting
# train_data = []
# test_data = []

# # Group by 'setting' and split
# for setting, group in df.groupby('setting'):
#     train_split, test_split = train_test_split(group, test_size=0.10, random_state=42)
#     train_data.append(train_split)
#     test_data.append(test_split)

# # Concatenate all train and test data splits for the refined dataset
# train_df_refined = pd.concat(train_data)
# test_df_refined = pd.concat(test_data)

# # Convert to HF dataset
# train_dataset_refined = Dataset.from_pandas(train_df_refined)
# test_dataset_refined = Dataset.from_pandas(test_df_refined)

# print("REFINED_TRAIN: ",len(train_df_refined))
# print("REFINED_TEST: ",len(test_df_refined))
# # Save CSV files for refined dataset
# train_df_refined.to_csv('./splits/train_refined.csv', index=False)
# test_df_refined.to_csv('./splits/test_refined.csv', index=False)

# # Handling other datasets
# datasets_to_process = ['DialogSum', 'TweetSumm', 'ConvoSumm', 'SAMSum']

# train_splits = []
# test_splits = []

# for dataset_name in datasets_to_process:
#     dataset_path = f'./datasets/{dataset_name}.csv'  # Assuming the datasets are in CSV format
#     dataset_df = pd.read_csv(dataset_path)
    
#     if dataset_name != 'SAMSum':  # Split the first 3 datasets (DialogSum, TweetSumm, ConvoSumm)
#         train_split, test_split = train_test_split(dataset_df, test_size=0.5, random_state=42)
#     else:  # Keep SAMSum unchanged
#         train_split = pd.DataFrame()
#         test_split = dataset_df # SAMSum doesn't contribute to the train set

#     # Convert splits to HF format
#     if not train_split.empty:
#         train_dataset = Dataset.from_pandas(train_split)
#     if not test_split.empty:
#         test_dataset = Dataset.from_pandas(test_split)
    

#     # Save CSV files for each dataset
#     if not train_split.empty:
#         print(f"{dataset_name}_TRAIN: ",len(train_split))
#         train_split.to_csv(f'./splits/train_{dataset_name}.csv', index=False)
#     if not test_split.empty:
#         print(f"{dataset_name}_TEST: ",len(test_split))
#         test_split.to_csv(f'./splits/test_{dataset_name}.csv', index=False)

#     # Add to lists for merging later
#     if not train_split.empty:
#         train_splits.append(train_dataset)
#     if not test_split.empty:
#         test_splits.append(test_dataset)

# # Merge all the datasets
# train_datasets = [train_dataset_refined] + train_splits
# test_datasets = [test_dataset_refined] + test_splits

# # Final merged train and test datasets
# final_train_dataset = concatenate_datasets(train_datasets)
# final_test_dataset = concatenate_datasets(test_datasets)

# # Convert Hugging Face datasets back to pandas DataFrame for CSV saving
# final_train_df = pd.concat([d.to_pandas() for d in train_datasets])
# final_test_df = pd.concat([d.to_pandas() for d in test_datasets])

# # Save the final merged train and test datasets as CSV
# final_train_df.to_csv('./splits/final_train_split.csv', index=False)
# final_test_df.to_csv('./splits/final_test_split.csv', index=False)

# print(" FINAL DATASET_TRAIN: ",len(final_train_df))
# print(" FINAL DATASET_TEST: ",len(final_test_df))





In [2]:
# !pip install scikit-learn
# !pip install --upgrade pip
# !pip install bitsandbytes transformers peft accelerate datasets trl torch wandb
# !pip install packaging
# !pip uninstall -y ninja 
# !pip install ninja
# MAX_JOBS=4 
# !pip install flash-attn --no-build-isolation
# !pip install ipywidgets
# !pip install python-dotenv
# !pip install huggingface_hub



# import torch
# print(torch.__version__)

# !pip install absl-py nltk rouge_score
# !pip list | grep transformers

## Importing the libraries

In [3]:
# This code block is importing necessary modules and functions for fine-tuning a language model.

# 'randrange' is a function from the 'random' module that generates a random number within the specified range.
from random import randrange

# 'torch' is the PyTorch library, a popular open-source machine learning library for Python.
import torch

# 'load_dataset' is a function from the 'datasets' library by Hugging Face which allows you to load a dataset.
from datasets import load_dataset

# 'LoraConfig' and 'prepare_model_for_kbit_training' are from the 'peft' library. 
# 'LoraConfig' is used to configure LoRA (Low-Rank Adaptation) adapters.
# 'prepare_model_for_kbit_training' is a function that prepares a model for k-bit training.
# 'TaskType' contains the different types of tasks supported by PEFT.
# 'PeftModel' base model class for specifying the base Transformer model and configuration to apply a PEFT method to.
from peft import LoraConfig, prepare_model_for_kbit_training, TaskType, PeftModel

# Several classes and functions are imported from the 'transformers' library by Hugging Face.
# 'AutoModelForCausalLM' is a class that provides a generic transformer model for causal language modeling.
# 'AutoTokenizer' is a class that provides a generic tokenizer class.
# 'BitsAndBytesConfig' is a class for configuring bitsandbytes quantization when loading a model.
# 'TrainingArguments' is a class that defines the arguments used for training a model.
# 'set_seed' is a function that sets the seed for generating random numbers.
# 'pipeline' is a function that creates a pipeline that can process data and make predictions.
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    set_seed,
    pipeline
)

# 'SFTTrainer' is a class from the 'trl' library that provides a trainer for supervised fine-tuning (SFT).
from trl import SFTTrainer

## Setting Global Parameters

In [4]:
# This code block is setting up the configuration for fine-tuning a language model.

# 'model_id' and 'model_name' are the identifiers for the pre-trained model that you want to fine-tune.
# In this case, it's the 'Phi-3.5-mini-instruct' model from Microsoft. Other Phi-3 models are listed below for reference.
# Model Names 
# microsoft/Phi-3-mini-4k-instruct
# microsoft/Phi-3-mini-128k-instruct
# microsoft/Phi-3-small-8k-instruct
# microsoft/Phi-3-small-128k-instruct
# microsoft/Phi-3-medium-4k-instruct
# microsoft/Phi-3-medium-128k-instruct
# microsoft/Phi-3-vision-128k-instruct
# microsoft/Phi-3-mini-4k-instruct-onnx
# microsoft/Phi-3-mini-4k-instruct-onnx-web
# microsoft/Phi-3-mini-128k-instruct-onnx
# microsoft/Phi-3-small-8k-instruct-onnx-cuda
# microsoft/Phi-3-small-128k-instruct-onnx-cuda
# microsoft/Phi-3-medium-4k-instruct-onnx-cpu
# microsoft/Phi-3-medium-4k-instruct-onnx-cuda
# microsoft/Phi-3-medium-4k-instruct-onnx-directml
# microsoft/Phi-3-medium-128k-instruct-onnx-cpu
# microsoft/Phi-3-medium-128k-instruct-onnx-cuda
# microsoft/Phi-3-medium-128k-instruct-onnx-directml
# microsoft/Phi-3-mini-4k-instruct-gguf

# Identifier of the base model on the Hugging Face Hub. 'model_name' holds the same value;
# the visible cells load the tokenizer and model through 'model_id'.
model_id = "microsoft/Phi-3.5-mini-instruct"
model_name = "microsoft/Phi-3.5-mini-instruct"

# 'dataset_name' is a label for the dataset used for fine-tuning.
# NOTE(review): the visible cells read the training data from a local CSV file and do not
# reference 'dataset_name' -- confirm whether it is still needed.
dataset_name = "PSDataset"

# 'dataset_split' is the split of the dataset that you want to use for training.
# In this case, it's the 'train' split.
dataset_split= "train"

# 'new_model' is the name that you want to give to the fine-tuned model.
new_model = "PSTax3"

# 'hf_model_repo' is the repository on the Hugging Face Model Hub where the fine-tuned model will be saved.
# It is built as "<Hugging Face username>/<new_model>"; update the username prefix to your own.
hf_model_repo="psmsrp/"+new_model

# 'device_map' is a dictionary that maps the model to the GPU device.
# In this case, the entire model is loaded on GPU 0.
device_map = {"": 0}

# The following are parameters for LoRA (Low-Rank Adaptation).

# 'lora_r' is the rank of the LoRA update matrices (passed as 'r' to LoraConfig).
lora_r = 32

# 'lora_alpha' is the alpha parameter for LoRA scaling.
lora_alpha = 64

# 'lora_dropout' is the dropout probability for LoRA layers.
lora_dropout = 0.1

# 'target_modules' is the list of module names in the model to which LoRA adapters are applied.
target_modules= ['k_proj', 'q_proj', 'v_proj', 'o_proj', "gate_proj", "down_proj", "up_proj"]

# 'set_seed' is a function that sets the seed for generating random numbers,
# which is used for reproducibility of the results.
# Note that TrainingArguments is later given its own seed (42).
set_seed(1234)


## Load the dataset with the instruction set

In [5]:
# This code block loads the training split from a local CSV file, converts it to a Hugging Face Dataset, prints its size, and shows a random example.

import pandas as pd
from datasets import Dataset, concatenate_datasets


# Location of the merged training split produced by the data-preparation step.
file_path = './splits_corrected/final_train_split.csv'

# Read the CSV with pandas, then turn the rows (one dict per row) into a
# Hugging Face Dataset object.
df = pd.read_csv(file_path)
dataset = Dataset.from_list(df.to_dict(orient='records'))

# Report how many rows were loaded and show one randomly chosen example.
print(f"dataset size: {len(dataset)}")
sample_index = randrange(len(dataset))
print(dataset[sample_index])


dataset size: 1071
{'setting': 'Travel and Location', 'dialog': "<BEGIN CONVERSATION>\r\n\r\n\r\n\r\nSophie: **Hey Mark, how was your vacation in Thailand? I saw some amazing photos on your social media!**\r\n\r\n\r\n\r\nMark: **It was fantastic, Sophie. We stayed at the Grand Orchid Resort in Phuket from the 5th to the 15th. Room 210, facing the sea. You should definitely visit if you get the chance.**\r\n\r\n\r\n\r\nSophie: **That sounds breathtaking. Did you visit any other places in Thailand?**\r\n\r\n\r\n\r\nMark: **Yeah, we traveled to Bangkok too. We stayed at the Pathumwan Princess Hotel, from the 15th to the 20th, room 402. It's right next to the MBK shopping center.**\r\n\r\n\r\n\r\nSophie: **Wow, you got the best of both worlds – beaches and city life. Did you take any geo-tagged photos?**\r\n\r\n\r\n\r\nMark: **Absolutely! Tons of geo-tagged photos at the Grand Palace in Bangkok and the Phi Phi Islands.**\r\n\r\n\r\n\r\nSophie: **Perfect for your travel blog, I'm sure. What

In [6]:
# This line of code is used to display the structure of the 'dataset' object.
# By simply writing the name of the object, Python will call its 'repr' (representation) method, 
# which returns a string that describes the object. 
# For a Hugging Face 'Dataset' object, this shows the column names ('features')
# and the number of rows.
dataset

Dataset({
    features: ['setting', 'dialog', 'metadata', 'Quality', 'summary', 'Violations', 'Corrected_Summary', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', '__index_level_0__'],
    num_rows: 1071
})

In [7]:
# This line of code is used to print a random example from the 'dataset'.

# 'randrange' is a function from the 'random' module that generates a random number within the specified range.
# Here it's used to generate a random index within the range of the dataset size (i.e., 'len(dataset)').

# This random index is then used to select a corresponding example from the 'dataset'. 
# The selected example is printed to the console.
print(dataset[randrange(len(dataset))])

{'setting': 'Family and Relationships', 'dialog': "<BEGIN CONVERSATION>\r\n\r\n\r\n\r\n**Anna:** You know, I can't believe it's been two years since we had a proper family gathering. Any plans for one this year?\r\n\r\n\r\n\r\n**David:** We're actually hosting one next month at my place. Should be a full house—parents, siblings, cousins... you name it!\r\n\r\n\r\n\r\n**Anna:** Wow, that's exciting! How are your parents doing? I remember there were some issues last time.\r\n\r\n\r\n\r\n**David:** Yeah, it's still a bit tense. My dad's still mad about that inheritance dispute with Uncle Joe. They haven't spoken since the will listed Aunt Margaret as the main beneficiary.\r\n\r\n\r\n\r\n**Anna:** Oh, that's rough. Inheritance disputes can tear families apart. I still recall how my uncle and my dad argued over my grandpa's property. Family histories can be so complicated.\r\n\r\n\r\n\r\n**David:** Tell me about it. And my cousin Lisa's been avoiding everyone since she didn't get the vacati

## Load the tokenizer to prepare the dataset

In [8]:
# This code block is used to load a tokenizer from the Hugging Face Model Hub.
# NOTE: 'tokenizer' is created again in the "Load the tokenizer and model to finetune" cell
# (there with padding_side = 'left'); the instance created here is the one in effect for the
# cells that run before that one.

# 'tokenizer_id' is set to the 'model_id', which is the identifier for the pre-trained model.
# This assumes that the tokenizer associated with the model has the same identifier as the model.
tokenizer_id = model_id

# 'AutoTokenizer.from_pretrained' is a method that loads a tokenizer from the Hugging Face Model Hub.
# 'tokenizer_id' is passed as an argument to specify which tokenizer to load.
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)

# 'tokenizer.padding_side' is a property that specifies which side to pad when the input sequence is shorter than the maximum sequence length.
# Setting it to 'right' means that padding tokens will be added to the right (end) of the sequence.
# This is done to prevent warnings that can occur when the padding side is not explicitly set.
tokenizer.padding_side = 'right'

Function to create the appropriate format for our model. We are going to adapt our dataset to the ChatML format.

In [9]:
import random
rand=random.randint(1,100)

# Fixed feedback text sent as the final 'user' turn of every training conversation.
# The wording (including its spelling) is part of the training data and is kept verbatim.
_GOOD_SUMMARY_FEEDBACK = (
    ' Yes, This summary is appropriate as it does not leak any sensitive information and uses only generic information to convey the idea of the conversation.\n'
    ' Also always try to provide summary in a single paragraph, avoid bullets or points and always enclose within "<BEGIN SUMMARY>...<END SUMMARY>" tags. '
    'Make sure to restrict the summary within 50-200 words. '
    'Since all of these have been followed here, it has been labelled as - "GOOD".\n'
    ' Note that it is fine to mention/include a few low priority points, or mention/skip a few categories if their parent category is repeated, say if the same sentence has multiple violations under academia, mention just one and the entire setting will do. '
    'Ensure that no information belonging to anyone outside of the conversation is leaked. '
    'Ensure that no names of people not involved in the conversation is included in the Summary. '
    'Also ensure that the summary only provides a generic overview of the summary of the conversation. '
    'DO NOT GO DEEP INTO THE CONVERSATION AND ENSURE PRIVACY IS PRESERVED. '
    'Due to all these being maintained, you have done a good job and this summary is "GOOD", REMEMBER THIS AND FROM NEXT TIME DIRECTY OUTPUT PRIVACY PRESERVING SUMMARIES WITH NO VIOLATIONS THAT CAN BE LABELLED AS "GOOD".\n'
    '    \n'
    '    Make sure in cases of opinions or feelings towards someone or something, the exact ideals shuld never be reveal, say which side a person supports in a war or opinions on any gender or race. '
    'Just mention in the summary in a generic manner that so and so were discussed briefly, and give an overview of what happened. '
    'Another point to remember is if the information is totally relevant to the conversation and has some well accepted mainstream standards or celebrities, some level of relaxation can be given in that case if mentioned in the summaries, and may not be included as a violation. '
    'Continue generating more privacy preserving summaries. '
)


def create_summaries_column(row, inst_gen):
    """Build the three-message chat conversation for one dataset row.

    Args:
        row: Mapping with at least the keys 'dialog' and 'Corrected_Summary'.
        inst_gen: Instruction text placed in front of the dialog.

    Returns:
        A dict with the single key 'summaries' whose value is a list of three
        messages, each a dict with 'content' and 'role':
          1. user      - the instruction, a newline and a space, then the dialog;
          2. assistant - the corrected summary rendered as a string;
          3. user      - the fixed "GOOD" feedback text.
    """
    # An earlier variant sent a different, corrective instruction (built from
    # row['Violations']) when row['Quality'] contained 'bad'. That branch is
    # disabled: every row receives the same "GOOD" feedback turn.
    conversation = [
        {"content": f"{inst_gen}\n {row['dialog']}", "role": "user"},
        {"content": str(row['Corrected_Summary']), "role": "assistant"},
        {"content": _GOOD_SUMMARY_FEEDBACK, "role": "user"},
    ]
    return {"summaries": conversation}

def format_dataset_chatml_summaries(row):
    """Render a row's 'summaries' message list into one chat-formatted string.

    Uses the module-level 'tokenizer' and its chat template. The result is
    returned untokenized and without a trailing generation prompt, under the
    key 'text'.
    """
    rendered = tokenizer.apply_chat_template(
        row["summaries"],
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": rendered}

Apply the ChatML format to our dataset

The code block is used to prepare a dataset for training a chat model.

The first step applies the create_summaries_column function to each example in the dataset (through a list comprehension passed to Dataset.from_list). This function takes a row from the dataset and transforms it into a dictionary with a 'summaries' key. The value of this key is a list of 'user' and 'assistant' messages.

The first 'user' message is created by combining the instruction prompt with the 'dialog' field from the row, the 'assistant' message is created from the 'Corrected_Summary' field, and a final 'user' message carries a fixed feedback text. These messages are appended to the 'summaries' list in the order 'user', 'assistant', 'user'.

The second step applies the format_dataset_chatml_summaries function to each example in the updated dataset. This function takes a row from the dataset and transforms it into a dictionary with a 'text' key. The value of this key is a string of formatted chat messages.

The tokenizer.apply_chat_template method is used to format the list of chat messages into a single string. The 'add_generation_prompt' parameter is set to False to avoid adding a generation prompt at the end of the string, and the 'tokenize' parameter is set to False to return a string instead of a list of tokens.

The result of these operations is a dataset where each example is a dictionary with a 'text' key and a string of formatted chat messages as its value. This format is suitable for training a chat model.

In [10]:
# Instruction prompt(s) placed in front of every dialog; one conversation is
# generated per (row, instruction) pair.
inst_gen = [''' Provide a privacy preserving summary for the following conversation - ''']

# Step 1: build the message lists for every row.
# (A disabled variant restricted this to rows whose 'Quality' contains 'good'.)
conversation_records = [
    create_summaries_column(row, inst)
    for row in dataset
    for inst in inst_gen
]
dataset_chatml = Dataset.from_list(conversation_records)

# Step 2: render each message list into a single 'text' string.
text_records = [format_dataset_chatml_summaries(row) for row in dataset_chatml]
dataset_chatml = Dataset.from_list(text_records)
print(dataset_chatml[0])

# Step 3: hold out 10% of the examples for evaluation (fixed seed).
dataset_chatml = dataset_chatml.train_test_split(test_size=0.1, seed=1234)
print(dataset_chatml)


{'text': '<|user|>\n Provide a privacy preserving summary for the following conversation - \n <BEGIN CONVERSATION>\r\n\r\nJake: Hey Nina, remember that disaster of a physics exam last semester? I don\'t think I\'ve ever seen so many people fail an exam at once.\r\n\r\nNina: Oh God, yes! Nearly half the class flunked. I ended up getting a 48 out of 100. It tanked my GPA from a 3.4 to a 2.8. How did you fare?\r\n\r\nJake: Not much better, I got a 50. The worst part was that I got put on academic probation because I failed two other courses that semester. Almost didn\'t get to register for the next term.\r\n\r\nNina: That\'s brutal. The stress must have been overwhelming. Speaking of stress, did you hear about Lisa? She had a panic attack in the middle of her final exams. Apparently, it\'s listed in her disciplinary record due to exam violations.\r\n\r\nJake: Yeah, it\'s awful. Academic pressure really takes its toll on mental health. I remember Lisa mentioned that they even noted it in h

In [11]:
print(dataset_chatml['train']['text'][0])

<|user|>
 Provide a privacy preserving summary for the following conversation - 
 <BEGIN CONVERSATION>

Sarah: **Hey Julia, did you hear about Oliver's latest court case?**

Julia: **Oh, you mean the fraud charges he's facing now? I heard his criminal history is catching up to him.**

Sarah: **Yeah, apparently, his arrest records were a mess. There are multiple violations from previous cases.**

Julia: **It's not looking good for him. I recently found out he had a lawsuit pending from his last job.**

Sarah: **Wasn't that the one with the embezzlement allegations at that tech firm, Innovatech?**

Julia: **Exactly! And his manager, Mr. Patel, is all over the court records because he was the one who reported him.**

Sarah: **Oh, I didn't know Mr. Patel was involved. That must've been tough for Oliver. I heard his lawyer is relatively inexperienced too.**

Julia: **Yeah, from a local firm that nobody has heard of. It's not the best representation for such a serious case.**

Sarah: **And t

## Instruction fine-tune a Phi-3-mini model using LORA and trl

First, we try to identify our GPU

In [12]:
# This code block is used to set the compute data type and attention implementation based on whether bfloat16 is supported on the current CUDA device.

# 'torch.cuda.is_bf16_supported()' is a function that checks if bfloat16 is supported on the current CUDA device.
# If bfloat16 is supported, 'compute_dtype' is set to 'torch.bfloat16' and 'attn_implementation' is set to 'flash_attention_2'.
if torch.cuda.is_bf16_supported():
  compute_dtype = torch.bfloat16
  attn_implementation = 'flash_attention_2'
# If bfloat16 is not supported, 'compute_dtype' is set to 'torch.float16' and 'attn_implementation' is set to 'sdpa'.
else:
  compute_dtype = torch.float16
  attn_implementation = 'sdpa'

# This line of code is used to print the value of 'attn_implementation', which indicates the chosen attention implementation.
print(attn_implementation)

flash_attention_2


## Load the tokenizer and model to finetune

In [13]:
# This code block is used to load a pre-trained model and its associated tokenizer from the Hugging Face Model Hub.
# It replaces the 'tokenizer' object created earlier in the notebook.

# 'model_name' is set to the identifier of the pre-trained model.
# NOTE(review): the calls below use 'model_id', not 'model_name' -- confirm this reassignment is needed.
model_name = "microsoft/Phi-3.5-mini-instruct"

# 'AutoTokenizer.from_pretrained' is a method that loads a tokenizer from the Hugging Face Model Hub.
# 'model_id' is passed as an argument to specify which tokenizer to load.
# 'trust_remote_code' is set to True to trust the remote code in the tokenizer files.
# 'add_eos_token' is set to True to add an end-of-sentence token to the tokenizer.
# 'use_fast' is set to True to use the fast version of the tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, add_eos_token=True, use_fast=True)

# The padding token is set to the unknown token.
tokenizer.pad_token = tokenizer.unk_token

# The ID of the padding token is set to the ID of the unknown token.
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)

# The padding side is set to 'left', meaning that padding tokens will be added to the left (start) of the sequence.
# (The tokenizer created earlier in the notebook used 'right'.)
tokenizer.padding_side = 'left'

# 'AutoModelForCausalLM.from_pretrained' is a method that loads a pre-trained model for causal language modeling from the Hugging Face Model Hub.
# 'model_id' is passed as an argument to specify which model to load.
# 'torch_dtype' is set to the compute data type determined earlier.
# 'trust_remote_code' is set to True to trust the remote code in the model files.
# 'device_map' is passed to specify device placement; it was defined earlier as {"": 0}.
# 'attn_implementation' is set to the attention implementation determined earlier.
model = AutoModelForCausalLM.from_pretrained(
          model_id, torch_dtype=compute_dtype, trust_remote_code=True, device_map=device_map,
          attn_implementation=attn_implementation
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Configure the LoRA properties

The SFTTrainer offers seamless integration with peft, simplifying the process of instruction tuning LLMs. All we need to do is create our LoRAConfig and supply it to the trainer. However, before initiating the training process, we must specify the hyperparameters we intend to use, which are defined in TrainingArguments.

In [14]:
# This code block defines the training arguments and the LoRA configuration.

# TrainingArguments:
# 'output_dir' is the directory where the model and its checkpoints will be saved.
# 'evaluation_strategy' is "steps": evaluation runs every 'eval_steps' (100) training steps.
#   NOTE(review): newer Transformers releases rename this argument to 'eval_strategy' -- confirm
#   against the installed version before changing it.
# 'do_eval' is True, meaning that evaluation will be performed.
# 'optim' is "adamw_torch", the AdamW optimizer from PyTorch.
# 'per_device_train_batch_size' is 4 and 'gradient_accumulation_steps' is 4, which gives a total
#   train batch size of 16 on one device; 'per_device_eval_batch_size' is 8.
# 'log_level' is "debug".
# 'save_strategy' is "epoch", meaning that a checkpoint is saved after each epoch.
# 'logging_steps' is 50.
# 'learning_rate' is 1e-4.
# 'bf16' is enabled when bfloat16 is supported on the current CUDA device; 'fp16' is its opposite.
# 'num_train_epochs' is 10.
# 'warmup_ratio' is 0.1 and 'lr_scheduler_type' is "linear".
# 'report_to' is the string "none", which turns off all reporting integrations. Passing the
#   Python value None does not disable reporting: the run recorded with report_to=None logged
#   "Automatic Weights & Biases logging enabled".
# 'seed' is 42 (independent of the earlier set_seed(1234) call).

# LoraConfig:
# 'r', 'lora_alpha' and 'lora_dropout' come from the global parameters 'lora_r', 'lora_alpha'
#   and 'lora_dropout' defined earlier in the notebook.
# 'task_type' is TaskType.CAUSAL_LM.
# 'bias' is "lora_only".
# 'target_modules' is the global list of module names to which LoRA is applied.

# Query bfloat16 support once so 'fp16' and 'bf16' can never disagree.
use_bf16 = torch.cuda.is_bf16_supported()

args = TrainingArguments(
        output_dir="./outputs/phi-3.5-mini-LoRA",
        evaluation_strategy="steps",
        do_eval=True,
        optim="adamw_torch",
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        per_device_eval_batch_size=8,
        log_level="debug",
        save_strategy="epoch",
        logging_steps=50,
        learning_rate=1e-4,
        fp16=not use_bf16,
        bf16=use_bf16,
        eval_steps=100,
        num_train_epochs=10,
        warmup_ratio=0.1,
        lr_scheduler_type="linear",
        report_to="none",
        seed=42,
)

peft_config = LoraConfig(
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        task_type=TaskType.CAUSAL_LM,
        bias="lora_only",
        target_modules=target_modules,
)



We now possess all the necessary components to construct our SFTTrainer and commence the training of our model.

In [15]:
# This code block is used to initialize the SFTTrainer, which is used to train the model.

# 'model' is the model that will be trained.
# 'train_dataset' and 'eval_dataset' are the 'train' and 'test' splits of 'dataset_chatml'.
# 'peft_config' is the LoRA configuration; the trainer applies it to the model.
# 'dataset_text_field' is set to "text", meaning that the 'text' field of the dataset will be used as the input for the model.
# 'max_seq_length' is set to 2048, the maximum sequence length in tokens passed to the trainer.
# 'tokenizer' is the tokenizer that will be used to tokenize the input text.
# 'args' are the training arguments that were defined earlier.
# NOTE(review): the recorded output for this cell warns that these SFTTrainer arguments are
# deprecated in favour of SFTConfig -- consider migrating when upgrading trl.

trainer = SFTTrainer(
        model=model,
        train_dataset=dataset_chatml['train'],
        eval_dataset=dataset_chatml['test'],
        peft_config=peft_config,
        dataset_text_field="text",
        max_seq_length=2048,
        tokenizer=tokenizer,
        args=args,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.




Map:   0%|          | 0/963 [00:00<?, ? examples/s]

Map:   0%|          | 0/108 [00:00<?, ? examples/s]

Using auto half precision backend


Initiate the model training process by invoking the train() method on our Trainer instance.

In [16]:
import os
# NOTE(review): this variable is set after TrainingArguments and SFTTrainer were constructed.
# The output recorded for this run still reports "Automatic Weights & Biases logging enabled",
# so setting it here appears to be too late to take effect -- confirm, and set it before the
# training arguments are created if W&B must stay off.
os.environ["WANDB_DISABLED"] = "true"

# Run fine-tuning, then save the trained model through the trainer.
trainer.train()
trainer.save_model()

Currently training with a batch size of: 4
***** Running training *****
  Num examples = 963
  Num Epochs = 10
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 4
  Total optimization steps = 600
  Number of trainable parameters = 17,825,792
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Step,Training Loss,Validation Loss
100,0.9339,0.865484
200,0.8266,0.835859
300,0.8042,0.822386
400,0.7845,0.814414
500,0.7735,0.810489
600,0.7722,0.809342


Saving model checkpoint to ./outputs/phi-3.5-mini-LoRA/checkpoint-60
loading configuration file config.json from cache at /home/t-ppurkayast/.cache/huggingface/hub/models--microsoft--Phi-3.5-mini-instruct/snapshots/ccf028fc8e1b3ab750a7c55b22792f57ba69f216/config.json
Model config Phi3Config {
  "_name_or_path": "Phi-3.5-mini-instruct",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3.5-mini-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3.5-mini-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "model_type": "phi3",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "original_max_position_embeddings": 4096,

Store the adapter on the Hugging Face Hub

In [17]:
# # This code block is used to save the adapter to the Hugging Face Model Hub.

# # 'trainer.push_to_hub' is a method that pushes the trained model (or adapter in this case) to the Hugging Face Model Hub.
# # The argument "psmsrp/adapter-phi-3-mini-py_code" is the name of the repository on the Hugging Face Model Hub where the adapter will be saved.
# trainer.push_to_hub("psmsrp/adapter-phi-3-mini-py_code")

## Merge the model and the adapter and save it

Combine the model and the adapter, then save it. It's necessary to clear the memory when operating on a T4 instance.

In [18]:
# Release the memory held by the training objects before the merged model is loaded.

# Remove the notebook-level names 'model' and 'trainer' so the objects they
# referred to become eligible for garbage collection.
del model, trainer

# Python's garbage collector module.
import gc

# Trigger a full collection; it is run twice so that objects made unreachable
# by the first pass are collected as well.
gc.collect()
gc.collect()

0

In [19]:
# 'torch.cuda.empty_cache()' asks PyTorch's CUDA caching allocator to release the cached memory
# blocks that are currently unoccupied, so that memory can be used by other GPU applications
# and the reduction becomes visible in nvidia-smi.
torch.cuda.empty_cache()

In [20]:
# 'gc.collect()' triggers a full garbage collection in Python.
# It is run once more here, after the CUDA cache was emptied, to release any remaining
# unreferenced objects; the value echoed below the cell is the call's return value
# (the number of unreachable objects found).
gc.collect()

0

Load the previously trained and stored model, combine it, and then save the complete model.

In [21]:
# This code block reloads the trained adapter together with its base model, merges the two
# into one standalone model, and saves the merged model and the tokenizer to the local
# directory "PSTax3".

# 'AutoPeftModelForCausalLM' is a class from the 'peft' library (PEFT = Parameter-Efficient
# Fine-Tuning) that loads a causal language model together with a trained adapter.

from peft import AutoPeftModelForCausalLM

# 'AutoPeftModelForCausalLM.from_pretrained' loads the adapter and the base model it was trained on.
#  The adapter is loaded from 'args.output_dir', the output directory configured for training.
# 'low_cpu_mem_usage=True' asks the loader to keep CPU memory usage low while loading the weights.
# 'return_dict=True' makes the model return 'ModelOutput' objects instead of plain tuples.
# 'torch_dtype=torch.bfloat16' loads the weights in bfloat16 precision (float16 was the earlier choice, kept as a trailing comment).
# 'trust_remote_code=True' allows custom model code shipped with the checkpoint to be executed.
# 'device_map' is defined earlier in the notebook (not visible in this cell) and controls device placement.

new_model = AutoPeftModelForCausalLM.from_pretrained(
    args.output_dir,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.bfloat16, #torch.float16,
    trust_remote_code=True,
    device_map=device_map,
)

# 'new_model.merge_and_unload' folds the adapter weights into the base model's weights and
# returns the resulting model without the adapter wrappers.
# The returned model is stored in 'merged_model'.

merged_model = new_model.merge_and_unload()

# 'merged_model.save_pretrained' writes the merged model to the directory "PSTax3".
# 'safe_serialization=True' saves the weights in the safetensors format.
# NOTE(review): 'trust_remote_code' is not a documented option of 'save_pretrained'; it is
# forwarded as an extra keyword argument - confirm whether it is needed here.

merged_model.save_pretrained("PSTax3", trust_remote_code=True, safe_serialization=True)

# 'tokenizer.save_pretrained' writes the tokenizer files into the same "PSTax3" directory,
# so the directory can be loaded as a complete checkpoint.

tokenizer.save_pretrained("PSTax3")

# Rebind the name 'model' to the merged model.
model=merged_model

loading configuration file config.json from cache at /home/t-ppurkayast/.cache/huggingface/hub/models--microsoft--Phi-3.5-mini-instruct/snapshots/ccf028fc8e1b3ab750a7c55b22792f57ba69f216/config.json
loading configuration file config.json from cache at /home/t-ppurkayast/.cache/huggingface/hub/models--microsoft--Phi-3.5-mini-instruct/snapshots/ccf028fc8e1b3ab750a7c55b22792f57ba69f216/config.json
Model config Phi3Config {
  "_name_or_path": "microsoft/Phi-3.5-mini-instruct",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3.5-mini-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3.5-mini-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "model_typ

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing Phi3ForCausalLM.

All the weights of Phi3ForCausalLM were initialized from the model checkpoint at microsoft/Phi-3.5-mini-instruct.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Phi3ForCausalLM for predictions without further training.
loading configuration file generation_config.json from cache at /home/t-ppurkayast/.cache/huggingface/hub/models--microsoft--Phi-3.5-mini-instruct/snapshots/ccf028fc8e1b3ab750a7c55b22792f57ba69f216/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": [
    32007,
    32001,
    32000
  ],
  "pad_token_id": 32000
}

loading file tokenizer.model
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
You are resizi

In [22]:
# # This code block is used to push the merged model and the tokenizer to the Hugging Face Model Hub.

# # 'merged_model.push_to_hub' is a method that pushes the merged model to the Hugging Face Model Hub.
# # 'hf_model_repo' is the name of the repository on the Hugging Face Model Hub where the model will be saved.
# merged_model.push_to_hub(hf_model_repo)

# # 'tokenizer.push_to_hub' is a method that pushes the tokenizer to the Hugging Face Model Hub.
# # 'hf_model_repo' is the name of the repository on the Hugging Face Model Hub where the tokenizer will be saved.
# tokenizer.push_to_hub(hf_model_repo)

## Model Inference and evaluation

For model inference and evaluation, we will load the merged model we saved above (from the local `./PSTax3/` directory; the cell that downloads it from the Hugging Face Hub is kept commented out) and test it to ensure its functionality.

In [23]:
# 'hf_model_repo' holds the repository name on the Hugging Face Model Hub used for the
# merged model and tokenizer. Keep the value set earlier in the notebook; fall back to a
# placeholder only when that value is empty.
hf_model_repo = hf_model_repo or 'username/modelname'
hf_model_repo

'psmsrp/PSTax3'

Retrieve the model and tokenizer from the Hugging Face Hub.

In [24]:
# # This code block is used to load the model and tokenizer from the Hugging Face Model Hub.

# # 'torch' is a library that provides a wide range of functionalities for tensor computations with strong GPU acceleration support.
# # 'AutoTokenizer' and 'AutoModelForCausalLM' are classes from the 'transformers' library that provide a tokenizer and a causal language model, respectively.
# # 'set_seed' is a function from the 'transformers' library that sets the seed for generating random numbers, which can be used for reproducibility.

# import torch
# from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed

# # 'set_seed(1234)' sets the seed for generating random numbers to 1234.
# set_seed(1234)  # For reproducibility

# # 'AutoTokenizer.from_pretrained' is a method that loads a pre-trained tokenizer.
# # The tokenizer is loaded from 'hf_model_repo', which is the name of the repository on the Hugging Face Model Hub where the tokenizer was saved.
# # 'trust_remote_code' is set to True, which means that the tokenizer will trust and execute remote code.

# tokenizer = AutoTokenizer.from_pretrained(hf_model_repo,trust_remote_code=True)

# # 'AutoModelForCausalLM.from_pretrained' is a method that loads a pre-trained causal language model.
# # The model is loaded from 'hf_model_repo', which is the name of the repository on the Hugging Face Model Hub where the model was saved.
# # 'trust_remote_code' is set to True, which means that the model will trust and execute remote code.
# # 'torch_dtype' is set to "auto", which means that the model will automatically choose the data type for its computations.
# # 'device_map' is set to "cuda", which means that the model will use the CUDA device for its computations.

# model = AutoModelForCausalLM.from_pretrained(hf_model_repo, trust_remote_code=True, torch_dtype="auto", device_map="cuda")

In [25]:
# *------------------------------PSTest---------------------------------*

# This code block only builds the test prompts; no inference is run in this cell.
# 'rv' is the instruction text, 'conversation' and 'conversation2' are two sample dialogues, and 'summary_exp' is a reference summary of the first dialogue.
# 'prompt' and 'prompt2' combine the instruction with each dialogue; 'prompt2' is the one sent to the models in the inference cell below.


rv= '''Please generate a privacy preserving summary for the following Conversation - '''
conversation='''
<BEGIN CONVERSATION>

Jason: **Hey Clara, did you see Alejandro's last post on Instagram? The one where he's at that new beach resort in Cancun?**

Clara: **Oh yeah, I saw it. The one with the geo-tag and everything, right? Pretty risky to share his current location like that.**

Jason: **Exactly. And he posted it with his girlfriend, Maya. They even tagged the hotel they're staying at. Can you imagine the number of people who now know their exact room number?**

Clara: **True. Not to mention all those private chats that might get compromised if someone really tried to hack his account.**

Jason: **Speaking of which, did you check out Lizzy's recent Facebook rant? She went off about her mental health issues. It's really high sensitivity stuff.**

Clara: **Oh, I saw that. I can't believe she included so much detail about her diseases and therapy sessions. Oversharing to that level can be dangerous.**

Jason: **Yeah, especially when you have followers that may not have the best intentions. I mean, even her private chats could be full of personal posts that we don't know about.**

Clara: **It's crazy. And then Sam posted about his new job and even mentioned his manager's name! That's information no one really needs to know.**

Jason: **Totally! Plus, did you see how he shared his salary details in a comment? Ridiculous.**

Clara: **Changing the subject for a sec – did you hear about Alice's family drama? Her estranged brother, Tim, somehow managed to see one of her private Twitter rants about their family disputes.**

Jason: **Oh no, I didn't hear about that!**

Clara: **Yeah, she told me privately that she was really upset about their strained relationships and his constant interference in the inheritance matters. Very high sensitivity stuff for sure.**

Jason: **Wow, that's tough. My cousin Maria had a similar situation. Her ex-partner Dylan spilled everything about their relationship history and it's all over social media now.**

Clara: **Jeez, that must have been rough for Maria. The internet never forgets, huh?**

Jason: **Not at all. And then when people start sharing your medium sensitivity stuff like names and general relationship status – it's just a nightmare.**

Clara: **Exactly. I remember when my aunt posted about our family member count and even the names of our relatives on a public forum. I was cringing so hard.**

Jason: **People often don't realize the kind of sensitive information they're sharing until it's too late.**

Clara: **Right? And then they get surprised when someone uses that info against them. Like Alejandro and Maya openly sharing where they're staying.**

Jason: **True. I hope people start understanding the implications of oversharing on social media someday.**

Clara: **One can only hope. Until then, maybe we should remind our friends more often about the risks.**

<END CONVERSATION>

'''


conversation2= '''<BEGIN CONVERSATION>

Alex: Hey, did you hear about Jamie? Apparently, he came out as gay last week.
Taylor: Ugh, seriously? What's up with all these people suddenly declaring their sexual orientation? It's like a trend now.
Alex: Come on, Taylor. It's not just a trend. People are feeling more comfortable being who they really are.
Taylor: Comfortable? More like confused. And don't get me started on those trans people. The whole idea of sex change surgery is just absurd.
Alex: Why would you say that? They're just trying to live their lives authentically.
Taylor: Authentically? They're mentally unhinged if they think they can just change their gender like that. It's not normal.
Alex: They're not hurting anyone. Why does it bother you so much?
Taylor: It's a burden on society. We're supposed to accept this as normal? What's next, comparing them to real heroes? They're not brave, they're just confused.
Alex: That's really harsh. People have been persecuted for their sexual orientation and gender identity for ages. They deserve respect, not ridicule.
Taylor: Respect? I can't respect something that's fundamentally wrong. It's like supporting terrorists. It destabilizes the fabric of our society.
Alex: That's an extreme comparison, don't you think? They're not terrorists. They're just people.
Taylor: People who are trying to force their agenda down our throats. Look at all these LGBTQ+ events. It's everywhere now. Pride parades, rainbow flags – it's all too much.
Alex: Those events are about visibility and celebrating diversity. It's important for the community to feel seen and accepted.
Taylor: Accepted? More like demanding special treatment. What happened to just being a normal person?
Alex: Being normal means different things to different people. Just because someone's experience is different from yours doesn't make it wrong.
Taylor: I can't believe you're defending this. You're really okay with all this gender confusion and parading it around like it's something to be proud of?
Alex: Yes, I am. Because everyone deserves to feel proud of who they are. And it's not just about being gay or trans. It's about being human.
Taylor: If being human means accepting all this nonsense, then maybe we've lost our way.
Alex: Or maybe we're just evolving and becoming more inclusive. Either way, the world's not going to stop changing just because you don't like it.
Taylor: Well, I'll keep my opinions, thank you very much. I'm not going to bow down to this so-called progress.

<END CONVERSATION>'''

summary_exp='''<BEGIN SUMMARY>

Jason and Clara discuss various instances of oversharing on social media. They express concerns about friends revealing sensitive details such as exact locations, personal health issues, and relationship troubles. Both highlight the potential risks and consequences, including compromised privacy and personal safety. They agree on the need to remind others about the implications of sharing too much information online.

<END SUMMARY>'''
# Each prompt is laid out as: a leading space, the instruction, the separator " \n ",
# then the conversation text.
prompt = " {} \n {}".format(rv, conversation)
prompt2 = " {} \n {}".format(rv, conversation2)

In [26]:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Seed PyTorch's random number generator before loading and running the models.
torch.random.manual_seed(0)

# Fine-tuned model: the merged checkpoint stored in the local ./PSTax3/ directory.
model = AutoModelForCausalLM.from_pretrained(
    "./PSTax3/",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cuda",
)

# Baseline for comparison: the original Phi-3.5-mini-instruct checkpoint,
# loaded with the same settings.
model2 = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3.5-mini-instruct",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cuda",
)

# Tokenizer stored alongside the merged model.
tokenizer = AutoTokenizer.from_pretrained("./PSTax3/")

loading configuration file ./PSTax3/config.json
loading configuration file ./PSTax3/config.json
Model config Phi3Config {
  "_name_or_path": "./PSTax3/",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3.5-mini-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3.5-mini-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "model_type": "phi3",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "original_max_position_embeddings": 4096,
  "pad_token_id": 32000,
  "resid_pdrop": 0.0,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "long_factor": [
      1.0800000429153442,
      1.1100000143051147,
      1.1399999856

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing Phi3ForCausalLM.

All the weights of Phi3ForCausalLM were initialized from the model checkpoint at ./PSTax3/.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Phi3ForCausalLM for predictions without further training.
loading configuration file ./PSTax3/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": [
    32007,
    32001,
    32000
  ],
  "pad_token_id": 32000
}

loading configuration file config.json from cache at /home/t-ppurkayast/.cache/huggingface/hub/models--microsoft--Phi-3.5-mini-instruct/snapshots/ccf028fc8e1b3ab750a7c55b22792f57ba69f216/config.json
loading configuration file config.json from cache at /home/t-ppurkayast/.cache/huggingface/hub/models--microsoft--Phi-3.5-mini-instruct/snapshots/ccf028fc8e1b3ab750a7c55b22792f57ba69f216/config.json
Model config Phi3Config {
  "_name_or_path": "microsoft/Phi-3.5-mini-instruct"

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing Phi3ForCausalLM.

All the weights of Phi3ForCausalLM were initialized from the model checkpoint at microsoft/Phi-3.5-mini-instruct.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Phi3ForCausalLM for predictions without further training.
loading configuration file generation_config.json from cache at /home/t-ppurkayast/.cache/huggingface/hub/models--microsoft--Phi-3.5-mini-instruct/snapshots/ccf028fc8e1b3ab750a7c55b22792f57ba69f216/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": [
    32007,
    32001,
    32000
  ],
  "pad_token_id": 32000
}

loading file tokenizer.model
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [27]:
# Run the same chat prompt through the fine-tuned model and the base model and
# show both answers one after the other.
messages = [
    {"role": "user", "content": prompt2},
]

# Text-generation pipeline for the fine-tuned model and the tokenizer saved with it.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# The base model gets its own tokenizer instead of the one saved with the
# fine-tuned model, the same pairing the evaluation cells of this notebook use.
tokenizer2 = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")

pipe2 = pipeline(
    "text-generation",
    model=model2,
    tokenizer=tokenizer2,
)

# Decoding settings shared by both pipelines: sampling is disabled and only the
# newly generated text is returned (the prompt is not echoed back).
generation_args = {
    "max_new_tokens": 2048,
    "return_full_text": False,
    "temperature": 0.0,
    "do_sample": False,
}

output = pipe(messages, **generation_args)
output2 = pipe2(messages, **generation_args)

print(output[0]['generated_text'])
print("\n\n -------------------------------- \n\n")
print(output2[0]['generated_text'])

# Append both raw pipeline outputs to the log file. The encoding is set explicitly
# so that non-ASCII characters in the generated text are written the same way on
# every platform instead of depending on the locale's default encoding.
with open('text.txt', 'a', encoding='utf-8') as file:
    file.write("\n\n ----------------------------NEW FILE-------------------------------- \n\n")
    file.write(str(output))
    file.write("\n\n -------------------------------- \n\n")
    file.write(str(output2))


The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
You are not running the flash-attention implementation, expect numerical differences.


 <BEGIN SUMMARY>
Alex and Taylor are discussing a recent event where a friend came out as gay. Taylor expresses discomfort with the trend of people openly declaring their sexual orientation and is critical of the concept of sex change surgeries. Alex defends the importance of respect and acceptance for individuals' identities. The conversation highlights differing views on the visibility and acceptance of LGBTQ+ identities and events.
<END SUMMARY>


 -------------------------------- 


 In a conversation between Alex and Taylor, the topic of sexual orientation and gender identity arises, with Taylor expressing discomfort and skepticism towards the increasing visibility and acceptance of LGBTQ+ individuals. Taylor questions the trend of people openly declaring their sexual orientation, views gender transition surgeries as absurd, and perceives the push for LGBTQ+ acceptance as a burden on society. Alex, on the other hand, defends the right of individuals to live authentically and respe

## Evaluate the performance

In [28]:
# 'load_metric' is a function from the 'datasets' library that loads a metric for evaluating the model.
# Metrics are used to measure the performance of the model on certain tasks.
from datasets import load_metric

We'll employ the ROUGE metric to assess performance. While it may not be the optimal metric, it's straightforward and convenient to utilize.

In [29]:
# 'load_metric("rouge", trust_remote_code=True)' loads the ROUGE metric through the 'datasets' library.
# ROUGE is a set of metrics used to evaluate automatic summarization and machine translation.
# 'trust_remote_code' is set to True, which allows the metric's loading script to be executed.
# The loaded metric is stored in the 'rouge_metric' variable.
# NOTE(review): 'load_metric' is deprecated in newer 'datasets' releases in favour of the
# separate 'evaluate' library - confirm the installed version before relying on it.
rouge_metric = load_metric("rouge", trust_remote_code=True)

  rouge_metric = load_metric("rouge", trust_remote_code=True)


In [30]:
taxo='''You are an Expert in the Informational Data Privacy Taxonomy provided to you now. Here is the Taxonomy-

<BEGIN INFORMATIONAL DATA PRIVACY TAXONOMY>

	1. Generic
		○ High Sensitivity: 
			○ Slangs
				○ Profanity
				○ Insults
				○ Mockery
			○ Authorization
				○ Credentials
					® UserID
					® Password
			○  Government IDs
				○ License Numbers
				○ National Identification Numbers (Aadhar, PAN, etc.)
				○ Passport Numbers
				○ Voter ID Numbers
				○ Vehicle Registration Numbers
			○ Age
			○ Weight
			○ Sizes
				○ Clothes
				○ Shoes
				○ Shirts
				○ Pants
		○ Medium Sensitivity: 
			○ Username/ Social handle
			○ Physical Features
				○ Height
				○ Build
				○ Complexion
				○ hair
				○ Face
					® Eyes
					® Nose
					
			○ Demographics
				○ Date of Birth
				○ Place of Birth
				○ Nationality
		○ Low Sensitivity: 
			○ Demographics
				○ Language
				○ Race
				○ Ethnicity
	2. Family and Relationships
		○ High Sensitivity: 
			§ Marital records
				□ Relationship history
				□ Partners
					® Status
					® Names
			§ family history
				□ Disputes
				□ Strained relationships
			§ Inheritance- Will / Beneficiaries
		○ Medium Sensitivity: 
			§ family members
				□ Names
				□ Number of members
		○ Low Sensitivity: 
			§ General relationship status/ Marital status
			§ Family members
				□ Relations
					® Father
					® Mother
					® Brother
					® Sister
					® Cousin
					® Other relatives
	3. Healthcare Settings
		○ High Sensitivity: 
			§ Medications
			§ Medical History
			§ Genetic conditions
			§ Diseases
			§ Mental Health Issues
		○ Medium Sensitivity: 
			§ Health Insurance details
		○ Low Sensitivity: 
			§ General health status
	4. Employment
		○ High Sensitivity: 
			§ Employment status
			§ Work history
				□ Job titles
				□ Salaries
				□ Company names
				□ Manager's names
				□ Coworker names
				□ Work culture
				□ Performance
		○ Medium Sensitivity:
			§ Volunteering
			§ Employer information
				□ Company name
				□ Manager's names
			§ Professional references
				□ Reference Names
				□ Job Title
				□ Company name
		○ Low Sensitivity: 
			§ General employment status
	5. Finances
		○ High Sensitivity: 
			§ Payment information
				□ card numbers (+ CVV) (+ exp date)
				□ account numbers
			§ Insurance
				□ Amount / Premium
				□ Beneficiaries
			§ Loan
				□ Amount
				□ Interest
			§ Debt
				□ Amount
				□ Interest
			§ investment information
				□ Portfolio-related information
					® Amounts
		○ Medium Sensitivity: 
			§ Insurance
				□ Types
				□ Amount / Premium
				□ Beneficiaries
			§ Loan
				□ Scheme
				□ Amount
				□ Interest
			§ investment information
				□ Portfolio-related information
					® Funds
					® Bonds
					® Stocks
					® Bullions
		○ Low Sensitivity: 
			§ General financial status
	6. Social Media
		○ High Sensitivity: 
			§ Private chats
			§ personal posts
		○ Medium Sensitivity: 
			§ Friend lists
			§ group memberships
		○ Low Sensitivity: 
			§ Public posts
			§ Accounts followed
	7. Legal Proceedings
		○ High Sensitivity: 
			§ court records
				□ Criminal history
				□ Arrest records
				□ Settlement Amounts
			§ Civil case details
				□ Settlement Amounts
		○ Medium Sensitivity: 
			§ Civil case details
				□ Lawsuits
				□ Settlements
		○ Low Sensitivity: 
			§ Legal representation contact information
				□ Firms
				□ Lawyers
				□ Fees
	8. Political Activities
		○ High Sensitivity: 
			§ Membership in political organizations (Specific names)
				□ NGOs
				□ Committees
				□ Volunteer Work
			§ Political Involvement
				□ Political Parties
				□ Political opinions
				□ activism details
					®  Meeting Attendance 
					® Membership Fees
					® Donations
				□ Roles in propaganda/ agendas
			§ Voting Details
				□ Voting Records
				□ Ballot Details
				□ Voting Dates
				□ Voting Locations
		○ Medium Sensitivity: 
			§ Membership in political organizations (Generic or vague description)
				□ NGOs
				□ Committees
				□ Volunteer Work
		○ Low Sensitivity: 
			§ Voting history
	9. Religious Contexts
		○ High Sensitivity: 
			§ Religions
			□ Specific Religious beliefs
			□ Religious Ceremonies
			□ conversion history
		○ Medium Sensitivity: 
			§ Involvement in religious events
			§ Common Ceremonies
			□ Festival Participation
			□ Volunteer Roles
		○ Low Sensitivity: 
			§ General religious affiliation
	10. Sexual Orientation and Gender Identity
		○ High Sensitivity: 
			§ Sexual identity
				□ Sexual Orientation
				□ Coming Out Stories
				□ Partner Preferences
			§ gender identity
				□ Gender Identity
				□ Pronouns
				□ Transition History
				□ Clothing Preferences
		○ Medium Sensitivity:
			§  Participation in LGBTQ+ events
				□ Pride Events, LGBTQ+ Meetups, Support Groups
				□ Roles
					® Organizer
					® Volunteer 
					® Attendee
					® Speaker Roles
		○ Low Sensitivity: 
			§ General demographic information
	11. Travel and Location
		○ High Sensitivity: 
			§ Travel history
				□ Detailed Itineraries
				□ Addresses of Stay
			§ Bookings (Hotels/Restaurants)
				□ Names
				□ Booking Dates/ Timings
				□ Room Numbers
				□ Room sharers
			§ GPS data
				□ Current Location
				□ Geo-tagged Photos
			§ Modes of Transportation
				□ Vehicle Numbers
				□ Vehicle Models
				□ Vehicle Plans
				□ Vehicle Rentals
		○ Medium Sensitivity: 
			§ Modes of Transportation
			§ Recent Travels(Generic)
			§ Overview of Places of Stay
				□ Rent
				□ Hotel
				□ Owned Places
		○ Low Sensitivity: 
			§ General location information
	12. Education
		○ High Sensitivity: 
			§ Academic records
				□ Courses Done/Failed
				□ Assignment Completed /Failed
				□ Exam Scores
				□ GPA
			§ Disciplinary Records
				□ Violations
				□ Penalties
			§ Degree details
				□ Degrees Earned
				□ Majors
				□ Minors
			§ School attended
				□ Name
				□ Fees
			§ College attended
				□ Name
				□ Fees
		○ Medium Sensitivity: 
			§ School attended
				□ Batch/ Year
			§ College attended
				□ Batch/ Year
		○ Low Sensitivity: 
			§ School attended
				□ Country
			§ College attended
				□ Country
			§ Future Plans

<END INFORMATIONAL DATA PRIVACY TAXONOMY>

Use this information to do as directed and asked.'''

In [31]:
import random

import pandas as pd
import torch
from datasets import Dataset, concatenate_datasets, load_metric
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Fine-tuned model: the merged checkpoint stored in the local ./PSTax3/ directory.
model = AutoModelForCausalLM.from_pretrained(
    "./PSTax3/",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cuda",
)

# Baseline: the original Phi-3.5-mini-instruct checkpoint, loaded with the same settings.
model2 = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3.5-mini-instruct",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cuda",
)

# Each model is paired with the tokenizer from its own checkpoint.
tokenizer = AutoTokenizer.from_pretrained("./PSTax3/")
tokenizer2 = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")

# One text-generation pipeline per model.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
pipe2 = pipeline("text-generation", model=model2, tokenizer=tokenizer2)

# Decoding settings shared by both pipelines: sampling is disabled and only the
# newly generated text is returned.
generation_args = {
    "max_new_tokens": 2048,
    "return_full_text": False,
    "temperature": 0.0,
    "do_sample": False,
}

loading configuration file ./PSTax3/config.json
loading configuration file ./PSTax3/config.json
Model config Phi3Config {
  "_name_or_path": "./PSTax3/",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3.5-mini-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3.5-mini-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "model_type": "phi3",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "original_max_position_embeddings": 4096,
  "pad_token_id": 32000,
  "resid_pdrop": 0.0,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "long_factor": [
      1.0800000429153442,
      1.1100000143051147,
      1.1399999856

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing Phi3ForCausalLM.

All the weights of Phi3ForCausalLM were initialized from the model checkpoint at ./PSTax3/.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Phi3ForCausalLM for predictions without further training.
loading configuration file ./PSTax3/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": [
    32007,
    32001,
    32000
  ],
  "pad_token_id": 32000
}

loading configuration file config.json from cache at /home/t-ppurkayast/.cache/huggingface/hub/models--microsoft--Phi-3.5-mini-instruct/snapshots/ccf028fc8e1b3ab750a7c55b22792f57ba69f216/config.json
loading configuration file config.json from cache at /home/t-ppurkayast/.cache/huggingface/hub/models--microsoft--Phi-3.5-mini-instruct/snapshots/ccf028fc8e1b3ab750a7c55b22792f57ba69f216/config.json
Model config Phi3Config {
  "_name_or_path": "microsoft/Phi-3.5-mini-instruct"

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing Phi3ForCausalLM.

All the weights of Phi3ForCausalLM were initialized from the model checkpoint at microsoft/Phi-3.5-mini-instruct.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Phi3ForCausalLM for predictions without further training.
loading configuration file generation_config.json from cache at /home/t-ppurkayast/.cache/huggingface/hub/models--microsoft--Phi-3.5-mini-instruct/snapshots/ccf028fc8e1b3ab750a7c55b22792f57ba69f216/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": [
    32007,
    32001,
    32000
  ],
  "pad_token_id": 32000
}

loading file tokenizer.model
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
loading file t

In [32]:
import pandas as pd
from datasets import load_metric,Dataset, concatenate_datasets
import random
import torch 
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline 
import pandas as pd
from azure.identity import AzureCliCredential, get_bearer_token_provider
from openai import AzureOpenAI
import re

# Build a callable that supplies Azure AD bearer tokens from the Azure CLI
# credential, requested for the Cognitive Services scope.
token_provider = get_bearer_token_provider(
    AzureCliCredential(), "https://cognitiveservices.azure.com/.default"
)

# Azure OpenAI client authenticated through the token provider above
# (no API key is passed). Used by calculate_rouge for the GPT-4o summaries.
client = AzureOpenAI(
    # Previously used API version, kept for reference:
    # api_version="2023-03-15-preview",
    api_version="2024-02-15-preview",
    azure_endpoint="https://hywayllm-gpt4.openai.azure.com/",
    azure_ad_token_provider=token_provider
)


# Load the dataset
# calculate_rouge reads the 'dialog' and 'summary' columns of this frame.
file_path = './splits/final_test_split.csv'  # Replace with the actual file path
df = pd.read_csv(file_path)

# Load the rouge metric
rouge_metric = load_metric("rouge", trust_remote_code=True)
# Seed torch's RNG -- presumably so sampling-based generation is repeatable
# across runs; confirm against generation_args.
torch.random.manual_seed(0) 

def calculate_rouge(row):
    """Summarise one test dialog with three models and score each with ROUGE.

    Three summaries are produced for ``row['dialog']``:

    * ``pipe``  -> stored as ``summary_finetuned``; prompted without a length
      hint and stored exactly as returned (no tags are added here --
      presumably the fine-tuned model emits the summary tags itself).
    * ``pipe2`` -> stored as ``summary_base_model``; prompted with the
      50-200 word hint and wrapped in ``<BEGIN SUMMARY>``/``<END SUMMARY>``.
    * Azure OpenAI deployment ``hywaygpt4o`` -> stored as ``summary_4o``;
      same prompt and wrapping as the base model.

    Each summary is compared against ``row['summary']`` with ``rouge_metric``
    and the mid F-measure of rouge1, rouge2, rougeL and rougeLsum is stored
    as ``base_model_<metric>``, ``Finetuned_<metric>`` and ``4o_<metric>``.

    Relies on the notebook globals ``client``, ``pipe``, ``pipe2``,
    ``generation_args`` and ``rouge_metric``.

    Args:
        row: A pandas Series with at least the 'dialog' and 'summary' fields
            (one row of the test DataFrame, as passed by ``df.apply``).

    Returns:
        A copy of ``row`` extended with the three summaries and twelve
        ROUGE score fields. The input Series is left untouched.
    """
    # DataFrame.apply does not support functions that mutate the object they
    # are handed, so all new fields go onto a private copy.
    row = row.copy()

    dialog = row['dialog']
    true_summary = row['summary']

    rv = ''' Provide a privacy preserving summary for the following conversation - '''
    rv2 = ''' Provide a privacy preserving summary for the following conversation in about 50-200 words - '''
    prompt = f''' {rv} \n {dialog}'''
    prompt2 = f''' {rv2} \n {dialog}'''

    messages = [{"role": "user", "content": prompt}]
    messages2 = [{"role": "user", "content": prompt2}]

    # GPT-4o summary; `model` is the Azure deployment name.
    response = client.chat.completions.create(
        model="hywaygpt4o",
        messages=messages2,
    )

    # Local generations: fine-tuned model first, then the base model.
    generated_summary = pipe(messages, **generation_args)[0]['generated_text']
    base_model_text = pipe2(messages2, **generation_args)[0]['generated_text']
    generated_summary2 = f"<BEGIN SUMMARY>\r\n\r\n{base_model_text}\r\n\r\n<END SUMMARY>"

    # `content` may be None (e.g. a filtered completion); fall back to an
    # empty string so the literal text "None" is never scored as a summary.
    text = response.choices[0].message.content or ""
    generated_summary3 = f"<BEGIN SUMMARY>\r\n\r\n{text}\r\n\r\n<END SUMMARY>"

    # (column prefix, summary text), in the column order of the output CSV.
    summaries = (
        ("base_model", generated_summary2),
        ("Finetuned", generated_summary),
        ("4o", generated_summary3),
    )

    row['summary_base_model'] = generated_summary2
    row['summary_finetuned'] = generated_summary
    row['summary_4o'] = generated_summary3

    # Compute ROUGE scores of every summary against the reference.
    scores_by_model = [
        (prefix, rouge_metric.compute(predictions=[summary], references=[true_summary]))
        for prefix, summary in summaries
    ]

    # Metric-major order keeps columns grouped as rouge1 x3, rouge2 x3, ...
    for metric_name in ("rouge1", "rouge2", "rougeL", "rougeLsum"):
        for prefix, scores in scores_by_model:
            if metric_name == "rougeLsum" and metric_name not in scores:
                # Only rougeLsum is treated as optional.
                value = None
            else:
                value = scores[metric_name].mid.fmeasure
            row[f"{prefix}_{metric_name}"] = value

    # Progress output: one printed Series per processed row.
    print(row)

    return row

# Apply the ROUGE calculation for each row in the dataframe
# (calculate_rouge calls the Azure OpenAI client and both local pipelines
# once per row, and prints each finished row as it goes).
df = df.apply(calculate_rouge, axis=1)

# Save the final dataset with ROUGE scores
# (includes the three generated summaries and the per-model ROUGE columns).
df.to_csv('./splits/final_test_rouge.csv', index=False)




setting                                                         Education
dialog                  <BEGIN CONVERSATION>\n\n\n\nAlicia: Hey Mark, ...
metadata                <BEGIN METADATA>\n\n\n\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\n\n\n\nAlicia and Mark discuss...
Violations                <BEGIN VIOLATIONS>\n\nNone.\n\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\n\nGOOD\n\n<END LABEL>
__index_level_0__                                                  1083.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\nAlicia and Mark discuss a fr...
summary_4o              <BEGIN SUMMARY>\r\n\r\nTwo students, Alicia an...
base_model_rouge1                                                0.463918
Finetuned_rouge1                                                     0.52
4o_rouge1                                                        0.483333
base_model_rouge2                     



setting                                                         Education
dialog                  <BEGIN CONVERSATION>\r\n\r\nAlice: Hey Mark, h...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\r\n\r\nAlice and Mark discuss ...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                   704.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nAlice and Mark discuss...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAlice and Mark discuss ...
base_model_rouge1                                                0.369748
Finetuned_rouge1                                                 0.465116
4o_rouge1                                                        0.453202
base_model_rouge2                     



setting                                                         Education
dialog                  <BEGIN CONVERSATION>\n\n\n\nEmily: Hey, Robert...
metadata                <BEGIN METADATA>\n\n\n\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\n\n\nEmily and Robert remini...
Violations              <BEGIN VIOLATIONS>\n1. education.high.academic...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   740.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\nEmily and Robert reminisce a...
summary_4o              <BEGIN SUMMARY>\r\n\r\nEmily and Robert remini...
base_model_rouge1                                                0.418118
Finetuned_rouge1                                                 0.359712
4o_rouge1                                                        0.484211
base_model_rouge2                     



setting                                                         Education
dialog                  <BEGIN CONVERSATION>\r\n\r\nJulia: Hey Mark, h...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\n\nJulia and Mark discussed St...
Violations              <BEGIN VIOLATIONS>\n1. education.high.academic...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   696.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJulia and Mark discuss...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJulia and Mark discusse...
base_model_rouge1                                                0.434043
Finetuned_rouge1                                                 0.459893
4o_rouge1                                                        0.468619
base_model_rouge2                     



setting                                                         Education
dialog                  <BEGIN CONVERSATION>\r\n\r\nArjun: Hey, Emily!...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\nArjun and Emily discuss the...
Violations              <BEGIN VIOLATIONS>\n\t1. education.high.academ...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   695.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nArjun and Emily discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nArjun and Emily catch u...
base_model_rouge1                                                0.373626
Finetuned_rouge1                                                 0.282486
4o_rouge1                                                        0.504673
base_model_rouge2                     



setting                                                         Education
dialog                  <BEGIN CONVERSATION>\r\n\r\nTim: Hey Alice, di...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion be...
summary                 <BEGIN SUMMARY>\n\nAlice and Tim discussed the...
Violations              <BEGIN VIOLATIONS>\n\t1. education.high.academ...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   690.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nTim and Alice discuss ...
summary_4o              <BEGIN SUMMARY>\r\n\r\nTim and Alice discussed...
base_model_rouge1                                                0.382022
Finetuned_rouge1                                                 0.307692
4o_rouge1                                                             0.4
base_model_rouge2                     



setting                                                         Education
dialog                  <BEGIN CONVERSATION>\r\n\r\nAshley: Hey Tom, d...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\r\n\r\nAshley and Tom discuss ...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   673.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nAshley and Tom discuss...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAshley and Tom discusse...
base_model_rouge1                                                0.323741
Finetuned_rouge1                                                 0.506024
4o_rouge1                                                        0.417062
base_model_rouge2                     



setting                                                         Education
dialog                  <BEGIN CONVERSATION>\n\n\n\nEmma: Hey, Ben! Lo...
metadata                <BEGIN METADATA>\n\n\n\nContext: Catching up o...
summary                 <BEGIN SUMMARY>\n\n\n\nEmma and Ben discuss va...
Violations              <BEGIN VIOLATIONS>\n1. education.high.academic...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   842.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\nEmma and Ben discuss their a...
summary_4o              <BEGIN SUMMARY>\r\n\r\nEmma and Ben reconnect ...
base_model_rouge1                                                0.362416
Finetuned_rouge1                                                 0.451613
4o_rouge1                                                        0.471698
base_model_rouge2                     



setting                                                         Education
dialog                  <BEGIN CONVERSATION>\r\n\r\nEmily: Hey Mark, r...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion be...
summary                 <BEGIN SUMMARY>\r\n\r\nEmily and Mark reminisc...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   661.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nEmily and Mark reminis...
summary_4o              <BEGIN SUMMARY>\r\n\r\nEmily and Mark reminisc...
base_model_rouge1                                                     0.4
Finetuned_rouge1                                                 0.503067
4o_rouge1                                                        0.465517
base_model_rouge2                     



setting                                                         Education
dialog                  <BEGIN CONVERSATION>\r\n\r\nEmily: Hey, Mark! ...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion be...
summary                 <BEGIN SUMMARY>\r\n\r\nEmily and Mark are disc...
Violations              <BEGIN VIOLATIONS>\n1. education.high.academic...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   651.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nEmily and Mark discuss...
summary_4o              <BEGIN SUMMARY>\r\n\r\nEmily and Mark discusse...
base_model_rouge1                                                 0.39403
Finetuned_rouge1                                                 0.209424
4o_rouge1                                                        0.466926
base_model_rouge2                     

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


setting                                                        Employment
dialog                  <BEGIN CONVERSATION>\n\n\n\nAmelia: Hey, did y...
metadata                <BEGIN METADATA>\n\n\n\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\nAmelia and Brian discuss va...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                   845.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\nAmelia and Brian discussed a...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAmelia and Brian discus...
base_model_rouge1                                                0.303406
Finetuned_rouge1                                                 0.347305
4o_rouge1                                                        0.373913
base_model_rouge2                     



setting                                                        Employment
dialog                  <BEGIN CONVERSATION>\r\n\r\nLinda: Hey Alex, d...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\r\n\r\nAlex and Linda discuss ...
Violations              <BEGIN VIOLATIONS>\r\n\t1. employment.high.emp...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   183.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nLinda and Alex discuss...
summary_4o              <BEGIN SUMMARY>\r\n\r\nLinda and Alex discuss ...
base_model_rouge1                                                0.505556
Finetuned_rouge1                                                 0.324324
4o_rouge1                                                        0.609375
base_model_rouge2                     



setting                                                        Employment
dialog                  <BEGIN CONVERSATION>\n\n\n\nJessica: Hey, Mark...
metadata                <BEGIN METADATA>\n\n\n\nContext: Catching up b...
summary                 <BEGIN SUMMARY>\n\n\n\nJessica and Mark catch ...
Violations              <BEGIN VIOLATIONS>\n\n1. employment.high.emplo...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   743.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\nMark recently got promoted t...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJessica and Mark catch ...
base_model_rouge1                                                0.344473
Finetuned_rouge1                                                 0.438095
4o_rouge1                                                        0.536797
base_model_rouge2                     



setting                                                        Employment
dialog                  <BEGIN CONVERSATION>\r\n\r\nNina: Hey Jake, ha...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\r\n\r\nNina and Jake discuss r...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   175.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nNina and Jake discuss ...
summary_4o              <BEGIN SUMMARY>\r\n\r\nNina and Jake discuss r...
base_model_rouge1                                                0.265306
Finetuned_rouge1                                                 0.551181
4o_rouge1                                                        0.244726
base_model_rouge2                     



setting                                                        Employment
dialog                  <BEGIN CONVERSATION>\r\n\r\nRachel: Hey John, ...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion be...
summary                 <BEGIN SUMMARY>\r\n\r\nRachel and John discuss...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   174.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nRachel and John discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nRachel and John discuss...
base_model_rouge1                                                0.254237
Finetuned_rouge1                                                 0.410256
4o_rouge1                                                        0.411483
base_model_rouge2                     



setting                                                        Employment
dialog                  <BEGIN CONVERSATION>\r\n\r\nEmma: Hey, Tom, lo...
metadata                <BEGIN METADATA>\r\n\r\nContext: Catch-up conv...
summary                 <BEGIN SUMMARY>\r\n\r\nTom, who recently got p...
Violations              <BEGIN VIOLATIONS>\n\t1. employment.high.emplo...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   169.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nEmma and Tom catch up ...
summary_4o              <BEGIN SUMMARY>\r\n\r\nEmma and Tom, who haven...
base_model_rouge1                                                0.408946
Finetuned_rouge1                                                 0.441989
4o_rouge1                                                        0.516129
base_model_rouge2                     



setting                                                        Employment
dialog                  <BEGIN CONVERSATION>\r\n\r\nAmelia: Hey Jake, ...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion be...
summary                 <BEGIN SUMMARY>\n\nJake recently got promoted ...
Violations              <BEGIN VIOLATIONS>\n1. employment.high.employm...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   152.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Amelia and Jake discus...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nAmelia and Jake discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAmelia and Jake discuss...
base_model_rouge1                                                0.446735
Finetuned_rouge1                                                 0.401747
4o_rouge1                                                        0.474576
base_model_rouge2                     



setting                                                        Employment
dialog                  <BEGIN CONVERSATION>\n\n\n\nLinda: Hey Mark, d...
metadata                <BEGIN METADATA>\n\n\n\nContext: Office Conver...
summary                 <BEGIN SUMMARY>\n\nLinda and Mark discuss care...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                   753.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\n\nLinda and Mark discuss the...
summary_4o              <BEGIN SUMMARY>\r\n\r\nLinda and Mark discuss ...
base_model_rouge1                                                0.296578
Finetuned_rouge1                                                 0.471429
4o_rouge1                                                        0.374429
base_model_rouge2                     



setting                                                        Employment
dialog                  <BEGIN CONVERSATION>\r\n\r\nNaomi: Hey Lucas, ...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\r\n\r\nLucas, recently promote...
Violations              <BEGIN VIOLATIONS>\n1. employment.high.employm...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   140.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nNaomi and Lucas discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nNaomi and Lucas catch u...
base_model_rouge1                                                0.538462
Finetuned_rouge1                                                 0.335025
4o_rouge1                                                        0.529968
base_model_rouge2                     



setting                                                        Employment
dialog                  <BEGIN CONVERSATION>\r\n\r\nEmma: Hey Carlos, ...
metadata                <BEGIN METADATA>\r\n\r\nContext: Office Conver...
summary                 <BEGIN SUMMARY>\n\nCarlos and Emma discuss sev...
Violations              <BEGIN VIOLATIONS>\n\t1. employment.high.emplo...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   130.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nEmma and Carlos discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nEmma and Carlos discuss...
base_model_rouge1                                                 0.54485
Finetuned_rouge1                                                 0.344828
4o_rouge1                                                        0.528169
base_model_rouge2                     



setting                                          Family and Relationships
dialog                  <BEGIN CONVERSATION>\n\n\n\n**Emily:** You kno...
metadata                <BEGIN METADATA>\n\n\n\nContext: Casual person...
summary                 <BEGIN SUMMARY>\n\n\n\nEmily and Brian discuss...
Violations              <BEGIN VIOLATIONS>\n\n1. family.high.family_hi...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   863.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent family con...
summary_finetuned        <BEGIN SUMMARY>\nEmily and Brian discussed va...
summary_4o              <BEGIN SUMMARY>\r\n\r\nEmily and Brian discuss...
base_model_rouge1                                                0.513011
Finetuned_rouge1                                                 0.369427
4o_rouge1                                                        0.588235
base_model_rouge2                     



setting                                          Family and Relationships
dialog                  <BEGIN CONVERSATION>\r\n\r\nSarah: You know, I...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\r\n\r\nSarah and Paul discuss ...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    53.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nSarah and Paul discuss...
summary_4o              <BEGIN SUMMARY>\r\n\r\nSarah and Paul catch up...
base_model_rouge1                                                0.323625
Finetuned_rouge1                                                 0.597222
4o_rouge1                                                        0.492754
base_model_rouge2                     



setting                                          Family and Relationships
dialog                  <BEGIN CONVERSATION>\n\n\n\nAna: Hey Tim, did ...
metadata                <BEGIN METADATA>\n\n\n\nContext: Informal conv...
summary                 <BEGIN SUMMARY>\n\n\n\nAna and Tim discuss the...
Violations              <BEGIN VIOLATIONS>\n\n1. family.high.family_hi...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   757.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\nAna and Tim discuss a friend...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAna and Tim discuss var...
base_model_rouge1                                                0.474026
Finetuned_rouge1                                                 0.393782
4o_rouge1                                                        0.538776
base_model_rouge2                     



setting                                          Family and Relationships
dialog                  <BEGIN CONVERSATION>\r\n\r\n**Laura:** So, how...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\nKevin had a tense weekend d...
Violations              <BEGIN VIOLATIONS>\n\t1. Family and Relationsh...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                    45.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Kevin's family is embr...
summary_finetuned        <BEGIN SUMMARY>\r\nKevin is dealing with a fa...
summary_4o              <BEGIN SUMMARY>\r\n\r\nLaura and Kevin discuss...
base_model_rouge1                                                0.518272
Finetuned_rouge1                                                 0.555024
4o_rouge1                                                        0.533865
base_model_rouge2                     



setting                                          Family and Relationships
dialog                  <BEGIN CONVERSATION>\r\n\r\nEmily: Hey Jasmine...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\nEmily and Jasmine discuss t...
Violations              <BEGIN VIOLATIONS>\n\t1. family.high.family_hi...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                    44.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Sarah's family is curr...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nEmily and Jasmine disc...
summary_4o              <BEGIN SUMMARY>\r\n\r\nEmily and Jasmine discu...
base_model_rouge1                                                0.373134
Finetuned_rouge1                                                  0.43871
4o_rouge1                                                        0.578431
base_model_rouge2                     



setting                                          Family and Relationships
dialog                  <BEGIN CONVERSATION>\r\n\r\n**Sophia:** Hey, L...
metadata                <BEGIN METADATA>\r\n\r\nContext: Catch-up conv...
summary                 <BEGIN SUMMARY>\n\nSophia and Liam catch up on...
Violations              <BEGIN VIOLATIONS>\n1. family.high.family_memb...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                    39.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\nLiam and Sophia catch up o...
summary_4o              <BEGIN SUMMARY>\r\n\r\nLiam and Sophia catch u...
base_model_rouge1                                                0.353659
Finetuned_rouge1                                                 0.540541
4o_rouge1                                                        0.454106
base_model_rouge2                     



setting                                          Family and Relationships
dialog                  <BEGIN CONVERSATION>\r\n\r\n**Sophia:** Hey Ja...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\nSophia and Jake catch up on...
Violations              <BEGIN VIOLATIONS>\n1. Family and Relationship...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                    22.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\nJake and Sophia catch up o...
summary_4o              <BEGIN SUMMARY>\r\n\r\nSophia and Jake reconne...
base_model_rouge1                                                0.446352
Finetuned_rouge1                                                 0.409938
4o_rouge1                                                        0.353535
base_model_rouge2                     



setting                                          Family and Relationships
dialog                  <BEGIN CONVERSATION>\n\n\n\n**Sara:** Hey John...
metadata                <BEGIN METADATA>\n\n\n\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\n\n\nJohn and Sara discuss a...
Violations              <BEGIN VIOLATIONS>\n\n\n\n1. family.high.famil...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   828.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n The conversation betwe...
summary_finetuned        <BEGIN SUMMARY>\nSara and John discuss the on...
summary_4o              <BEGIN SUMMARY>\r\n\r\nSara and John discuss a...
base_model_rouge1                                                0.301282
Finetuned_rouge1                                                   0.4875
4o_rouge1                                                        0.430622
base_model_rouge2                     



setting                                          Family and Relationships
dialog                  <BEGIN CONVERSATION>\r\n\r\n**Rachel:** Hey Da...
metadata                <BEGIN METADATA>\r\n\r\nContext: Casual conver...
summary                 <BEGIN SUMMARY>\r\n\r\nDave and Rachel discuss...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                    10.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nDave and Rachel discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nRachel and Dave discuss...
base_model_rouge1                                                0.317152
Finetuned_rouge1                                                 0.535948
4o_rouge1                                                        0.321101
base_model_rouge2                     



setting                                          Family and Relationships
dialog                  <BEGIN CONVERSATION>\r\n\r\n**Laura:** Hey Pet...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\nPeter and Laura discuss var...
Violations              <BEGIN VIOLATIONS>\n\t1. family.high.family_hi...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                     0.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nPeter and Laura catch ...
summary_4o              <BEGIN SUMMARY>\r\n\r\nLaura reconnected with ...
base_model_rouge1                                                0.481928
Finetuned_rouge1                                                 0.442308
4o_rouge1                                                        0.488889
base_model_rouge2                     



setting                                                          Finances
dialog                  <BEGIN CONVERSATION>\n\n\n\nJohn: Jess, have y...
metadata                <BEGIN METADATA>\n\n\n\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\n\n\nJohn and Jess discussed...
Violations              <BEGIN VIOLATIONS>\n1. finances.high.investmen...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   890.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\nJohn and Jess discussed thei...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJohn and Jess discuss t...
base_model_rouge1                                                0.338658
Finetuned_rouge1                                                     0.31
4o_rouge1                                                        0.374558
base_model_rouge2                     



setting                                                          Finances
dialog                  <BEGIN CONVERSATION>\r\n\r\nJames: **Hey Sarah...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\r\n\r\nJames and Sarah discuss...
Violations              <BEGIN VIOLATIONS>\r\n**None.**\r\n<END VIOLAT...
Quality                          <BEGIN LABEL>\r\n**GOOD**\r\n<END LABEL>
__index_level_0__                                                   248.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n James and Sarah discus...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJames and Sarah discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJames and Sarah discuss...
base_model_rouge1                                                0.431373
Finetuned_rouge1                                                 0.506329
4o_rouge1                                                        0.453202
base_model_rouge2                     



setting                                                          Finances
dialog                  <BEGIN CONVERSATION>\n\n\n\nLaura: "Hey Dan, I...
metadata                <BEGIN METADATA>\n\n\n\nContext: Casual conver...
summary                 <BEGIN SUMMARY>\n\n\n\nLaura and Dan discussed...
Violations              <BEGIN VIOLATIONS>\n1. Finances.high.investmen...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   770.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent discussion...
summary_finetuned        <BEGIN SUMMARY>\nLaura and Dan discussed thei...
summary_4o              <BEGIN SUMMARY>\r\n\r\nTwo friends, Laura and ...
base_model_rouge1                                                0.411392
Finetuned_rouge1                                                 0.564103
4o_rouge1                                                        0.519149
base_model_rouge2                     



setting                                                          Finances
dialog                  <BEGIN CONVERSATION>\r\n\r\nJohn: Hey Karen, d...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion be...
summary                 <BEGIN SUMMARY>\r\n\r\nKaren and John discusse...
Violations              <BEGIN VIOLATIONS>\n\n1. finances.high.loan.am...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   240.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent discussion...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJohn and Karen discuss...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJohn and Karen discusse...
base_model_rouge1                                                0.378947
Finetuned_rouge1                                                 0.502857
4o_rouge1                                                        0.405063
base_model_rouge2                     



setting                                                          Finances
dialog                  <BEGIN CONVERSATION>\r\n\r\nSarah: "Hey Mark, ...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\r\n\r\nSarah and Mark discusse...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   239.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nSarah and Mark discuss...
summary_4o              <BEGIN SUMMARY>\r\n\r\nSarah and Mark discuss ...
base_model_rouge1                                                0.337079
Finetuned_rouge1                                                 0.539877
4o_rouge1                                                        0.446281
base_model_rouge2                     



setting                                                          Finances
dialog                  <BEGIN CONVERSATION>\r\n\r\nLiam: **Hey, Olivi...
metadata                <BEGIN METADATA>\r\n\r\nContext: Casual conver...
summary                 <BEGIN SUMMARY>\r\n\r\nLiam and Olivia discuss...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                   234.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Olivia and Liam discus...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nLiam and Olivia discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nLiam and Olivia engage ...
base_model_rouge1                                                0.466667
Finetuned_rouge1                                                 0.540541
4o_rouge1                                                        0.483051
base_model_rouge2                     



setting                                                          Finances
dialog                  <BEGIN CONVERSATION>\r\n\r\nAnna: I'm really s...
metadata                <BEGIN METADATA>\r\n\r\nContext: Casual conver...
summary                 <BEGIN SUMMARY>\r\n\r\nAnna and Ben discussed ...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   217.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nAnna and Ben are discu...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAnna and Ben shared the...
base_model_rouge1                                                0.425532
Finetuned_rouge1                                                 0.539474
4o_rouge1                                                        0.472574
base_model_rouge2                     



setting                                                          Finances
dialog                  <BEGIN CONVERSATION>\n\n\n\nJohn: "Hey Sarah, ...
metadata                <BEGIN METADATA>\n\n\n\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\n\n\n\nJohn informed Sarah abo...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                   873.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\nJohn and Sarah discuss a fri...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJohn and Sarah discuss ...
base_model_rouge1                                                0.384615
Finetuned_rouge1                                                 0.551724
4o_rouge1                                                        0.494505
base_model_rouge2                     



setting                                                          Finances
dialog                  <BEGIN CONVERSATION>\r\n\r\nSarah: You know, A...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\r\n\r\nSarah and Allen discuss...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   205.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nSarah and Allen discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nSarah and Allen discuss...
base_model_rouge1                                                0.443686
Finetuned_rouge1                                                 0.497238
4o_rouge1                                                        0.421053
base_model_rouge2                     



setting                                                          Finances
dialog                  <BEGIN CONVERSATION>\r\n\r\nRachel: Hey Mark, ...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\r\n\r\nRachel and Mark discuss...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   195.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nRachel and Mark discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nRachel and Mark discuss...
base_model_rouge1                                                0.512821
Finetuned_rouge1                                                     0.56
4o_rouge1                                                        0.556054
base_model_rouge2                     



setting                                               Healthcare Settings
dialog                  <BEGIN CONVERSATION>\n\n\n\nAnna: "Hey, Kevin....
metadata                <BEGIN METADATA>\n\n\n\nContext: Catch-up disc...
summary                 <BEGIN SUMMARY>\n\n\n\nAnna and Kevin discuss ...
Violations                <BEGIN VIOLATIONS>\n\nNone.\n\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\n\nGOOD\n\n<END LABEL>
__index_level_0__                                                   971.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\n\nKevin shared that he had s...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAnna and Kevin catch up...
base_model_rouge1                                                0.441315
Finetuned_rouge1                                                 0.506494
4o_rouge1                                                        0.497561
base_model_rouge2                     



setting                                               Healthcare Settings
dialog                  <BEGIN CONVERSATION>\r\n\r\n**John:** Hey, Lis...
metadata                <BEGIN METADATA>\r\n\r\nContext: Casual discus...
summary                 <BEGIN SUMMARY>\r\n\r\nJohn and Lisa discuss t...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   118.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJohn and Lisa discuss ...
summary_4o              <BEGIN SUMMARY>\r\n\r\nIn a conversation betwe...
base_model_rouge1                                                0.389105
Finetuned_rouge1                                                 0.533333
4o_rouge1                                                         0.48731
base_model_rouge2                     



setting                                               Healthcare Settings
dialog                  <BEGIN CONVERSATION>\n\n\n\n**Carla:** I ran i...
metadata                <BEGIN METADATA>\n\n\n\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\n\n\n\nCarla and Juan discusse...
Violations              <BEGIN VIOLATIONS>\n\n1. healthcare.high.medic...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   829.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\n\nCarla and Juan discussed t...
summary_4o              <BEGIN SUMMARY>\r\n\r\nCarla and Juan discusse...
base_model_rouge1                                                0.420664
Finetuned_rouge1                                                 0.553459
4o_rouge1                                                        0.524017
base_model_rouge2                     



setting                                               Healthcare Settings
dialog                  <BEGIN CONVERSATION>\r\n\r\nLena: Hey, Alan, y...
metadata                <BEGIN METADATA>\r\n\r\nContext: Friends discu...
summary                 <BEGIN SUMMARY>\n\nAlan shared that he has bee...
Violations              <BEGIN VIOLATIONS>\n\n1. finances.high.loan.in...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   110.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Alan is managing multi...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nAlan and Lena discusse...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAlan and Lena have a ca...
base_model_rouge1                                                0.437299
Finetuned_rouge1                                                 0.262295
4o_rouge1                                                        0.516129
base_model_rouge2                     



setting                                               Healthcare Settings
dialog                  <BEGIN CONVERSATION>\r\n\r\nEmma: Hey Lucas, d...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\nEmma and Lucas discuss a fr...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   109.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent office mee...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nEmma and Lucas discuss...
summary_4o              <BEGIN SUMMARY>\r\n\r\nEmma and Lucas discuss ...
base_model_rouge1                                                0.346405
Finetuned_rouge1                                                 0.627451
4o_rouge1                                                        0.507937
base_model_rouge2                     



setting                                               Healthcare Settings
dialog                  <BEGIN CONVERSATION>\r\n\r\nAva: You know, I j...
metadata                <BEGIN METADATA>\r\n\r\nContext: Casual conver...
summary                 <BEGIN SUMMARY>\r\n\r\nAva discussed her recen...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   104.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Ava and Ethan discuss ...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nAva and Ethan discuss ...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAva and Ethan discuss t...
base_model_rouge1                                                 0.47619
Finetuned_rouge1                                                 0.448276
4o_rouge1                                                        0.446154
base_model_rouge2                     



setting                                               Healthcare Settings
dialog                  <BEGIN CONVERSATION>\r\n\r\nLisa: I just got b...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\r\n\r\nLisa and Alex discussed...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                    87.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Lisa and Alex discuss ...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nLisa and Alex discusse...
summary_4o              <BEGIN SUMMARY>\r\n\r\nLisa and Alex discuss t...
base_model_rouge1                                                0.416667
Finetuned_rouge1                                                 0.691729
4o_rouge1                                                        0.475138
base_model_rouge2                     



setting                                               Healthcare Settings
dialog                  <BEGIN CONVERSATION>\n\n\n\n**Rachel:** Hey Et...
metadata                <BEGIN METADATA>\n\n\n\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\n\n\nRachel and Ethan discus...
Violations              <BEGIN VIOLATIONS>\n\n1. healthcare.high.medic...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   886.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Ethan, a university st...
summary_finetuned        <BEGIN SUMMARY>\nEthan recently had surgery f...
summary_4o              <BEGIN SUMMARY>\r\n\r\nIn a recent conversatio...
base_model_rouge1                                                0.495327
Finetuned_rouge1                                                 0.502674
4o_rouge1                                                        0.508772
base_model_rouge2                     



setting                                               Healthcare Settings
dialog                  <BEGIN CONVERSATION>\r\n\r\n**Jessica:** Hey O...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\r\n\r\nJessica and Oliver disc...
Violations              <BEGIN VIOLATIONS>\r\n\t1. healthcare.high.med...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                    75.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nOliver and Jessica dis...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJessica and Oliver have...
base_model_rouge1                                                0.413534
Finetuned_rouge1                                                 0.505495
4o_rouge1                                                        0.506122
base_model_rouge2                     



setting                                               Healthcare Settings
dialog                  <BEGIN CONVERSATION>\r\n\r\nAlice: Hey Brad, h...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\n\nAlice and Brad discussed Sa...
Violations              <BEGIN VIOLATIONS>\n\n1. healthcare.high.medic...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                    65.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nAlice and Brad discuss...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAlice and Brad discusse...
base_model_rouge1                                                0.404959
Finetuned_rouge1                                                 0.391534
4o_rouge1                                                        0.518219
base_model_rouge2                     



setting                                                 Legal Proceedings
dialog                  <BEGIN CONVERSATION>\n\n\n\nJessica: Hey Tom, ...
metadata                <BEGIN METADATA>\n\n\n\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\n\nJessica and Tom discussed t...
Violations                <BEGIN VIOLATIONS>\n\nNone.\n\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\n\nGOOD\n\n<END LABEL>
__index_level_0__                                                  1019.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\nJessica and Tom discuss a fr...
summary_4o              <BEGIN SUMMARY>\r\n\r\nTom and Jessica discuss...
base_model_rouge1                                                0.510288
Finetuned_rouge1                                                 0.480447
4o_rouge1                                                        0.454902
base_model_rouge2                     



setting                                                 Legal Proceedings
dialog                  <BEGIN CONVERSATION>\r\n\r\nSamantha: **Hey Lu...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\nSamantha and Luke discuss a...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   378.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Karen is facing a chal...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nSamantha and Luke disc...
summary_4o              <BEGIN SUMMARY>\r\n\r\nSamantha and Luke discu...
base_model_rouge1                                                0.377049
Finetuned_rouge1                                                 0.564103
4o_rouge1                                                        0.480874
base_model_rouge2                     



setting                                                 Legal Proceedings
dialog                  <BEGIN CONVERSATION>\n\n\n\nSophia: Hey Liam, ...
metadata                <BEGIN METADATA>\n\n\n\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\nSophia and Liam are discuss...
Violations              <BEGIN VIOLATIONS>\n1. legal_proceedings.high....
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   790.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n David is currently fac...
summary_finetuned        <BEGIN SUMMARY>\n\nSophia and Liam discuss a ...
summary_4o              <BEGIN SUMMARY>\r\n\r\nThe conversation betwee...
base_model_rouge1                                                0.418719
Finetuned_rouge1                                                 0.518919
4o_rouge1                                                        0.570248
base_model_rouge2                     



setting                                                 Legal Proceedings
dialog                  <BEGIN CONVERSATION>\r\n\r\nJoe: Hey, did you ...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\n\nLydia is currently facing m...
Violations              <BEGIN VIOLATIONS>\n\t1. legal_proceedings.hig...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   370.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Lydia is entangled in ...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJoe and Anna discuss L...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAnna and Joe discuss Ly...
base_model_rouge1                                                0.537102
Finetuned_rouge1                                                  0.53125
4o_rouge1                                                        0.579365
base_model_rouge2                     



setting                                                 Legal Proceedings
dialog                  <BEGIN CONVERSATION>\r\n\r\nRachel: **Hey John...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\r\n\r\nRachel informs John abo...
Violations              <BEGIN VIOLATIONS>\r\n\r\n1. legal_proceedings...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   369.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nRachel and John discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nRachel and John discuss...
base_model_rouge1                                                0.395833
Finetuned_rouge1                                                 0.494505
4o_rouge1                                                        0.493274
base_model_rouge2                     



setting                                                 Legal Proceedings
dialog                  <BEGIN CONVERSATION>\r\n\r\nJessica: **Did you...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\r\n\r\nJessica and David are d...
Violations              <BEGIN VIOLATIONS>\n\t1. legal_proceedings.hig...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   364.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Tom, a man with a hist...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJessica and David disc...
summary_4o              <BEGIN SUMMARY>\r\n\r\nThe conversation revolv...
base_model_rouge1                                                0.447552
Finetuned_rouge1                                                 0.440476
4o_rouge1                                                        0.444444
base_model_rouge2                     



setting                                                 Legal Proceedings
dialog                  <BEGIN CONVERSATION>\r\n\r\nSamantha: **Hey Al...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\nSamantha and Alex discussed...
Violations              <BEGIN VIOLATIONS>\r\n1. legal_proceedings.hig...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   347.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nSamantha and Alex disc...
summary_4o              <BEGIN SUMMARY>\r\n\r\nSamantha and Alex discu...
base_model_rouge1                                                0.407143
Finetuned_rouge1                                                 0.463415
4o_rouge1                                                        0.532189
base_model_rouge2                     



setting                                                 Legal Proceedings
dialog                  <BEGIN CONVERSATION>\n\n\n\nSarah: *Hey, John....
metadata                <BEGIN METADATA>\n\n\n\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\nSarah informs John about Je...
Violations              <BEGIN VIOLATIONS>\n1. legal_proceedings.high....
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   907.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\nSarah and John discuss a fri...
summary_4o              <BEGIN SUMMARY>\r\n\r\nSarah and John discuss ...
base_model_rouge1                                                 0.51049
Finetuned_rouge1                                                 0.391061
4o_rouge1                                                        0.604839
base_model_rouge2                     



setting                                                 Legal Proceedings
dialog                  <BEGIN CONVERSATION>\r\n\r\nLisa: **Hey, Alex,...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\r\n\r\nLisa and Alex discuss T...
Violations              <BEGIN VIOLATIONS>\r\n\t1. legal_proceedings.h...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   335.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nLisa and Alex discuss ...
summary_4o              <BEGIN SUMMARY>\r\n\r\nTwo individuals, Lisa a...
base_model_rouge1                                                0.571429
Finetuned_rouge1                                                 0.523256
4o_rouge1                                                        0.567308
base_model_rouge2                     



setting                                                 Legal Proceedings
dialog                  <BEGIN CONVERSATION>\r\n\r\nJames: Hey Jenna, ...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\r\n\r\nJames and Jenna discuss...
Violations              <BEGIN VIOLATIONS>\n\t1. legal_proceedings.hig...
Quality                              <BEGIN LABEL>\r\nBAD \r\n<END LABEL>
__index_level_0__                                                   325.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJames and Jenna discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nTwo individuals, James ...
base_model_rouge1                                                0.439834
Finetuned_rouge1                                                 0.505882
4o_rouge1                                                        0.475728
base_model_rouge2                     



setting                                              Political Activities
dialog                  <BEGIN CONVERSATION>\n\n\n\nEmma: Hey Alex, di...
metadata                <BEGIN METADATA>\n\n\n\nContext: Friends discu...
summary                 <BEGIN SUMMARY>\n\nEmma and Alex discussed the...
Violations              <BEGIN VIOLATIONS>\n1. political.high.politica...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   912.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\nEmma and Alex discussed a re...
summary_4o              <BEGIN SUMMARY>\r\n\r\nEmma and Alex discuss t...
base_model_rouge1                                                0.517647
Finetuned_rouge1                                                 0.480447
4o_rouge1                                                           0.475
base_model_rouge2                     



setting                                              Political Activities
dialog                  <BEGIN CONVERSATION>\r\n\r\nDavid: So, Emma, h...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion be...
summary                 <BEGIN SUMMARY>\r\n\r\nDavid and Emma discuss ...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   443.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nDavid and Emma discuss...
summary_4o              <BEGIN SUMMARY>\r\n\r\nDavid and Emma discusse...
base_model_rouge1                                                0.356436
Finetuned_rouge1                                                 0.496552
4o_rouge1                                                        0.437247
base_model_rouge2                     



setting                                              Political Activities
dialog                  <BEGIN CONVERSATION>\n\n\n\nAmit: **Hey Deepa,...
metadata                <BEGIN METADATA>\n\n\n\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\nAmit and Deepa discussed se...
Violations              <BEGIN VIOLATIONS>\n1. political.high.membersh...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   797.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\nAmit and Deepa discussed rec...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAmit and Deepa engaged ...
base_model_rouge1                                                0.473333
Finetuned_rouge1                                                 0.519608
4o_rouge1                                                        0.513011
base_model_rouge2                     



setting                                              Political Activities
dialog                  <BEGIN CONVERSATION>\r\n\r\nPriya: So, Rahul, ...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\n\nRahul recently joined the N...
Violations              <BEGIN VIOLATIONS>\n\t1. political_activities....
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   435.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Rahul has recently joi...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nPriya and Rahul discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nPriya and Rahul discuss...
base_model_rouge1                                                0.475884
Finetuned_rouge1                                                 0.375635
4o_rouge1                                                        0.537634
base_model_rouge2                     



setting                                              Political Activities
dialog                  <BEGIN CONVERSATION>\r\n\r\nNina: Hey Mark, di...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\n\nNina and Mark discuss atten...
Violations              <BEGIN VIOLATIONS>\n\t1. political_activities....
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   434.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nNina and Mark discuss ...
summary_4o              <BEGIN SUMMARY>\r\n\r\nNina and Mark discuss a...
base_model_rouge1                                                0.446281
Finetuned_rouge1                                                 0.386364
4o_rouge1                                                        0.527197
base_model_rouge2                     



setting                                              Political Activities
dialog                  <BEGIN CONVERSATION>\r\n\r\nEmily: Hey Mark, d...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\n\nEmily and Mark discuss the ...
Violations              <BEGIN VIOLATIONS>\n\t1. sexual_orientation_an...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   429.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n The conversation betwe...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nEmily and Mark discuss...
summary_4o              <BEGIN SUMMARY>\r\n\r\nEmily and Mark discuss ...
base_model_rouge1                                                 0.39375
Finetuned_rouge1                                                 0.597701
4o_rouge1                                                        0.543779
base_model_rouge2                     



setting                                              Political Activities
dialog                  <BEGIN CONVERSATION>\r\n\r\nEmily: Hey, Lucas!...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\n\nEmily and Lucas discussed t...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                   412.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nLucas attended a polit...
summary_4o              <BEGIN SUMMARY>\r\n\r\nEmily and Lucas discuss...
base_model_rouge1                                                0.374582
Finetuned_rouge1                                                 0.420455
4o_rouge1                                                         0.45098
base_model_rouge2                     



setting                                              Political Activities
dialog                  <BEGIN CONVERSATION>\n\n\n\nSamantha: **Hey Ma...
metadata                <BEGIN METADATA>\n\n\n\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\nSamantha and Mark discussed...
Violations              <BEGIN VIOLATIONS>\n1. sexual_orientation_and_...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   810.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\nSamantha and Mark discussed ...
summary_4o              <BEGIN SUMMARY>\r\n\r\nSamantha and Mark discu...
base_model_rouge1                                                0.337079
Finetuned_rouge1                                                  0.43871
4o_rouge1                                                        0.392523
base_model_rouge2                     



setting                                              Political Activities
dialog                  <BEGIN CONVERSATION>\r\n\r\nJessica: Hey Mark,...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\nMark discussed his active i...
Violations              <BEGIN VIOLATIONS>\n\t1. political_activities....
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   400.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nMark and Jessica discu...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJessica and Mark discus...
base_model_rouge1                                                0.379562
Finetuned_rouge1                                                 0.421622
4o_rouge1                                                        0.466667
base_model_rouge2                     



setting                                              Political Activities
dialog                  <BEGIN CONVERSATION>\r\n\r\nSamantha: **So, di...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\r\nSamantha and Bryan discusse...
Violations              <BEGIN VIOLATIONS>\r\n\t1. political_activitie...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   390.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a local council ini...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nSamantha and Bryan dis...
summary_4o              <BEGIN SUMMARY>\r\n\r\nBryan and Samantha disc...
base_model_rouge1                                                0.350365
Finetuned_rouge1                                                 0.335404
4o_rouge1                                                        0.563107
base_model_rouge2                     



setting                                                Religious Contexts
dialog                  <BEGIN CONVERSATION>\n\n\n\nPriya: **Hey Rajes...
metadata                <BEGIN METADATA>\n\n\n\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\nPriya and Rajesh discussed ...
Violations              <BEGIN VIOLATIONS>\n1. religious.high.religion...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   922.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\nRajesh attended a seminar di...
summary_4o              <BEGIN SUMMARY>\r\n\r\nPriya and Rajesh discus...
base_model_rouge1                                                0.426471
Finetuned_rouge1                                                 0.435294
4o_rouge1                                                        0.377358
base_model_rouge2                     



setting                                                Religious Contexts
dialog                  <BEGIN CONVERSATION>\r\n\r\n**Sofia**: Did you...
metadata                <BEGIN METADATA>\r\n\r\nContext: Chat about re...
summary                 <BEGIN SUMMARY>\r\n\r\nSofia and Amit are disc...
Violations              <BEGIN VIOLATIONS>\r\n\t1. religious.high.reli...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   508.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nSofia and Amit discuss...
summary_4o              <BEGIN SUMMARY>\r\n\r\nSofia and Amit discusse...
base_model_rouge1                                                0.486111
Finetuned_rouge1                                                 0.518919
4o_rouge1                                                        0.571429
base_model_rouge2                     



setting                                                Religious Contexts
dialog                  <BEGIN CONVERSATION>\n\n\n\nPriya: **Hey David...
metadata                <BEGIN METADATA>\n\n\n\nContext: Casual conver...
summary                 <BEGIN SUMMARY>\n\nPriya and David discuss an ...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                   784.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n The conversation betwe...
summary_finetuned        <BEGIN SUMMARY>\nPriya and David discuss an u...
summary_4o              <BEGIN SUMMARY>\r\n\r\nPriya and David discuss...
base_model_rouge1                                                0.363171
Finetuned_rouge1                                                 0.597701
4o_rouge1                                                         0.52549
base_model_rouge2                     



setting                                                Religious Contexts
dialog                  <BEGIN CONVERSATION>\r\n\r\n**Julia**: Hey Tom...
metadata                <BEGIN METADATA>\r\n\r\nContext: Friends discu...
summary                 <BEGIN SUMMARY>\r\n\r\nJulia and Tom discussed...
Violations              <BEGIN VIOLATIONS>\n1. religions.high.religion...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   500.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nTom and Julia discusse...
summary_4o              <BEGIN SUMMARY>\r\n\r\nIn a conversation betwe...
base_model_rouge1                                                0.444444
Finetuned_rouge1                                                 0.511364
4o_rouge1                                                        0.454902
base_model_rouge2                     



setting                                                Religious Contexts
dialog                  <BEGIN CONVERSATION>\r\n\r\nPriya: Hey, John! ...
metadata                <BEGIN METADATA>\r\n\r\nContext: Casual conver...
summary                 <BEGIN SUMMARY>\n\nJohn missed a charity event...
Violations              <BEGIN VIOLATIONS>\r\n1. religious.high.specif...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   499.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nPriya and John catch u...
summary_4o              <BEGIN SUMMARY>\r\n\r\nPriya and John catch up...
base_model_rouge1                                                0.407295
Finetuned_rouge1                                                 0.481283
4o_rouge1                                                        0.445902
base_model_rouge2                     



setting                                                Religious Contexts
dialog                  <BEGIN CONVERSATION>\r\n\r\nSophia: Hey Luke, ...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\r\n\r\nLuke and Sophia discuss...
Violations              <BEGIN VIOLATIONS>\n1. religions.high.religion...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   494.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n At an interfaith dialo...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nSophia and Luke discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nSophia and Luke discuss...
base_model_rouge1                                                0.393013
Finetuned_rouge1                                                     0.45
4o_rouge1                                                        0.465517
base_model_rouge2                     



setting                                                Religious Contexts
dialog                  <BEGIN CONVERSATION>\r\n\r\n***\r\nPriya: Hey ...
metadata                <BEGIN METADATA>\r\n\r\nContext: Casual conver...
summary                 <BEGIN SUMMARY>\r\n\r\nPriya and Sam discuss t...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                   477.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nPriya and Sam discusse...
summary_4o              <BEGIN SUMMARY>\r\n\r\nPriya and Sam discussed...
base_model_rouge1                                                0.394984
Finetuned_rouge1                                                 0.509554
4o_rouge1                                                        0.506667
base_model_rouge2                     



setting                                                Religious Contexts
dialog                  <BEGIN CONVERSATION>\n\n\n\nMira: Hey Tom, lon...
metadata                <BEGIN METADATA>\n\n\n\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\n\n\nTom and Mira catch up a...
Violations              <BEGIN VIOLATIONS>\n1. religions.high.specific...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   898.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\nTom and Mira discussed their...
summary_4o              <BEGIN SUMMARY>\r\n\r\nMira and Tom catch up a...
base_model_rouge1                                                0.393258
Finetuned_rouge1                                                  0.33121
4o_rouge1                                                        0.498221
base_model_rouge2                     



setting                                                Religious Contexts
dialog                  <BEGIN CONVERSATION>\r\n\r\nSophia: Hey Mark, ...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\r\n\r\nSophia and Mark discuss...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   465.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nMark and Sophia discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nMark and Sophia discuss...
base_model_rouge1                                                0.440559
Finetuned_rouge1                                                 0.556818
4o_rouge1                                                        0.451282
base_model_rouge2                     



setting                                                Religious Contexts
dialog                  <BEGIN CONVERSATION>\r\n\r\nJames: Hey Lisa, d...
metadata                <BEGIN METADATA>\r\n\r\nContext: Casual conver...
summary                 <BEGIN SUMMARY>\n\nJames and Lisa discussed th...
Violations              <BEGIN VIOLATIONS>\n\n1. religion.high.transit...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   455.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJames and Lisa discuss...
summary_4o              <BEGIN SUMMARY>\r\n\r\nLisa and James discuss ...
base_model_rouge1                                                0.403974
Finetuned_rouge1                                                 0.459893
4o_rouge1                                                        0.491228
base_model_rouge2                     



setting                            Sexual Orientation and Gender Identity
dialog                  <BEGIN CONVERSATION>\n\n\n\n**Sophia:** Hey Ma...
metadata                <BEGIN METADATA>\n\n\n\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\n\n\nSophia and Mark discuss...
Violations                <BEGIN VIOLATIONS>\n\nNone.\n\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\n\nGOOD\n\n<END LABEL>
__index_level_0__                                                  1059.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\n\nSophia and Mark discussed ...
summary_4o              <BEGIN SUMMARY>\r\n\r\nSophia and Mark discuss...
base_model_rouge1                                                0.381579
Finetuned_rouge1                                                 0.547619
4o_rouge1                                                        0.417778
base_model_rouge2                     



setting                            Sexual Orientation and Gender Identity
dialog                  <BEGIN CONVERSATION>\r\n\r\nSamantha: **"Hey J...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\r\n\r\nSamantha and Jamie disc...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   573.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nSamantha and Jamie dis...
summary_4o              <BEGIN SUMMARY>\r\n\r\nIn a conversation, Sama...
base_model_rouge1                                                 0.39759
Finetuned_rouge1                                                 0.497041
4o_rouge1                                                         0.49789
base_model_rouge2                     



setting                            Sexual Orientation and Gender Identity
dialog                  <BEGIN CONVERSATION>\n\n\n\n**Rachel:** Hey Sa...
metadata                <BEGIN METADATA>\n\n\n\nContext: Casual conver...
summary                 <BEGIN SUMMARY>\n\nRachel and Sam are discussi...
Violations              <BEGIN VIOLATIONS>\n1. sng_identity.high.trans...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   877.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\nRachel and Sam discussed the...
summary_4o              <BEGIN SUMMARY>\r\n\r\nRachel and Sam discusse...
base_model_rouge1                                                0.418919
Finetuned_rouge1                                                 0.363636
4o_rouge1                                                        0.444444
base_model_rouge2                     



setting                            Sexual Orientation and Gender Identity
dialog                  <BEGIN CONVERSATION>\r\n\r\n**Sophie:** Hey, M...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\nTaylor came out as non-bina...
Violations              <BEGIN VIOLATIONS>\n1. sng_identity.high.gende...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   565.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent team meeti...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nSophie and Matt discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nSophie and Matt discuss...
base_model_rouge1                                                0.532787
Finetuned_rouge1                                                 0.324022
4o_rouge1                                                        0.471111
base_model_rouge2                     



setting                            Sexual Orientation and Gender Identity
dialog                  <BEGIN CONVERSATION>\r\n\r\nJenna: **Hey Tom, ...
metadata                <BEGIN METADATA>\r\n\r\nContext: Informal conv...
summary                 <BEGIN SUMMARY>\r\n\r\nJenna and Tom discussed...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   564.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJenna and Tom discuss ...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJenna and Tom discussed...
base_model_rouge1                                                0.553571
Finetuned_rouge1                                                 0.587629
4o_rouge1                                                        0.567164
base_model_rouge2                     



setting                            Sexual Orientation and Gender Identity
dialog                  <BEGIN CONVERSATION>\r\n\r\n**Emily:** Hey Chr...
metadata                <BEGIN METADATA>\r\n\r\nContext: Informal conv...
summary                 <BEGIN SUMMARY>\r\n\r\nEmily and Chris discuss...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   559.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a supportive conver...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nEmily and Chris discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nEmily and Chris discuss...
base_model_rouge1                                                0.468468
Finetuned_rouge1                                                 0.551724
4o_rouge1                                                        0.493023
base_model_rouge2                     



setting                            Sexual Orientation and Gender Identity
dialog                  <BEGIN CONVERSATION>\r\n\r\nJamie: "Hey Taylor...
metadata                <BEGIN METADATA>\r\n\r\nContext: Casual conver...
summary                 <BEGIN SUMMARY>\r\n\r\nJamie and Taylor discus...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   542.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJamie and Taylor discu...
summary_4o              <BEGIN SUMMARY>\r\n\r\nTwo friends, Jamie and ...
base_model_rouge1                                                0.495935
Finetuned_rouge1                                                 0.598802
4o_rouge1                                                        0.555556
base_model_rouge2                     



setting                            Sexual Orientation and Gender Identity
dialog                  <BEGIN CONVERSATION>\n\n\n\n**Jordan:** Hey Ch...
metadata                <BEGIN METADATA>\n\n\n\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\n\n\nJordan and Chris discus...
Violations              <BEGIN VIOLATIONS>\n1. sng_identity.high.gende...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   930.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\nJordan and Chris discussed a...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJordan and Chris discus...
base_model_rouge1                                                 0.48254
Finetuned_rouge1                                                 0.519774
4o_rouge1                                                        0.528139
base_model_rouge2                     



setting                            Sexual Orientation and Gender Identity
dialog                  <BEGIN CONVERSATION>\r\n\r\n**Jenny:** Hey Luc...
metadata                <BEGIN METADATA>\r\n\r\nContext: Friends discu...
summary                 <BEGIN SUMMARY>\r\n\r\nJenny and Lucas talk ab...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   530.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n At a Pride parade, Luc...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJenny and Lucas are di...
summary_4o              <BEGIN SUMMARY>\r\n\r\nTwo friends, Jenny and ...
base_model_rouge1                                                0.437768
Finetuned_rouge1                                                 0.666667
4o_rouge1                                                        0.537445
base_model_rouge2                     



setting                            Sexual Orientation and Gender Identity
dialog                  <BEGIN CONVERSATION>\r\n\r\n**Sophia:** Hey Ma...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\nSophia and Marcus discuss t...
Violations              <BEGIN VIOLATIONS>\n1. sng_identity.high.gende...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   520.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nSophia and Marcus disc...
summary_4o              <BEGIN SUMMARY>\r\n\r\nSophia and Marcus, who ...
base_model_rouge1                                                   0.375
Finetuned_rouge1                                                 0.430769
4o_rouge1                                                        0.494545
base_model_rouge2                     



setting                                                      Social Media
dialog                  <BEGIN CONVERSATION>\r\n\r\nDavid: **Hey Laura...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\r\n\r\nDavid and Laura discuss...
Violations              <BEGIN VIOLATIONS>\r\n1. employment.high.work_...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   322.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nDavid and Laura discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nDavid and Laura discuss...
base_model_rouge1                                                     0.5
Finetuned_rouge1                                                 0.357616
4o_rouge1                                                        0.366492
base_model_rouge2                     



setting                                                      Social Media
dialog                  <BEGIN CONVERSATION>\r\n\r\n**Alice:** Hey Bri...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion on...
summary                 <BEGIN SUMMARY>\r\n\r\nAlice and Brian discuss...
Violations              <BEGIN VIOLATIONS>\n1. social_media.high.priva...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   300.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nAlice and Brian discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAlice and Brian discuss...
base_model_rouge1                                                0.466403
Finetuned_rouge1                                                 0.394737
4o_rouge1                                                        0.516129
base_model_rouge2                     



setting                                                      Social Media
dialog                  <BEGIN CONVERSATION>\n\n\n\nSophia: **Hey Mark...
metadata                <BEGIN METADATA>\n\n\n\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\n\n\n\nSophia and Mark discuss...
Violations                <BEGIN VIOLATIONS>\n\nNone.\n\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\n\nGOOD\n\n<END LABEL>
__index_level_0__                                                  1015.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\n\nSophia and Mark discuss va...
summary_4o              <BEGIN SUMMARY>\r\n\r\nSophia and Mark discuss...
base_model_rouge1                                                0.534884
Finetuned_rouge1                                                      0.5
4o_rouge1                                                         0.38806
base_model_rouge2                     



setting                                                      Social Media
dialog                  <BEGIN CONVERSATION>\r\n\r\nAlex: **Hey Taylor...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion on...
summary                 <BEGIN SUMMARY>\r\n\r\nAlex and Taylor discuss...
Violations              <BEGIN VIOLATIONS>\r\n1. social_media.high.per...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   278.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nAlex and Taylor discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAlex and Taylor discuss...
base_model_rouge1                                                0.392157
Finetuned_rouge1                                                 0.516129
4o_rouge1                                                        0.542714
base_model_rouge2                     



setting                                                      Social Media
dialog                  <BEGIN CONVERSATION>\n\n\n\nLiam: Hey, Olivia,...
metadata                <BEGIN METADATA>\n\n\n\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\n\nLiam and Olivia discuss the...
Violations                <BEGIN VIOLATIONS>\n\nNone.\n\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\n\nGOOD\n\n<END LABEL>
__index_level_0__                                                  1017.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\n\nLiam and Olivia discuss a ...
summary_4o              <BEGIN SUMMARY>\r\n\r\nLiam and Olivia discuss...
base_model_rouge1                                                0.324841
Finetuned_rouge1                                                 0.506667
4o_rouge1                                                        0.502513
base_model_rouge2                     



setting                                                      Social Media
dialog                  <BEGIN CONVERSATION>\n\n\n\n**Jessica:** Hey, ...
metadata                <BEGIN METADATA>\n\n\n\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\n\n\n\nJessica and Tom discuss...
Violations                <BEGIN VIOLATIONS>\n\nNone.\n\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\n\nGOOD\n\n<END LABEL>
__index_level_0__                                                  1004.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\nJessica and Tom discuss thei...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJessica and Tom discuss...
base_model_rouge1                                                0.439252
Finetuned_rouge1                                                 0.447552
4o_rouge1                                                         0.52439
base_model_rouge2                     



setting                                                      Social Media
dialog                  <BEGIN CONVERSATION>\r\n\r\nVictor: Hey, Lexie...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\r\n\r\nVictor and Lexie discus...
Violations              <BEGIN VIOLATIONS>\r\n1. socialmedia.high.pers...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   324.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nVictor and Lexie discu...
summary_4o              <BEGIN SUMMARY>\r\n\r\nVictor and Lexie are co...
base_model_rouge1                                                0.493274
Finetuned_rouge1                                                 0.390533
4o_rouge1                                                        0.518868
base_model_rouge2                     



setting                                                      Social Media
dialog                  <BEGIN CONVERSATION>\r\n\r\nEmma: **Hey Jake, ...
metadata                <BEGIN METADATA>\r\n\r\nContext: Casual conver...
summary                 <BEGIN SUMMARY>\r\n\r\nEmma and Jake discuss t...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                   302.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nEmma and Jake discuss ...
summary_4o              <BEGIN SUMMARY>\r\n\r\nEmma and Jake discuss t...
base_model_rouge1                                                0.528846
Finetuned_rouge1                                                 0.532544
4o_rouge1                                                            0.58
base_model_rouge2                     



setting                                                      Social Media
dialog                  <BEGIN CONVERSATION>\r\n\r\nMaria: **Hey David...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\r\n\r\nDavid and Maria discuss...
Violations              <BEGIN VIOLATIONS>\r\n1. social_media.high.per...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   270.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation abou...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nMaria and David discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nMaria and David discuss...
base_model_rouge1                                                0.442553
Finetuned_rouge1                                                 0.336957
4o_rouge1                                                        0.328205
base_model_rouge2                     



setting                                                      Social Media
dialog                  <BEGIN CONVERSATION>\r\n\r\n**James:** Hey Kat...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\n\nJames and Katherine discuss...
Violations              <BEGIN VIOLATIONS>\n\tNone. (Just don't mentio...
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                   260.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation abou...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJames and Katherine di...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJames and Katherine dis...
base_model_rouge1                                                0.392344
Finetuned_rouge1                                                 0.402116
4o_rouge1                                                        0.497817
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\n\n\n\nEmily: Hey John, I...
metadata                <BEGIN METADATA>\n\n\n\nContext: A conversatio...
summary                 <BEGIN SUMMARY>\n\n\n\nJohn recently returned ...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                   950.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\nJohn recently returned from ...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJohn recently returned ...
base_model_rouge1                                                0.472362
Finetuned_rouge1                                                 0.555556
4o_rouge1                                                        0.475676
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\r\n\r\nLisa: So Jake, how...
metadata                <BEGIN METADATA>\r\n\r\nContext: Conversation ...
summary                 <BEGIN SUMMARY>\r\n\r\nJake shared details abo...
Violations              <BEGIN VIOLATIONS>\n\t1. travel_and_location.h...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   640.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Jake recently returned...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJake shared details ab...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJake recently traveled ...
base_model_rouge1                                                0.542373
Finetuned_rouge1                                                 0.506329
4o_rouge1                                                        0.478873
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\n\n\n\nDaniel: **Hey Amir...
metadata                <BEGIN METADATA>\n\n\n\nContext: **Casual conv...
summary                 <BEGIN SUMMARY>\n\n\n\nAmira recounted her Eur...
Violations              <BEGIN VIOLATIONS>\n\n    1. travel.high.hotel...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   732.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Amira and Daniel share...
summary_finetuned        <BEGIN SUMMARY>\nAmira shared details about h...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAmira recently returned...
base_model_rouge1                                                0.486647
Finetuned_rouge1                                                 0.280702
4o_rouge1                                                         0.46729
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\n\n\n\nSarah: Can't belie...
metadata                <BEGIN METADATA>\n\n\n\nContext: A casual conv...
summary                 <BEGIN SUMMARY>\n\n\n\nSarah and Tom are meeti...
Violations              <BEGIN VIOLATIONS>\n\n1. travel.high.hotel_boo...
Quality                               <BEGIN LABEL>\n\nBAD\n\n<END LABEL>
__index_level_0__                                                   814.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\nTom shared his recent trip t...
summary_4o              <BEGIN SUMMARY>\r\n\r\nSarah and Tom catch up ...
base_model_rouge1                                                 0.41543
Finetuned_rouge1                                                 0.432161
4o_rouge1                                                        0.469231
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\r\n\r\nMichael: **Hey Cla...
metadata                <BEGIN METADATA>\r\n\r\nContext: **Casual conv...
summary                 <BEGIN SUMMARY>\r\n\r\nMichael and Clara discu...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                   630.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Clara and Michael disc...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nClara and Michael disc...
summary_4o              <BEGIN SUMMARY>\r\n\r\nMichael and Clara discu...
base_model_rouge1                                                0.407143
Finetuned_rouge1                                                 0.505882
4o_rouge1                                                        0.482759
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\r\n\r\nHannah: I just got...
metadata                <BEGIN METADATA>\r\n\r\nContext: Casual conver...
summary                 <BEGIN SUMMARY>\r\n\r\nHannah recently returne...
Violations              <BEGIN VIOLATIONS>\r\n1. travel_and_location.h...
Quality                              <BEGIN LABEL>\r\nBAD \r\n<END LABEL>
__index_level_0__                                                   624.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Hannah recently return...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nHannah and Jake discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nHannah recently returne...
base_model_rouge1                                                0.512635
Finetuned_rouge1                                                 0.380952
4o_rouge1                                                        0.491379
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\r\n\r\nEmma: **Hey John, ...
metadata                <BEGIN METADATA>\r\n\r\nContext: Discussion ab...
summary                 <BEGIN SUMMARY>\n\nJohn shared details about h...
Violations              <BEGIN VIOLATIONS>\n1. travel_and_location.hig...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   607.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n John enjoyed a luxurio...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJohn shared details ab...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJohn and Emma catch up ...
base_model_rouge1                                                0.529617
Finetuned_rouge1                                                 0.397959
4o_rouge1                                                        0.473684
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\r\n\r\nMaria: So, Lucas, ...
metadata                <BEGIN METADATA>\r\n\r\nContext: Casual conver...
summary                 <BEGIN SUMMARY>\r\n\r\nLucas shared his experi...
Violations                <BEGIN VIOLATIONS>\r\nNone.\r\n<END VIOLATIONS>
Quality                              <BEGIN LABEL>\r\nGOOD\r\n<END LABEL>
__index_level_0__                                                   629.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n During a conversation ...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nMaria and Lucas discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nLucas recounted his rec...
base_model_rouge1                                                0.468468
Finetuned_rouge1                                                 0.542373
4o_rouge1                                                        0.486726
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\r\n\r\nEmily: **Hey John,...
metadata                <BEGIN METADATA>\r\n\r\nContext: **Casual conv...
summary                 <BEGIN SUMMARY>\n\nEmily and John discussed Li...
Violations              <BEGIN VIOLATIONS>\n\n\t1. travel.high.hotel_b...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   595.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nEmily and John discuss...
summary_4o              <BEGIN SUMMARY>\r\n\r\nEmily and John discuss ...
base_model_rouge1                                                0.513699
Finetuned_rouge1                                                 0.505051
4o_rouge1                                                        0.491228
base_model_rouge2                     



setting                                               Travel and Location
dialog                  \r\n<BEGIN CONVERSATION>\r\n\r\nJenna: **Hey M...
metadata                <BEGIN METADATA>\r\n\r\nContext: Casual conver...
summary                 <BEGIN SUMMARY>\r\n\r\nJenna shared details of...
Violations              <BEGIN VIOLATIONS>\n1. travel.high.travel_hist...
Quality                               <BEGIN LABEL>\r\nBAD\r\n<END LABEL>
__index_level_0__                                                   585.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJenna and Mark discuss...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJenna and Mark discuss ...
base_model_rouge1                                                0.427481
Finetuned_rouge1                                                  0.50237
4o_rouge1                                                        0.520661
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\r\n\r\nOlivia: **Hey Mark...
metadata                <BEGIN METADATA>\r\n\r\nContext: Casual conver...
summary                 <BEGIN SUMMARY>\r\n\r\nMark recently traveled ...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                   603.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Mark and Olivia shared...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nMark and Olivia discus...
summary_4o              <BEGIN SUMMARY>\r\n\r\nOlivia and Mark discuss...
base_model_rouge1                                                0.467005
Finetuned_rouge1                                                  0.51462
4o_rouge1                                                        0.581633
base_model_rouge2                     



setting                                          Family and Relationships
dialog                  <BEGIN CONVERSATION>\nAnna: Next week is your ...
metadata                <BEGIN METADATA>\n\nContext: Casual conversati...
summary                 <BEGIN SUMMARY>\n\nRyan, who is turning 9 soon...
Violations              <BEGIN VIOLATIONS>\n1. Sexual Orientation and ...
Quality                                   <BEGIN LABEL>\nBAD\n<END LABEL>
__index_level_0__                                                    19.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation abou...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nAnna and Ryan are disc...
summary_4o              <BEGIN SUMMARY>\r\n\r\nA playful conversation ...
base_model_rouge1                                                 0.44586
Finetuned_rouge1                                                 0.477876
4o_rouge1                                                        0.428571
base_model_rouge2                     



setting                                                        Employment
dialog                  <BEGIN CONVERSATION>\nJohn: So it looks like w...
metadata                <BEGIN METADATA>\n\nContext: Discussion about ...
summary                 <BEGIN SUMMARY>\n\nJohn and Chen are discussin...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    49.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In the conversation, J...
summary_finetuned        <BEGIN SUMMARY>\nJohn and Chen are discussing...
summary_4o              <BEGIN SUMMARY>\r\n\r\nTwo colleagues, John an...
base_model_rouge1                                                0.468619
Finetuned_rouge1                                                 0.493976
4o_rouge1                                                        0.512315
base_model_rouge2                     



setting                                          Family and Relationships
dialog                  <BEGIN CONVERSATION>\nAlex: Dad, can I go to a...
metadata                <BEGIN METADATA>\n\nContext: Conversation betw...
summary                 <BEGIN SUMMARY>\n\nAlex asks their dad for per...
Violations              <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>\n\...
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    48.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nAlex and his dad discu...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAlex asks her dad for p...
base_model_rouge1                                                0.401961
Finetuned_rouge1                                                 0.420382
4o_rouge1                                                        0.551351
base_model_rouge2                     



setting                                                          Finances
dialog                  <BEGIN CONVERSATION>\nCarlos: I've heard that ...
metadata                <BEGIN METADATA>\n\nContext: Customer inquirin...
summary                 <BEGIN SUMMARY>\n\nCarlos contacted Joan for m...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    12.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\nCarlos is planning to move f...
summary_4o              <BEGIN SUMMARY>\r\n\r\nCarlos contacted Joan's...
base_model_rouge1                                                 0.39548
Finetuned_rouge1                                                 0.503311
4o_rouge1                                                        0.497238
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nAlice: Are you excited a...
metadata                <BEGIN METADATA>\n\nContext: Casual conversati...
summary                 <BEGIN SUMMARY>\n\nJohn is anxious about his u...
Violations              <BEGIN VIOLATIONS>\n 1. healthcare.high.health...
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    44.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nAlice and John discuss...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAlice and John discuss ...
base_model_rouge1                                                0.477419
Finetuned_rouge1                                                 0.439716
4o_rouge1                                                        0.607407
base_model_rouge2                     



setting                                                        Employment
dialog                  <BEGIN CONVERSATION>\nDavid: This is a good ba...
metadata                <BEGIN METADATA>\n\nContext: In-store conversa...
summary                 <BEGIN SUMMARY>\n\nDavid and Alex discuss a co...
Violations              <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>\n\...
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                     5.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation abou...
summary_finetuned        <BEGIN SUMMARY>\nDavid and Alex are discussin...
summary_4o              <BEGIN SUMMARY>\r\n\r\nTwo individuals discuss...
base_model_rouge1                                                0.518519
Finetuned_rouge1                                                 0.628099
4o_rouge1                                                        0.641221
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nAlice: So, how was your ...
metadata                <BEGIN METADATA>\n\nContext: Conversation betw...
summary                 <BEGIN SUMMARY>\n\nMark recently returned from...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    17.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n During a casual conver...
summary_finetuned        <BEGIN SUMMARY>\nMark shared his recent vacat...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAlice asked Mark about ...
base_model_rouge1                                                0.567901
Finetuned_rouge1                                                 0.516129
4o_rouge1                                                         0.68323
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nTan Ling: Excuse me. You...
metadata                <BEGIN METADATA>\n\nContext: Business meeting ...
summary                 <BEGIN SUMMARY>\n\nTan Ling, a source manager ...
Violations              <BEGIN VIOLATIONS>\n1. travel_and_location.hig...
Quality                                   <BEGIN LABEL>\nBAD\n<END LABEL>
__index_level_0__                                                    52.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a professional exch...
summary_finetuned        <BEGIN SUMMARY>\n\nTan Ling, the source manag...
summary_4o              <BEGIN SUMMARY>\r\n\r\nTan Ling meets Mr. Gree...
base_model_rouge1                                                     0.5
Finetuned_rouge1                                                 0.520548
4o_rouge1                                                        0.505263
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nJohn: Here we come.\nTax...
metadata                <BEGIN METADATA>\n\nContext: Taxi ride fare di...
summary                 <BEGIN SUMMARY>\n\nJohn and a taxi driver disc...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                     3.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a private conversat...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJohn and the taxi driv...
summary_4o              <BEGIN SUMMARY>\r\n\r\nA customer named John a...
base_model_rouge1                                                     0.5
Finetuned_rouge1                                                     0.68
4o_rouge1                                                            0.58
base_model_rouge2                     



setting                                               Healthcare Settings
dialog                  <BEGIN CONVERSATION>\nGrace: Help! Are you a d...
metadata                <BEGIN METADATA>\n\nContext: Emergency medical...
summary                 <BEGIN SUMMARY>\n\nGrace sought urgent help fo...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    32.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a critical emergenc...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nGrace's pet is in crit...
summary_4o              <BEGIN SUMMARY>\r\n\r\nA distressed pet owner,...
base_model_rouge1                                                0.489796
Finetuned_rouge1                                                 0.549618
4o_rouge1                                                        0.426966
base_model_rouge2                     



setting                                          Family and Relationships
dialog                  <BEGIN CONVERSATION>\nJohn: Hi, Mike. Haven't ...
metadata                <BEGIN METADATA>\n\nContext: Casual conversati...
summary                 <BEGIN SUMMARY>\n\nMike and John discuss that ...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    13.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a recent conversati...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJohn and Mike are catc...
summary_4o              <BEGIN SUMMARY>\r\n\r\nMike and John catch up ...
base_model_rouge1                                                0.296774
Finetuned_rouge1                                                  0.57377
4o_rouge1                                                        0.592593
base_model_rouge2                     



setting                                                        Employment
dialog                  <BEGIN CONVERSATION>\nAlice: Did Bean send the...
metadata                <BEGIN METADATA>\n\nContext: Discussion about ...
summary                 <BEGIN SUMMARY>\n\nAlice and Brian are discuss...
Violations              <BEGIN VIOLATIONS>\n1. generic.high.government...
Quality                                   <BEGIN LABEL>\nBAD\n<END LABEL>
__index_level_0__                                                     8.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nAlice and Brian are di...
summary_4o              <BEGIN SUMMARY>\r\n\r\nIn a conversation, Alic...
base_model_rouge1                                                0.355556
Finetuned_rouge1                                                 0.484848
4o_rouge1                                                        0.485981
base_model_rouge2                     



setting                                                        Employment
dialog                  <BEGIN CONVERSATION>\nJohn: Mrs. Phoebe, let's...
metadata                <BEGIN METADATA>\n\nContext: Business Negotiat...
summary                 <BEGIN SUMMARY>\n\nJohn and Mrs. Phoebe are di...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    26.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a private conversat...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJohn and Mrs. Phoebe d...
summary_4o              <BEGIN SUMMARY>\r\n\r\nIn a business conversat...
base_model_rouge1                                                0.549763
Finetuned_rouge1                                                 0.611399
4o_rouge1                                                         0.52381
base_model_rouge2                     



setting                                          Family and Relationships
dialog                  <BEGIN CONVERSATION>\nAlice: Could you do me a...
metadata                <BEGIN METADATA>\n\nContext: Alice asking Bob ...
summary                 <BEGIN SUMMARY>\n\nAlice asked Bob to run to t...
Violations              <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>\n\...
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                     6.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a private conversat...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nAlice asks Bob to pick...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAlice asks Bob to run a...
base_model_rouge1                                                0.294118
Finetuned_rouge1                                                 0.369565
4o_rouge1                                                         0.44186
base_model_rouge2                     



setting                                                         Education
dialog                  <BEGIN CONVERSATION>\nJohn: Good morning. I’m ...
metadata                <BEGIN METADATA>\n\nContext: Interview about t...
summary                 <BEGIN SUMMARY>\n\nA New York book reviewer in...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    34.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\n\nJohn from a book review ag...
summary_4o              <BEGIN SUMMARY>\r\n\r\nA journalist from the N...
base_model_rouge1                                                0.478049
Finetuned_rouge1                                                 0.554054
4o_rouge1                                                        0.701571
base_model_rouge2                     



setting                                                        Employment
dialog                  <BEGIN CONVERSATION>\nJohn: Can I help you?\nJ...
metadata                <BEGIN METADATA>\n\nContext: Inquiry about job...
summary                 <BEGIN SUMMARY>\n\nJudy Liao visited John to v...
Violations              <BEGIN VIOLATIONS>\n1. employment.high.credent...
Quality                                   <BEGIN LABEL>\nBAD\n<END LABEL>
__index_level_0__                                                     4.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a privacy-preservin...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJudy visited John to i...
summary_4o              <BEGIN SUMMARY>\r\n\r\nA job applicant visited...
base_model_rouge1                                                     0.5
Finetuned_rouge1                                                 0.601307
4o_rouge1                                                        0.465116
base_model_rouge2                     



setting                                          Family and Relationships
dialog                  <BEGIN CONVERSATION>\nAlicia: How many people ...
metadata                <BEGIN METADATA>\n\nContext: Casual conversati...
summary                 <BEGIN SUMMARY>\n\nJames and Alicia talk about...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    37.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\nJames and Alicia discuss the...
summary_4o              <BEGIN SUMMARY>\r\n\r\nIn this conversation, A...
base_model_rouge1                                                0.505618
Finetuned_rouge1                                                 0.586466
4o_rouge1                                                        0.413793
base_model_rouge2                     



setting                                                         Education
dialog                  <BEGIN CONVERSATION>\nOh, Linda. You must be s...
metadata                <BEGIN METADATA>\n\nContext: Discussion about ...
summary                 <BEGIN SUMMARY>\n\nLinda is excited about movi...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    24.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation abou...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nLinda is excited about...
summary_4o              <BEGIN SUMMARY>\r\n\r\nLinda expresses her exc...
base_model_rouge1                                                 0.27972
Finetuned_rouge1                                                 0.561798
4o_rouge1                                                        0.348624
base_model_rouge2                     



setting                                                         Education
dialog                  <BEGIN CONVERSATION>\nAlice: I don't understan...
metadata                <BEGIN METADATA>\n\nContext: Discussion betwee...
summary                 <BEGIN SUMMARY>\n\nAlice is curious why Mr. Wa...
Violations              <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>\n\...
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    45.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\nAlice and Mr. Wang discuss h...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAlice and Mr. Wang disc...
base_model_rouge1                                                0.395722
Finetuned_rouge1                                                 0.601399
4o_rouge1                                                        0.409091
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nAlex: Ladies and gentlem...
metadata                <BEGIN METADATA>\n\nContext: Discussion about ...
summary                 <BEGIN SUMMARY>\n\nAlex informs the group abou...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    33.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a privacy-preservin...
summary_finetuned        <BEGIN SUMMARY>\nAlex and Jordan are discussi...
summary_4o              <BEGIN SUMMARY>\r\n\r\nAlex and Jordan discuss...
base_model_rouge1                                                0.455446
Finetuned_rouge1                                                 0.521739
4o_rouge1                                                        0.552381
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nWaiter: Are you ready to...
metadata                <BEGIN METADATA>\n\nContext: Restaurant orderi...
summary                 <BEGIN SUMMARY>\n\nA customer at a restaurant ...
Violations              <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>\n\...
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    50.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a dining conversati...
summary_finetuned        <BEGIN SUMMARY>\nThe customer ordered a crab ...
summary_4o              <BEGIN SUMMARY>\r\n\r\nA customer and a waiter...
base_model_rouge1                                                0.409836
Finetuned_rouge1                                                  0.45977
4o_rouge1                                                        0.442748
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nPerson1: What's all the ...
metadata                <BEGIN METADATA>\n\nContext: Casual conversati...
summary                 <BEGIN SUMMARY>\n\nTwo individuals are at a ba...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    15.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a casual conversati...
summary_finetuned        <BEGIN SUMMARY>\nJimmy and Person1 are at a b...
summary_4o              <BEGIN SUMMARY>\r\n\r\nIn a casual conversatio...
base_model_rouge1                                                0.467066
Finetuned_rouge1                                                 0.533333
4o_rouge1                                                        0.582857
base_model_rouge2                     



setting                                          Family and Relationships
dialog                  <BEGIN CONVERSATION>\nJohn: How old is Keith?\...
metadata                <BEGIN METADATA>\n\nContext: Casual conversati...
summary                 <BEGIN SUMMARY>\n\nJohn and Carol discuss the ...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                     9.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a private conversat...
summary_finetuned        <BEGIN SUMMARY>\nJohn and Carol discuss the r...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJohn and Carol discusse...
base_model_rouge1                                                0.461538
Finetuned_rouge1                                                  0.45283
4o_rouge1                                                        0.447368
base_model_rouge2                     



setting                                                        Employment
dialog                  <BEGIN CONVERSATION>\nJane: Have you had any e...
metadata                <BEGIN METADATA>\n\nContext: Job interview con...
summary                 <BEGIN SUMMARY>\n\nTom has prior sales experie...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    16.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation abou...
summary_finetuned        <BEGIN SUMMARY>\nJane and Tom discuss Tom's p...
summary_4o              <BEGIN SUMMARY>\r\n\r\nTom discussed his previ...
base_model_rouge1                                                 0.45283
Finetuned_rouge1                                                    0.576
4o_rouge1                                                        0.653846
base_model_rouge2                     



setting                                               Healthcare Settings
dialog                  <BEGIN CONVERSATION>\nChris: Why don't you wat...
metadata                <BEGIN METADATA>\n\nContext: Conversation betw...
summary                 <BEGIN SUMMARY>\n\nChris and Jordan were invol...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    30.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a vehicular inciden...
summary_finetuned        <BEGIN SUMMARY>\nChris and Jordan are arguing...
summary_4o              <BEGIN SUMMARY>\r\n\r\nTwo individuals, Chris ...
base_model_rouge1                                                0.485437
Finetuned_rouge1                                                 0.468085
4o_rouge1                                                        0.537445
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nJohn: Now I understand. ...
metadata                <BEGIN METADATA>\n\nContext: Casual conversati...
summary                 <BEGIN SUMMARY>\n\nJohn and Emily discuss what...
Violations              <BEGIN VIOLATIONS>\n1. finances.high.insurance...
Quality                                   <BEGIN LABEL>\nBAD\n<END LABEL>
__index_level_0__                                                    36.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJohn and Emily discuss...
summary_4o              <BEGIN SUMMARY>\r\n\r\nIn a recent conversatio...
base_model_rouge1                                                0.437158
Finetuned_rouge1                                                 0.537931
4o_rouge1                                                        0.445596
base_model_rouge2                     



setting                                                         Education
dialog                  <BEGIN CONVERSATION>\nAlicia: Would you talk t...
metadata                <BEGIN METADATA>\n\nContext: Discussion betwee...
summary                 <BEGIN SUMMARY>\n\nAlicia is considering takin...
Violations              <BEGIN VIOLATIONS>\n\t1. employment.work_histo...
Quality                                   <BEGIN LABEL>\nBAD\n<END LABEL>
__index_level_0__                                                    25.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nAlicia and Jordan disc...
summary_4o              <BEGIN SUMMARY>\r\n\r\nIn a conversation about...
base_model_rouge1                                                0.376238
Finetuned_rouge1                                                 0.466667
4o_rouge1                                                         0.54878
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nStacy: Hello, if my flig...
metadata                <BEGIN METADATA>\n\nContext: **Customer servic...
summary                 <BEGIN SUMMARY>\n\nStacy is worried about miss...
Violations              <BEGIN VIOLATIONS>\n    1. travel.high.modes_o...
Quality                                   <BEGIN LABEL>\nBAD\n<END LABEL>
__index_level_0__                                                    31.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nStacy is concerned abo...
summary_4o              <BEGIN SUMMARY>\r\n\r\nStacy inquired about th...
base_model_rouge1                                                0.476636
Finetuned_rouge1                                                 0.550725
4o_rouge1                                                        0.487047
base_model_rouge2                     



setting                                                      Social Media
dialog                  <BEGIN CONVERSATION>\nLucinda: Absolutely appa...
metadata                <BEGIN METADATA>\n\nContext: Customer complain...
summary                 <BEGIN SUMMARY>\n\nLucinda is outraged by Tesc...
Violations              <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>\n\...
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                     5.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a discussion about ...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nLucinda expressed her ...
summary_4o              <BEGIN SUMMARY>\r\n\r\nLucinda expressed frust...
base_model_rouge1                                                0.378151
Finetuned_rouge1                                                 0.422222
4o_rouge1                                                        0.479263
base_model_rouge2                     



setting                                                          Finances
dialog                  <BEGIN CONVERSATION>\nXan: Waited in all day f...
metadata                <BEGIN METADATA>\n\nContext: Customer complain...
summary                 <BEGIN SUMMARY>\n\nA user named Xan expressed ...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    32.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation abou...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nXan is frustrated beca...
summary_4o              <BEGIN SUMMARY>\r\n\r\nXan expressed frustrati...
base_model_rouge1                                                 0.41791
Finetuned_rouge1                                                 0.550898
4o_rouge1                                                        0.480874
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nNatasha: Hello, assuming...
metadata                <BEGIN METADATA>\n\nContext: A conversation di...
summary                 <BEGIN SUMMARY>\n\nNatasha is frustrated with ...
Violations              <BEGIN VIOLATIONS>\n\t1. travel.high.travel_hi...
Quality                                   <BEGIN LABEL>\nBAD\n<END LABEL>
__index_level_0__                                                    13.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation abou...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nNatasha is experiencin...
summary_4o              <BEGIN SUMMARY>\r\n\r\nNatasha expressed frust...
base_model_rouge1                                                0.452229
Finetuned_rouge1                                                  0.43617
4o_rouge1                                                        0.477366
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nHamish: Hello, can you h...
metadata                <BEGIN METADATA>\n\nContext: Customer service ...
summary                 <BEGIN SUMMARY>\n\nHamish expressed frustratio...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    19.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nHamish is frustrated w...
summary_4o              <BEGIN SUMMARY>\r\n\r\nHamish reached out to a...
base_model_rouge1                                                0.456621
Finetuned_rouge1                                                 0.540541
4o_rouge1                                                        0.557214
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nJacqueline: Hello, I'm n...
metadata                <BEGIN METADATA>\n\nContext: Conversation rega...
summary                 <BEGIN SUMMARY>\n\nJacqueline missed her conne...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    49.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a privacy-preservin...
summary_finetuned        <BEGIN SUMMARY>\n\nJacqueline is experiencing...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJacqueline contacted LC...
base_model_rouge1                                                0.325301
Finetuned_rouge1                                                 0.432432
4o_rouge1                                                        0.423529
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nMaia: Hello, asking your...
metadata                <BEGIN METADATA>\n\nContext: Feedback and grie...
summary                 <BEGIN SUMMARY>\n\nMaia expressed frustration ...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    41.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n During a holiday seaso...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nMaia expressed frustra...
summary_4o              <BEGIN SUMMARY>\r\n\r\nMaia expressed frustrat...
base_model_rouge1                                                0.446352
Finetuned_rouge1                                                 0.554913
4o_rouge1                                                        0.512821
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nJason: So my MyChoice sc...
metadata                <BEGIN METADATA>\n\nContext: Complaint about a...
summary                 <BEGIN SUMMARY>\n\nJason's MyChoice delivery w...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    26.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Jason experienced sign...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJason experienced mult...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJason experienced signi...
base_model_rouge1                                                0.429448
Finetuned_rouge1                                                 0.468085
4o_rouge1                                                        0.452055
base_model_rouge2                     



setting                                                      Social Media
dialog                  <BEGIN CONVERSATION>\nJames: AT WHAT POINT CAN...
metadata                <BEGIN METADATA>\n\nContext: Customer support ...
summary                 <BEGIN SUMMARY>\n\nJames is experiencing frequ...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    43.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n James experienced buff...
summary_finetuned        <BEGIN SUMMARY>\n\nJames is experiencing buff...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJames reported persiste...
base_model_rouge1                                                0.502994
Finetuned_rouge1                                                 0.587302
4o_rouge1                                                        0.493671
base_model_rouge2                     



setting                                                          Finances
dialog                  <BEGIN CONVERSATION>\nJohn: Please help, my ne...
metadata                <BEGIN METADATA>\n\nContext: Customer service ...
summary                 <BEGIN SUMMARY>\n\nJohn is frustrated with ong...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    12.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n John is experiencing r...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJohn is experiencing i...
summary_4o              <BEGIN SUMMARY>\r\n\r\nA frustrated customer, ...
base_model_rouge1                                                0.494118
Finetuned_rouge1                                                 0.522388
4o_rouge1                                                        0.448087
base_model_rouge2                     



setting                                                      Social Media
dialog                  <BEGIN CONVERSATION>\nJoliz: I am having the m...
metadata                <BEGIN METADATA>\n\nContext: Customer support ...
summary                 <BEGIN SUMMARY>\n\nJoliz experienced frustrati...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    52.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Joliz expressed frustr...
summary_finetuned        <BEGIN SUMMARY>\n\nJoliz is frustrated with A...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJoliz expressed frustra...
base_model_rouge1                                                0.494845
Finetuned_rouge1                                                 0.426966
4o_rouge1                                                        0.454976
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nChris: Cancelled flight ...
metadata                <BEGIN METADATA>\n\nContext: Discussion about ...
summary                 <BEGIN SUMMARY>\n\nChris is highly dissatisfie...
Violations              <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>\n\...
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                     3.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nChris experienced mult...
summary_4o              <BEGIN SUMMARY>\r\n\r\nChris experiences signi...
base_model_rouge1                                                0.519608
Finetuned_rouge1                                                 0.444444
4o_rouge1                                                        0.466019
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nElin: Hi, how may we ass...
metadata                <BEGIN METADATA>\n\nContext: Customer support ...
summary                 <BEGIN SUMMARY>\n\nA customer contacted Elin t...
Violations              <BEGIN VIOLATIONS>\n1. travel_and_location.hig...
Quality                                   <BEGIN LABEL>\nBAD\n<END LABEL>
__index_level_0__                                                    33.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a privacy-preservin...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nThe customer is inquir...
summary_4o              <BEGIN SUMMARY>\r\n\r\nA customer inquired abo...
base_model_rouge1                                                0.561798
Finetuned_rouge1                                                 0.540541
4o_rouge1                                                        0.719577
base_model_rouge2                     



setting                                                          Finances
dialog                  <BEGIN CONVERSATION>\nMuyiwa: Is this how you ...
metadata                <BEGIN METADATA>\n\nContext: Conversation betw...
summary                 <BEGIN SUMMARY>\n\nMuyiwa expressed frustratio...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    34.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nMuyiwa expressed frust...
summary_4o              <BEGIN SUMMARY>\r\n\r\nIn a conversation, Muyi...
base_model_rouge1                                                    0.47
Finetuned_rouge1                                                 0.567742
4o_rouge1                                                        0.552083
base_model_rouge2                     



setting                                                          Finances
dialog                  <BEGIN CONVERSATION>\nDimitar: Hello, I bought...
metadata                <BEGIN METADATA>\n\nContext: Customer support ...
summary                 <BEGIN SUMMARY>\n\nDimitar contacted the store...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                     8.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n Dimitar contacted the ...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nDimitar purchased a Ce...
summary_4o              <BEGIN SUMMARY>\r\n\r\nDimitar reached out for...
base_model_rouge1                                                     0.5
Finetuned_rouge1                                                 0.525641
4o_rouge1                                                         0.52381
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nSarah: Thank you... Flig...
metadata                <BEGIN METADATA>\n\nContext: Complaint and reb...
summary                 <BEGIN SUMMARY>\n\nSarah's flight was canceled...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    17.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nSarah is frustrated be...
summary_4o              <BEGIN SUMMARY>\r\n\r\nSarah experienced frust...
base_model_rouge1                                                0.395349
Finetuned_rouge1                                                    0.375
4o_rouge1                                                        0.479452
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nDan: Hello, how long is ...
metadata                <BEGIN METADATA>\n\nContext: Customer service ...
summary                 <BEGIN SUMMARY>\n\nDan expressed frustration o...
Violations              <BEGIN VIOLATIONS>\n1. generic.high.authorizat...
Quality                                   <BEGIN LABEL>\nBAD\n<END LABEL>
__index_level_0__                                                     6.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation rega...
summary_finetuned        <BEGIN SUMMARY>\nDan is frustrated because hi...
summary_4o              <BEGIN SUMMARY>\r\n\r\nDan is frustrated becau...
base_model_rouge1                                                0.381743
Finetuned_rouge1                                                 0.337079
4o_rouge1                                                         0.47541
base_model_rouge2                     



setting                                                          Finances
dialog                  <BEGIN CONVERSATION>\nRahul: Hey! Amazon pantr...
metadata                <BEGIN METADATA>\n\nContext: Customer service ...
summary                 <BEGIN SUMMARY>\n\nRahul contacted Amazon supp...
Violations              <BEGIN VIOLATIONS>\n\t1. Generic.High Sensitiv...
Quality                                   <BEGIN LABEL>\nBAD\n<END LABEL>
__index_level_0__                                                     4.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a privacy-preservin...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nRahul contacted an age...
summary_4o              <BEGIN SUMMARY>\r\n\r\nRahul reported that his...
base_model_rouge1                                                0.415301
Finetuned_rouge1                                                 0.378049
4o_rouge1                                                        0.476744
base_model_rouge2                     



setting                                               Travel and Location
dialog                  <BEGIN CONVERSATION>\nJohn: Here we are again....
metadata                <BEGIN METADATA>\n\nContext: Customer support ...
summary                 <BEGIN SUMMARY>\n\nJohn reported losing all hi...
Violations                    <BEGIN VIOLATIONS>\nNone.\n<END VIOLATIONS>
Quality                                  <BEGIN LABEL>\nGOOD\n<END LABEL>
__index_level_0__                                                    47.0
summary_base_model      <BEGIN SUMMARY>\r\n\r\n In a conversation betw...
summary_finetuned        <BEGIN SUMMARY>\r\n\r\nJohn is experiencing a...
summary_4o              <BEGIN SUMMARY>\r\n\r\nJohn reported losing al...
base_model_rouge1                                                0.436782
Finetuned_rouge1                                                 0.606061
4o_rouge1                                                        0.654321
base_model_rouge2                     

KeyboardInterrupt: 

In [None]:
import re
import pandas as pd
from datasets import load_metric,Dataset, concatenate_datasets
import random
import torch 
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline 
import pandas as pd
from azure.identity import AzureCliCredential, get_bearer_token_provider
from openai import AzureOpenAI
import re

# Bearer tokens for the Cognitive Services scope are obtained through an
# AzureCliCredential, so no API key is embedded in the notebook.
cli_credential = AzureCliCredential()
token_provider = get_bearer_token_provider(
    cli_credential,
    "https://cognitiveservices.azure.com/.default",
)

# Azure OpenAI client used for the GPT-4o calls below
# (an earlier revision pinned api_version "2023-03-15-preview").
client = AzureOpenAI(
    azure_endpoint="https://hywayllm-gpt4.openai.azure.com/",
    azure_ad_token_provider=token_provider,
    api_version="2024-02-15-preview",
)

# Test split to be labelled; adjust the path if the file lives elsewhere.
file_path = './splits/final_test_rouge.csv'
df = pd.read_csv(file_path)
def _extract_tagged_section(text, pattern, fallback):
    """Return group 1 of the first ``pattern`` match in ``text``, else ``fallback``.

    ``text`` may be ``None`` (a chat completion can come back without any
    message content); that case yields ``fallback`` instead of raising inside
    ``re.search``. The search uses ``re.DOTALL`` so a section may span lines.
    """
    if text is None:
        return fallback
    match = re.search(pattern, text, re.DOTALL)
    return match.group(1) if match else fallback


def calculate_metrics(row):
    """Ask three models to label one summary and record their answers on ``row``.

    The text in ``row['summary']`` is sent, together with a fixed instruction
    prompt and the system message ``taxo``, to:

    * ``client`` (deployment ``"hywaygpt4o"``)  -> ``label_4o`` / ``violations_4o``
    * ``pipe``                                  -> ``label_finetuned`` / ``violations_finetuned``
    * ``pipe2``                                 -> ``label_base_model`` / ``violations_base_model``

    From every response the ``<BEGIN LABEL>...<END LABEL>`` and
    ``<BEGIN VIOLATIONS>...<END VIOLATIONS>`` sections (tags included) are
    extracted; when a section is missing a sentinel string is stored instead.

    Relies on the globals ``client``, ``pipe``, ``pipe2``, ``generation_args``
    and ``taxo`` defined elsewhere in the notebook (``taxo`` is presumably the
    privacy taxonomy text -- confirm against its definition).

    NOTE: a later cell defines another ``calculate_metrics(TP, TN, FP, FN)``,
    which rebinds this name once that cell has run.

    Args:
        row: one DataFrame row (as passed by ``df.apply(..., axis=1)``) with a
            ``'summary'`` entry.

    Returns:
        The same ``row`` object with the six label/violation entries added.
    """
    print("IN")
    dialog = row['summary']

    rv= ''' Identify and return BOTH the label and the violations in the privacy preserving summary given below. LABEL CAN ONLY BE "GOOD" OR "BAD", NOTHING ELSE. IF LABEL IS "GOOD", THEN VIOLATIONS CAN ONLY BE "None.". State the violations by sticking to the Taxonomy provided earlier.
    
    This is the response output structure to be followed-
    
    <BEGIN STRUCTURE>

        <BEGIN LABEL>
        Enter Label here (ONLY CAN BE GOOD or BAD, NO OTHER VALUES ALLOWED)
        <END LABEL>

        <BEGIN VIOLATIONS>
        Enter Violations here (if Label is GOOD CAN ONLY BE "None.")
        <END VIOLATIONS>

    <END STRUCTURE>

    MANDATORILY FOLLOW THIS STRUCTURE WHEN GENERATING RESPONSE. LABEL CAN ONLY BE GOOD OR BAD, NO OTHER VALUES ALLOWED. IF LABEL IS GOOD THEN VIOLATIONS CAN ONLY BE "None.". MAKE SURE THAT THE RESPONSE STRUCTURE IS COMPULSORILY FOLLOWED AND HAS BOTH LABEL AND VIOLATIONS.
    '''
    prompt = f''' {rv} \n {dialog}'''

    # All three models receive exactly the same conversation.
    messages = [
        {"role": "system", "content": taxo},
        {"role": "user", "content": prompt},
    ]

    # GPT-4o via Azure OpenAI; `model` is the deployment name.
    response = client.chat.completions.create(
        model="hywaygpt4o",
        messages=messages,
    )
    generated_response3 = response.choices[0].message.content

    # Local pipelines: `pipe` feeds the fine-tuned columns, `pipe2` the
    # base-model columns. Assumes 'generated_text' is a plain string
    # (depends on `generation_args`, defined elsewhere) -- TODO confirm.
    print("GEN1 START")
    generated_response = pipe(messages, **generation_args)[0]['generated_text']
    print("GEN1 DONE")
    generated_response2 = pipe2(messages, **generation_args)[0]['generated_text']
    print("GEN2 DONE")

    print("FT: \n",generated_response )
    print("BM: \n",generated_response2 )
    print("4o: \n",generated_response3 )

    # The tagged sections are captured together with their BEGIN/END markers.
    label_pattern = r"(<BEGIN LABEL>.*?<END LABEL>)"
    violations_pattern = r"(<BEGIN VIOLATIONS>.*?<END VIOLATIONS>)"

    # Sentinels written when a model did not follow the response structure.
    missing_label = '''SCREWED UP SMH'''
    missing_violations = '''SCREWED UP SMW'''

    outputs_by_suffix = (
        ('base_model', generated_response2),
        ('finetuned', generated_response),
        ('4o', generated_response3),
    )
    for suffix, generated_text in outputs_by_suffix:
        row[f'label_{suffix}'] = _extract_tagged_section(
            generated_text, label_pattern, missing_label
        )
        row[f'violations_{suffix}'] = _extract_tagged_section(
            generated_text, violations_pattern, missing_violations
        )
    print("WR DONE")

    print(row)

    print("Out")

    return row

# Label every row with the three models (calculate_metrics is called once per
# row and returns the row with the label/violation entries filled in).
df = df.apply(calculate_metrics, axis="columns")

# Persist the labelled test set next to the input split.
df.to_csv('./splits/final_test_rouge_label.csv', index=False)

In [None]:
import pandas as pd

# Read back the labelled test set written by the previous cell.
file_path = './splits/final_test_rouge_label.csv'  # Replace with the actual file path
df = pd.read_csv(file_path)

# Confusion-matrix counters for the fine-tuned (FT) and base (BM) models.
# "BAD" is treated as the positive class: a prediction counts as "good" only
# if the predicted label text contains "good"; anything else counts as "bad".
FT_TP = FT_TN = FT_FP = FT_FN = 0
BM_TP = BM_TN = BM_FP = BM_FN = 0

label_columns = zip(df['Quality'], df['label_finetuned'], df['label_base_model'])
for truth, ft_prediction, bm_prediction in label_columns:
    truth = truth.lower()
    if "good" in truth:
        truth_is_bad = False
    elif "bad" in truth:
        truth_is_bad = True
    else:
        # Reference label is neither good nor bad: the row is not counted.
        continue

    # Fine-tuned model
    ft_says_good = "good" in ft_prediction.lower()
    if truth_is_bad:
        if ft_says_good:
            FT_FN += 1  # bad summary missed -> False Negative
        else:
            FT_TP += 1  # bad summary caught -> True Positive
    else:
        if ft_says_good:
            FT_TN += 1  # good summary accepted -> True Negative
        else:
            FT_FP += 1  # good summary flagged -> False Positive

    # Base model
    bm_says_good = "good" in bm_prediction.lower()
    if truth_is_bad:
        if bm_says_good:
            BM_FN += 1  # bad summary missed -> False Negative
        else:
            BM_TP += 1  # bad summary caught -> True Positive
    else:
        if bm_says_good:
            BM_TN += 1  # good summary accepted -> True Negative
        else:
            BM_FP += 1  # good summary flagged -> False Positive

# Derive the standard classification scores from confusion-matrix counts.
def calculate_metrics(TP, TN, FP, FN):
    """Return (accuracy, precision, recall, f1_score, specificity).

    Each score is a ratio of the given counts; whenever a ratio's denominator
    is not greater than zero the score is reported as 0 instead of dividing.
    """
    def ratio(numerator, denominator):
        # Guarded division shared by every score below.
        if denominator > 0:
            return numerator / denominator
        return 0

    total = TP + TN + FP + FN
    accuracy = ratio(TP + TN, total)
    precision = ratio(TP, TP + FP)
    recall = ratio(TP, TP + FN)
    f1_score = ratio(2 * precision * recall, precision + recall)
    specificity = ratio(TN, TN + FP)
    return accuracy, precision, recall, f1_score, specificity

def _show_scores(heading, accuracy, precision, recall, f1, specificity):
    """Print one model's scores under ``heading``, four decimals each."""
    print(heading)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    print(f"Specificity: {specificity:.4f}")


# Fine-tuned model: scores, then the raw TP/TN/FP/FN counts.
FT_accuracy, FT_precision, FT_recall, FT_f1, FT_specificity = calculate_metrics(
    FT_TP, FT_TN, FT_FP, FT_FN
)
print(FT_TP, FT_TN, FT_FP, FT_FN)

# Base model: scores, then the raw TP/TN/FP/FN counts.
BM_accuracy, BM_precision, BM_recall, BM_f1, BM_specificity = calculate_metrics(
    BM_TP, BM_TN, BM_FP, BM_FN
)
print(BM_TP, BM_TN, BM_FP, BM_FN)

# Side-by-side report, base model first.
_show_scores("\nBase Model Metrics:", BM_accuracy, BM_precision, BM_recall, BM_f1, BM_specificity)

print("-------------------------------------------------------------")


_show_scores("Fine-tuned Model Metrics:", FT_accuracy, FT_precision, FT_recall, FT_f1, FT_specificity)




Define a function that runs inference on a single instance and scores the result.

In [None]:
def calculate_rogue(row):
    """Run inference for one dataset row and score the answer with ROUGE.

    The content of the row's first message (``row['messages'][0]['content']``)
    is used as the prompt for ``test_inference``. The generated response is
    compared against ``row['output']`` with ``rouge_metric.compute`` using
    ``use_stemmer=True``.

    Args:
        row: a dataset row providing ``'messages'`` and ``'output'``.

    Returns:
        A dict holding, for every key returned by ``rouge_metric.compute``,
        the ``mid.fmeasure`` value multiplied by 100, plus the generated text
        under ``'response'``.
    """
    prompt = row['messages'][0]['content']
    response = test_inference(prompt)

    scores = rouge_metric.compute(
        predictions=[response],
        references=[row['output']],
        use_stemmer=True,
    )

    # Keep only the mid F-measure of each score, expressed on a 0-100 scale.
    result = {}
    for metric_name, aggregate in scores.items():
        result[metric_name] = aggregate.mid.fmeasure * 100

    result['response'] = response
    return result

Now we can run inference over a collection of samples. For simplicity, the process isn't optimized at this stage: in the future, we plan to perform inference in batches to improve performance. For the time being, however, the samples are processed one at a time.

In [None]:
# '%%time' is a magic command in Jupyter notebooks that measures the execution time of the cell.

# 'dataset_chatml['test'].select(range(0,500))' selects the first 500 elements from the test set in the 'dataset_chatml' dataset.

# '.map(calculate_rogue, batched=False)' applies the 'calculate_rogue' function to each element in the selected subset.
# 'calculate_rogue' calculates the ROUGE score for each element.
# 'batched' is set to False, which means that the function will be applied to each element individually, not in batches.

# The results are stored in the 'metricas' variable.
%%time
metricas = dataset_chatml['test'].select(range(0,500)).map(calculate_rogue, batched=False)

In [None]:
# 'numpy' is a library in Python that provides support for large, multi-dimensional arrays and matrices, along with a large collection of high-level mathematical functions to operate on these arrays.
# 'import numpy as np' imports the 'numpy' library and gives it the alias 'np'. This allows us to use 'np' instead of 'numpy' when calling its functions.
import numpy as np

Now, we have the ability to compute the metric for the sample.

In [None]:
# Report the mean of each ROUGE column collected in 'metricas'
# (ROUGE-1, ROUGE-2, ROUGE-L and ROUGE-Lsum), one line per score.
rouge_report = (
    ("Rouge 1 Mean: ", 'rouge1'),
    ("Rouge 2 Mean: ", 'rouge2'),
    ("Rouge L Mean: ", 'rougeL'),
    ("Rouge Lsum Mean: ", 'rougeLsum'),
)
for caption, score_key in rouge_report:
    print(caption, np.mean(metricas[score_key]))