## triplet

In [1]:
import json

import json

with open("rag_truth_train.json", "r") as f:
    train_data = json.load(f)
with open("rag_truth_dev.json", "r") as f:
    dev_data = json.load(f)
with open("rag_truth_test.json", "r") as f:
    test_data = json.load(f)

In [2]:
def add_prefix(data):
    for d in data:
        d["text"] = "Please judge the following statement as true or false based on the references above: " + d["text"]
    return data

train_data = add_prefix(train_data)
dev_data = add_prefix(dev_data)
test_data = add_prefix(test_data)

In [31]:
# task_type: QA, Data2txt, Summary
task_name = "Data2txt"
train_data = [d for d in train_data if d["task_type"] == task_name]
dev_data = [d for d in dev_data if d["task_type"] == task_name]
test_data = [d for d in test_data if d["task_type"] == task_name]

In [3]:
import random

def create_trip(data, id_list):
    trip = []
    for id in id_list:
        num = 0
        no_hal = []
        has_hal = []
        for d in data:
            if num == 6:
                num = 0
                if no_hal == [] or has_hal == []:
                    break
                # shuffle
                random.seed(id) # change seed
                no_hal = random.sample(no_hal, len(no_hal))
                has_hal = random.sample(has_hal, len(has_hal))
                
                if len(no_hal)==1 or len(no_hal)==5:
                    trip.append({"anchor":ref,"positive": no_hal[0], "negative": has_hal[0], "labels":[0,1]})
                elif len(no_hal)==2 or len(no_hal)==4:
                    trip.append({"anchor":ref,"positive": no_hal[0], "negative": has_hal[0], "labels":[0,1]})
                    trip.append({"anchor":ref,"positive": no_hal[1], "negative": has_hal[1], "labels":[0,1]})
                elif len(no_hal)==3:
                    trip.append({"anchor":ref,"positive": no_hal[0], "negative": has_hal[0], "labels":[0,1]})
                    trip.append({"anchor":ref,"positive": no_hal[1], "negative": has_hal[1], "labels":[0,1]})
                    trip.append({"anchor":ref,"positive": no_hal[2], "negative": has_hal[2], "labels":[0,1]})
                no_hal = []
                has_hal = []
                break
            elif d["source_id"] == id:
                num +=1
                ref = d["ref"]
                if d["labels"] == 0:
                    no_hal.append(d["text"])
                else: #hallucination
                    has_hal.append(d["text"])
        if num == 6:
            if len(no_hal)==1 or len(no_hal)==5:
                trip.append({"anchor":ref,"positive": no_hal[0], "negative": has_hal[0], "labels":[0,1]})
            elif len(no_hal)==2 or len(no_hal)==4:
                trip.append({"anchor":ref,"positive": no_hal[0], "negative": has_hal[0], "labels":[0,1]})
                trip.append({"anchor":ref,"positive": no_hal[1], "negative": has_hal[1], "labels":[0,1]})
            elif len(no_hal)==3:
                trip.append({"anchor":ref,"positive": no_hal[0], "negative": has_hal[0], "labels":[0,1]})
                trip.append({"anchor":ref,"positive": no_hal[1], "negative": has_hal[1], "labels":[0,1]})
                trip.append({"anchor":ref,"positive": no_hal[2], "negative": has_hal[2], "labels":[0,1]})
    return trip

In [4]:
train_id = [d["source_id"] for d in train_data]
train_id = list(set(train_id))
dev_id = [d["source_id"] for d in dev_data]
dev_id = list(set(dev_id))
test_id = [d["source_id"] for d in test_data]
test_id = list(set(test_id))
print(len(train_id), len(dev_id), len(test_id))
train_trip = create_trip(train_data, train_id)
dev_trip = create_trip(dev_data, dev_id)
test_trip = create_trip(test_data, test_id)

2305 210 450


In [5]:
len(train_trip), len(dev_trip), len(test_trip)

(3760, 337, 633)

In [6]:
from datasets import Dataset, DatasetDict
import pandas as pd

train_df = pd.DataFrame(train_trip)
dev_df = pd.DataFrame(dev_trip)
test_df = pd.DataFrame(test_trip)

train_ds = Dataset.from_pandas(train_df)
dev_ds = Dataset.from_pandas(dev_df)
test_ds = Dataset.from_pandas(test_df)

tri_raw_datasets = DatasetDict({"train": train_ds, "dev": dev_ds, "test": test_ds})
tri_raw_datasets

DatasetDict({
    train: Dataset({
        features: ['anchor', 'positive', 'negative', 'labels'],
        num_rows: 3760
    })
    dev: Dataset({
        features: ['anchor', 'positive', 'negative', 'labels'],
        num_rows: 337
    })
    test: Dataset({
        features: ['anchor', 'positive', 'negative', 'labels'],
        num_rows: 633
    })
})

In [None]:
from transformers import AutoTokenizer

tri_tokenizer = AutoTokenizer.from_pretrained("FacebookAI/RoBERTa-base")


In [8]:
tri_raw_datasets["train"][0]

{'anchor': "{'question': 'how do cold and warm fronts work'}",
 'positive': 'Please judge the following statement as true or false based on the references above: A cold front occurs when a cold air mass moves in and replaces a warmer air mass. It is a transition zone from warm air to cold air, moving from northwest to southeast. The air behind a cold front is colder and drier than the air ahead of it. The distinction between a cold front and a warm front is which one is moving and pushing the other one away. A warm front, on the other hand, occurs when a warm air mass moves in and replaces a colder air mass. When a warm or cold front stops moving, it becomes a stationary front. Once it resumes its forward motion, it becomes a warm front or a cold front again.',
 'negative': 'Please judge the following statement as true or false based on the references above: Based on the given passages, a cold front is the transition zone where a cold air mass is replacing a warmer air mass. It moves f

In [None]:
from transformers import DataCollatorWithPadding

def tri_tokenize_function(examples):
    anchor = tri_tokenizer(examples["anchor"], truncation=True,max_length=512,padding="max_length")
    positive = tri_tokenizer(examples["positive"], truncation=True,max_length=512,padding="max_length")
    negative = tri_tokenizer(examples["negative"], truncation=True,max_length=512,padding="max_length")

    return {
        "anchor_input_ids": anchor["input_ids"],
        "anchor_attention_mask": anchor["attention_mask"],
        "positive_input_ids": positive["input_ids"],
        "positive_attention_mask": positive["attention_mask"],
        "negative_input_ids": negative["input_ids"],
        "negative_attention_mask": negative["attention_mask"],
    }

tri_tokenized_datasets = tri_raw_datasets.map(tri_tokenize_function, batched=True)
tri_tokenized_datasets = tri_tokenized_datasets.remove_columns(["anchor", "positive", "negative"])
tri_data_collator = DataCollatorWithPadding(tokenizer=tri_tokenizer)

In [10]:
from transformers import DataCollatorWithPadding
from torch.nn.utils.rnn import pad_sequence
import torch

class CustomDataCollator(DataCollatorWithPadding):
    def __call__(self, features):
        anchor_ids = [torch.tensor(x['anchor_input_ids']) for x in features]
        positive_ids = [torch.tensor(x['positive_input_ids']) for x in features]
        negative_ids = [torch.tensor(x['negative_input_ids']) for x in features]
        
        anchor_mask = [torch.tensor(x['anchor_attention_mask']) for x in features]
        positive_mask = [torch.tensor(x['positive_attention_mask']) for x in features]
        negative_mask = [torch.tensor(x['negative_attention_mask']) for x in features]
        
        anchor_ids = pad_sequence(anchor_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id)
        positive_ids = pad_sequence(positive_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id)
        negative_ids = pad_sequence(negative_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id)
        
        anchor_mask = pad_sequence(anchor_mask, batch_first=True, padding_value=0)
        positive_mask = pad_sequence(positive_mask, batch_first=True, padding_value=0)
        negative_mask = pad_sequence(negative_mask, batch_first=True, padding_value=0)
        
        labels = [torch.tensor(x['labels']) for x in features]
        
        batch = {
            "input_ids": [anchor_ids, positive_ids, negative_ids],
            "attention_mask": [anchor_mask, positive_mask, negative_mask],
            "labels": labels
        }
        
        return batch


tri_data_collator = CustomDataCollator(tokenizer=tri_tokenizer)

In [None]:
from transformers import AutoModel


base_model = AutoModel.from_pretrained("FacebookAI/RoBERTa-base")

In [None]:
import torch

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
base_model.to(device)
device

In [14]:
import torch.nn.functional as F
import torch.nn as nn

def triplet_loss(anchor_output, positive_output, negative_output, positive_logits, negative_logits):
    positive_targets = torch.zeros(positive_output.size(0), dtype=torch.long).to(device)  # no hallucination = 0
    negative_targets = torch.ones(negative_output.size(0), dtype=torch.long).to(device)
    positive_loss = nn.CrossEntropyLoss()(positive_logits, positive_targets)
    negative_loss = nn.CrossEntropyLoss()(negative_logits, negative_targets)

    classification_loss = (positive_loss + negative_loss) / 2.0 # average

    triplet_loss_fn = (nn.TripletMarginWithDistanceLoss(margin=1.0,distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y)))
    triplet_loss = triplet_loss_fn(anchor_output, positive_output, negative_output)
   
    return classification_loss, triplet_loss

In [15]:
import torch
import numpy as np

def compute_tri_metrics(eval_pred):
    logits,labels= eval_pred
    
    # ModelOutput(logits=[positive_logits, negative_logits]...)
    positive_logits = torch.tensor(logits[0])
    negative_logits = torch.tensor(logits[1])

    positive_preds = torch.argmax(positive_logits, dim=1)
    negative_preds = torch.argmax(negative_logits, dim=1)

    # num of correct predictions
    correct_positive = (positive_preds == 0).sum().item()
    correct_negative = (negative_preds == 1).sum().item()
    
    # total samples
    total_samples = positive_preds.size(0) + negative_preds.size(0)
    
    positive_preds_num = (positive_preds == 1).sum().item() # predict non-hallucination samples (positive) as hallucination
    negative_preds_num = (negative_preds == 1).sum().item() # predict hallucination samples (negative) as hallucination
    negative_num = negative_preds.size(0) # num of hallucination samples

    precision = negative_preds_num / (positive_preds_num + negative_preds_num) if (positive_preds_num + negative_preds_num) > 0 else 0
    recall = negative_preds_num / negative_num if negative_num > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

    accuracy = (correct_positive + correct_negative) / total_samples
    return {
        "accuracy": accuracy,
        "recall": recall,
        "precision": precision,
        "f1": f1
    }
    


In [None]:
from transformers import TrainingArguments
from transformers import Trainer
import torch
from models_rob import TripletModel

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    logging_strategy="epoch",
    save_strategy="steps",  
    save_steps=10000,
    learning_rate=5e-6,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=10,
    weight_decay=0.01,
    fp16 = True,
    gradient_accumulation_steps=3,
    logging_dir="./logs",
    remove_unused_columns=False,
    report_to="tensorboard",
)

tri_model = TripletModel(base_model, triplet_loss)
trainer = Trainer(
    model=tri_model,
    args=training_args,
    train_dataset=tri_tokenized_datasets["train"],
    eval_dataset=tri_tokenized_datasets["dev"],
    data_collator=tri_data_collator,
    tokenizer=tri_tokenizer,
    compute_metrics=compute_tri_metrics,
)

In [18]:
trainer.evaluate()

{'eval_loss': 1.6933441162109375,
 'eval_model_preparation_time': 0.0024,
 'eval_accuracy': 0.49851632047477745,
 'eval_recall': 0.005934718100890208,
 'eval_precision': 0.4,
 'eval_f1': 0.011695906432748539,
 'eval_runtime': 4.8403,
 'eval_samples_per_second': 69.624,
 'eval_steps_per_second': 17.561}

In [19]:
trainer.train()

Epoch,Training Loss,Validation Loss,Model Preparation Time,Accuracy,Recall,Precision,F1
0,1.4971,1.207629,0.0024,0.695846,0.724036,0.685393,0.704185
1,1.1525,1.117136,0.0024,0.727003,0.813056,0.693671,0.748634
3,1.004,1.043206,0.0024,0.732938,0.845697,0.690073,0.76
4,0.949,1.178038,0.0024,0.700297,0.928783,0.637475,0.756039
6,0.8728,1.100712,0.0024,0.725519,0.896142,0.668142,0.765526
7,0.8435,1.070615,0.0024,0.743323,0.884273,0.689815,0.775033
9,0.7921,1.118366,0.0024,0.732938,0.910979,0.671772,0.7733


TrainOutput(global_step=3130, training_loss=0.9897091557804388, metrics={'train_runtime': 936.4163, 'train_samples_per_second': 40.153, 'train_steps_per_second': 3.343, 'total_flos': 0.0, 'train_loss': 0.9897091557804388, 'epoch': 9.98936170212766})

In [20]:
trainer.evaluate(eval_dataset=tri_tokenized_datasets["test"])

{'eval_loss': 1.1910415887832642,
 'eval_model_preparation_time': 0.0024,
 'eval_accuracy': 0.7116903633491312,
 'eval_recall': 0.9146919431279621,
 'eval_precision': 0.650561797752809,
 'eval_f1': 0.7603414313854235,
 'eval_runtime': 5.2242,
 'eval_samples_per_second': 121.166,
 'eval_steps_per_second': 30.435,
 'epoch': 9.98936170212766}

In [21]:
def create_dev_task(name):
    dev_data2 = [d for d in test_data if d["task_type"] == name]
    dev_id2 = [d["source_id"] for d in dev_data2]
    dev_id2 = list(set(dev_id2))
    dev_trip2 = create_trip(dev_data2, dev_id2)
    dev_df2 = pd.DataFrame(dev_trip2)
    dev_ds2 = Dataset.from_pandas(dev_df2)
    tri_tokenized_datasets_task = dev_ds2.map(tri_tokenize_function, batched=True)
    tri_tokenized_datasets_task = tri_tokenized_datasets_task.remove_columns(["anchor", "positive", "negative"])
    tri_tokenized_datasets_task.set_format("torch")
    return tri_tokenized_datasets_task

In [22]:
dev_qa = create_dev_task("QA")
trainer.evaluate(eval_dataset=dev_qa)

Map:   0%|          | 0/150 [00:00<?, ? examples/s]

  anchor_ids = [torch.tensor(x['anchor_input_ids']) for x in features]
  positive_ids = [torch.tensor(x['positive_input_ids']) for x in features]
  negative_ids = [torch.tensor(x['negative_input_ids']) for x in features]
  anchor_mask = [torch.tensor(x['anchor_attention_mask']) for x in features]
  positive_mask = [torch.tensor(x['positive_attention_mask']) for x in features]
  negative_mask = [torch.tensor(x['negative_attention_mask']) for x in features]
  labels = [torch.tensor(x['labels']) for x in features]


{'eval_loss': 1.0711263418197632,
 'eval_model_preparation_time': 0.0024,
 'eval_accuracy': 0.7266666666666667,
 'eval_recall': 0.8866666666666667,
 'eval_precision': 0.6717171717171717,
 'eval_f1': 0.7643678160919541,
 'eval_runtime': 1.1535,
 'eval_samples_per_second': 130.034,
 'eval_steps_per_second': 32.942,
 'epoch': 9.98936170212766}

In [23]:
dev_d2t = create_dev_task("Data2txt")
trainer.evaluate(eval_dataset=dev_d2t)

Map:   0%|          | 0/291 [00:00<?, ? examples/s]

  anchor_ids = [torch.tensor(x['anchor_input_ids']) for x in features]
  positive_ids = [torch.tensor(x['positive_input_ids']) for x in features]
  negative_ids = [torch.tensor(x['negative_input_ids']) for x in features]
  anchor_mask = [torch.tensor(x['anchor_attention_mask']) for x in features]
  positive_mask = [torch.tensor(x['positive_attention_mask']) for x in features]
  negative_mask = [torch.tensor(x['negative_attention_mask']) for x in features]
  labels = [torch.tensor(x['labels']) for x in features]


{'eval_loss': 0.8725352883338928,
 'eval_model_preparation_time': 0.0024,
 'eval_accuracy': 0.802405498281787,
 'eval_recall': 0.9243986254295533,
 'eval_precision': 0.7430939226519337,
 'eval_f1': 0.8238897396630935,
 'eval_runtime': 2.2254,
 'eval_samples_per_second': 130.766,
 'eval_steps_per_second': 32.804,
 'epoch': 9.98936170212766}

In [24]:
dev_sum = create_dev_task("Summary")
trainer.evaluate(eval_dataset=dev_sum)

Map:   0%|          | 0/192 [00:00<?, ? examples/s]

  anchor_ids = [torch.tensor(x['anchor_input_ids']) for x in features]
  positive_ids = [torch.tensor(x['positive_input_ids']) for x in features]
  negative_ids = [torch.tensor(x['negative_input_ids']) for x in features]
  anchor_mask = [torch.tensor(x['anchor_attention_mask']) for x in features]
  positive_mask = [torch.tensor(x['positive_attention_mask']) for x in features]
  negative_mask = [torch.tensor(x['negative_attention_mask']) for x in features]
  labels = [torch.tensor(x['labels']) for x in features]


{'eval_loss': 1.7708507776260376,
 'eval_model_preparation_time': 0.0024,
 'eval_accuracy': 0.5625,
 'eval_recall': 0.921875,
 'eval_precision': 0.5363636363636364,
 'eval_f1': 0.6781609195402298,
 'eval_runtime': 1.4686,
 'eval_samples_per_second': 130.739,
 'eval_steps_per_second': 32.685,
 'epoch': 9.98936170212766}

In [37]:
name = "./0102_triplet_rob"
trainer.save_model(name)
trainer.save_state()
tri_model.save_pretrained(name)