In [None]:
!pip install evaluate bert-score datasets transformers torch tqdm

Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate, bert-score
Successfully installed bert-score-0.3.13 evaluate-0.4.6


In [None]:
from datasets import load_dataset, Dataset
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score, roc_auc_score, average_precision_score, accuracy_score, classification_report
from sklearn.metrics import precision_score, recall_score

from sklearn.model_selection import train_test_split

import json
from pathlib import Path

import torch
from tqdm import tqdm
from transformers import TrainingArguments, Trainer
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, AutoModel
from bert_score import score as bertscore
import evaluate

In [None]:
# Mount Google Drive at /content/drive; the dataset CSVs and the model
# checkpoints used in this notebook are read from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Every input CSV lives in the same Drive folder, so the folder is spelled out once.
DATA_DIR = "/content/drive/MyDrive/NLP/LLM_Hallucination"

X_train_path = f"{DATA_DIR}/X_train.csv"
X_val_path = f"{DATA_DIR}/X_val.csv"
# Test features are read from the paraphrased summarization file rather than X_test.csv:
# the evaluation cells use both its "summary" and "summary_paraphrased" columns.
X_test_path = f"{DATA_DIR}/halueval_summarization_paraphrased.csv"
y_train_path = f"{DATA_DIR}/y_train.csv"
y_val_path = f"{DATA_DIR}/y_val.csv"
y_test_path = f"{DATA_DIR}/y_test.csv"


X_train = pd.read_csv(X_train_path)
X_val = pd.read_csv(X_val_path)
X_test = pd.read_csv(X_test_path)
y_train = pd.read_csv(y_train_path)
y_val = pd.read_csv(y_val_path)
y_test = pd.read_csv(y_test_path)

# Labels are attached to the features row by row later on, so a feature/label file pair
# with different row counts must fail here instead of silently misaligning.
assert len(X_train) == len(y_train), "X_train and y_train have different row counts"
assert len(X_val) == len(y_val), "X_val and y_val have different row counts"
assert len(X_test) == len(y_test), "X_test and y_test have different row counts"

print(f"Training set size: {len(X_train)} samples")
print(f"Validation set size: {len(X_val)} samples")
print(f"Testing set size: {len(X_test)} samples")
print(f"Training labels size: {len(y_train)} samples")
print(f"Validation labels size: {len(y_val)} samples")
print(f"Testing labels size: {len(y_test)} samples")

Training set size: 8000 samples
Validation set size: 1000 samples
Testing set size: 1000 samples
Training labels size: 8000 samples
Validation labels size: 1000 samples
Testing labels size: 1000 samples


In [None]:
# Load the pretrained tokenizer and encoder, adding a 2-class classification head.
# The classifier/pooler weights are not in the checkpoint and are newly initialized
# (see the warning printed below), so the model must be fine-tuned before use.
model_name = "microsoft/deberta-v3-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
def preprocess_dataset(data, tokenizer, summary_column="summary", max_length=None,
                       label_column="label_binary"):
  """
  Tokenizes a batch of (document, summary) pairs and attaches the class labels.

  Written to be used with `Dataset.map(..., batched=True)`: `data` is indexed by
  column name and each column is expected to yield one value per example.

  Args:
    data: Batch with a "document" column, the column named by `summary_column`,
      and the column named by `label_column`.
    tokenizer: Callable tokenizer that accepts a list of (text, text_pair) tuples.
    summary_column: Name of the column holding the summary text to pair with
      each document.
    max_length: Length every encoded pair is padded/truncated to. When None,
      the notebook-level MAX_LENGTH constant is used.
    label_column: Name of the column holding the integer class labels.

  Returns:
    The tokenizer output with an extra "labels" entry (torch.long tensor).
  """
  if max_length is None:
    # Keeps existing call sites working: they rely on the global constant.
    max_length = MAX_LENGTH

  # One (document, summary) tuple per example so the tokenizer encodes them as pairs.
  text_pairs = list(zip(data["document"], data[summary_column]))

  encoded_inputs = tokenizer(
      text_pairs,
      max_length=max_length,
      padding="max_length",  # every example is padded to exactly max_length
      truncation=True,
      return_tensors="pt"
  )

  # Classification losses expect integer (long) class ids under the "labels" key.
  encoded_inputs["labels"] = torch.tensor(data[label_column], dtype=torch.long)

  return encoded_inputs

In [None]:
# Accuracy scorer, loaded once and reused for every evaluation pass.
metric = evaluate.load('accuracy')

def compute_metrics(p):
    """Turn a (logits, labels) evaluation pair into the accuracy metric dict.

    The predicted class for each example is the index of its largest logit.
    """
    logits, references = p
    predicted_classes = np.argmax(logits, axis=1)
    return metric.compute(predictions=predicted_classes, references=references)

Downloading builder script: 0.00B [00:00, ?B/s]

In [None]:
def _build_tokenized_split(features, labels, tokenizer):
  """
  Attaches labels to a feature frame and returns it as a tokenized datasets.Dataset.
  """
  # Labels may arrive as a one-column DataFrame (what pd.read_csv returns), a Series
  # or an array; flattening accepts all of them and attaches the labels by position.
  label_values = np.asarray(labels).ravel()
  if len(label_values) != len(features):
    raise ValueError(
        f"Got {len(label_values)} labels for {len(features)} feature rows"
    )

  # Work on a copy so the caller's feature frame is not modified.
  labeled_df = features.copy()
  labeled_df['label_binary'] = label_values

  dataset = Dataset.from_pandas(labeled_df)
  return dataset.map(preprocess_dataset, batched=True, fn_kwargs={'tokenizer': tokenizer})


def fine_tune_deberta_model(
    model,
    tokenizer,
    train_data_X, # Training features DataFrame (e.g. X_train)
    train_data_y, # Training labels: one-column DataFrame, Series or array (e.g. y_train)
    val_data_X,   # Validation features DataFrame (e.g. X_val)
    val_data_y,   # Validation labels: one-column DataFrame, Series or array (e.g. y_val)
    batch_size = 16,
    num_epochs = 2,
    output_dir = "/content/drive/MyDrive/NLP/LLM_Hallucination/deberta_hallucinations"
):
  """
  Preprocesses data using the selected tokenizer and fine-tunes a pre-trained model.

  The model is trained in place. It is evaluated on the validation split and a
  checkpoint is saved under `output_dir` at the end of every epoch.

  Args:
    model: Sequence-classification model to fine-tune.
    tokenizer: Tokenizer forwarded to `preprocess_dataset`.
    train_data_X, val_data_X: Feature frames containing the text columns that
      `preprocess_dataset` reads.
    train_data_y, val_data_y: Labels in the same row order as the matching features.
    batch_size: Per-device batch size for both training and evaluation.
    num_epochs: Number of training epochs.
    output_dir: Directory the Trainer writes checkpoints to.

  Returns:
    The value returned by `trainer.train()`.

  Raises:
    ValueError: If a label collection and its feature frame differ in length.
  """
  preprocessed_train_data = _build_tokenized_split(train_data_X, train_data_y, tokenizer)
  preprocessed_val_data = _build_tokenized_split(val_data_X, val_data_y, tokenizer)

  training_args = TrainingArguments(
      output_dir=output_dir,
      per_device_train_batch_size=batch_size,
      per_device_eval_batch_size=batch_size,
      num_train_epochs=num_epochs,
      eval_strategy="epoch",
      save_strategy="epoch",
      report_to="none"
  )

  trainer = Trainer(
      model=model,
      args=training_args,
      train_dataset=preprocessed_train_data,
      eval_dataset=preprocessed_val_data,
      compute_metrics=compute_metrics
  )

  # Hand the training summary back to the caller instead of discarding it.
  return trainer.train()

In [None]:
# Token length every (document, summary) pair is padded/truncated to;
# preprocess_dataset passes it to the tokenizer as max_length.
MAX_LENGTH = 512

In [None]:
# Fine-tune with the function defaults (batch_size=16, num_epochs=2). A checkpoint is
# saved once per epoch under the Drive directory set in fine_tune_deberta_model.
fine_tune_deberta_model(model, tokenizer, X_train, y_train, X_val, y_val)

model.safetensors:   0%|          | 0.00/371M [00:00<?, ?B/s]

Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.2353,0.114754,0.97
2,0.0448,0.101928,0.977


In [None]:
# Reload the fine-tuned model from a saved checkpoint on Drive.
# NOTE(review): checkpoint-1000 is presumably the final checkpoint
# (8000 training rows / batch size 16 * 2 epochs = 1000 steps on one device);
# confirm the folder name if the data size, batch size or epoch count changes.
model_path = "/content/drive/MyDrive/NLP/LLM_Hallucination/deberta_hallucinations/checkpoint-1000"
model = AutoModelForSequenceClassification.from_pretrained(model_path)

In [None]:
# Inference-only Trainer around the fine-tuned model. TrainingArguments still needs an
# output_dir even though nothing is trained in this cell.
eval_args = TrainingArguments(
    report_to="none",
    per_device_eval_batch_size=16,
    output_dir="deberta_hallucinations_eval",
)
eval_trainer = Trainer(args=eval_args, model=model)

# Attach the gold labels to the test features, then tokenize the default "summary" column.
test_df = X_test.copy()
test_df['label_binary'] = y_test
test_dataset = Dataset.from_pandas(test_df)
preprocessed_test_data = test_dataset.map(
    preprocess_dataset,
    batched=True,
    fn_kwargs={'tokenizer': tokenizer},
)

# Run the model over the test set; the output carries both the logits and the labels.
predictions_output = eval_trainer.predict(preprocessed_test_data)
predictions_logits, true_labels = predictions_output.predictions, predictions_output.label_ids

# Predicted class (0 or 1) = index of the larger logit in each row.
predicted_labels = predictions_logits.argmax(axis=1)

# Headline metrics; precision/recall/F1 are reported for the positive class (label 1).
accuracy = accuracy_score(y_true=true_labels, y_pred=predicted_labels)
precision = precision_score(y_true=true_labels, y_pred=predicted_labels, average='binary')
recall = recall_score(y_true=true_labels, y_pred=predicted_labels, average='binary')
f1 = f1_score(y_true=true_labels, y_pred=predicted_labels, average='binary')

print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1-Score: {f1:.4f}")

# Per-class breakdown.
print("\nClassification Report:")
print(classification_report(true_labels, predicted_labels))


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Test Accuracy: 0.9710
Test Precision: 0.9667
Test Recall: 0.9762
Test F1-Score: 0.9714

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.97      0.97       495
           1       0.97      0.98      0.97       505

    accuracy                           0.97      1000
   macro avg       0.97      0.97      0.97      1000
weighted avg       0.97      0.97      0.97      1000



In [None]:
# Evaluate the fine-tuned model on the paraphrased summaries: same documents and labels,
# but the "summary_paraphrased" column is tokenized instead of "summary".
para_test_df = X_test.copy()
para_test_df['label_binary'] = y_test
para_test_dataset = Dataset.from_pandas(para_test_df)
# Map the dataset built in this cell, so the cell does not depend on a `test_dataset`
# variable left over from an earlier cell.
preprocessed_test_data_para = para_test_dataset.map(
    preprocess_dataset,
    batched=True,
    fn_kwargs={"tokenizer": tokenizer, "summary_column": "summary_paraphrased"},
)

# Re-instantiate TrainingArguments for evaluation (output_dir is needed)
eval_args = TrainingArguments(
    output_dir="deberta_hallucinations_eval", # A different directory for eval outputs if desired
    per_device_eval_batch_size=16,
    report_to="none"
)

# Instantiate a Trainer with the fine-tuned model for prediction
eval_trainer = Trainer(
    model=model,
    args=eval_args
)

# Get predictions on the paraphrased test set
predictions_output = eval_trainer.predict(preprocessed_test_data_para)

# Extract predicted logits and true labels
predictions_logits = predictions_output.predictions
true_labels = predictions_output.label_ids

# Convert logits to predicted labels (0 or 1)
predicted_labels = np.argmax(predictions_logits, axis=1)

# Calculate metrics (precision/recall/F1 are for the positive class, label 1)
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, average='binary')
recall = recall_score(true_labels, predicted_labels, average='binary')
f1 = f1_score(true_labels, predicted_labels, average='binary')

print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1-Score: {f1:.4f}")

# Optionally, print a full classification report
print("\nClassification Report:")
print(classification_report(true_labels, predicted_labels))

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Test Accuracy: 0.8990
Test Precision: 0.8556
Test Recall: 0.9624
Test F1-Score: 0.9059

Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.83      0.89       495
           1       0.86      0.96      0.91       505

    accuracy                           0.90      1000
   macro avg       0.91      0.90      0.90      1000
weighted avg       0.91      0.90      0.90      1000

