<a href="https://colab.research.google.com/github/tcharos/NLP-Toxicity-Detection/blob/main/AIDL_CS01_NLP_Project_task_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# AIDL_B_CS01: Advanced NLP Project

## LLM Tuning with DPO (Gordon Ramsay Alignment)

In [None]:
import os
import sys
import shutil
from google.colab import files

IN_COLAB = 'google.colab' in sys.modules
BASE_DIR = "/content" if IN_COLAB else "."
TOXICITY_PATH = os.path.join(BASE_DIR, "data_sets/toxicity")
SEED = 12345

if IN_COLAB:
    from unsloth import FastLanguageModel
else:
    from unsloth_mlx import FastLanguageModel

if IN_COLAB:
    print("Running in Google Colab. Installing NLP stack...")
    !pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
    !pip install -q -U "trl<=0.24.0" "datasets==4.3.0" transformers accelerate peft sentence-transformers
else:
    print("Running locally. Checking Mac-specific requirements...")
    !{sys.executable} -m pip install -q "tensorflow==2.16.2" "tensorflow-macos==2.16.2" "tf-keras~=2.16"
    !{sys.executable} -m pip install unsloth-mlx
    !{sys.executable} -m pip install -q -U "trl<=0.24.0" "datasets==4.3.0" transformers accelerate peft sentence-transformers

os.environ["KERAS_BACKEND"] = "tensorflow"

from unsloth import PatchDPOTrainer

import torch
import numpy as np
import pandas as pd
import glob
from datasets import Dataset
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import pearsonr, spearmanr
from sklearn.metrics import f1_score, confusion_matrix

from datasets import load_dataset, Dataset
from sentence_transformers import SentenceTransformer, util
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments
)

from peft import LoraConfig, get_peft_model
from trl import DPOConfig, DPOTrainer

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, SpatialDropout1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Report the TensorFlow side of the environment.
print(f"\nTensorFlow Version: {tf.__version__}")
print("Num GPUs Available (TF): ", len(tf.config.list_physical_devices('GPU')))

# Query each PyTorch backend once and reuse the flags below.
HAS_MPS = torch.backends.mps.is_available()
HAS_CUDA = torch.cuda.is_available()

# Preference order: Apple Metal, then CUDA, then plain CPU.
if HAS_MPS:
    _backend, _message = "mps", "PyTorch Device: Mac GPU (Metal)"
elif HAS_CUDA:
    _backend, _message = "cuda", "PyTorch Device: Colab GPU (CUDA)"
else:
    _backend, _message = "cpu", "PyTorch Device: CPU"

device = torch.device(_backend)
print(_message)

### Dataset Preparation

### Functions

In [None]:
def verify_and_get_files(folder, expected_default_name):
    """Return the path of a CSV inside *folder*, asking for an upload in Colab if none exists.

    The folder is created when missing. If it holds no ``*.csv`` file and the
    notebook runs in Colab, the user is prompted to upload files, which are
    then moved into *folder*.

    Args:
        folder: Directory that should contain the CSV file(s).
        expected_default_name: File name (not path) that is preferred when
            several CSVs are present.

    Returns:
        The path of the CSV named *expected_default_name* when it exists,
        otherwise the alphabetically first CSV in *folder*, or ``None`` when
        the folder contains no CSV at all.
    """
    os.makedirs(folder, exist_ok=True)
    pattern = os.path.join(folder, "*.csv")

    # glob returns files in arbitrary, filesystem-dependent order; sorting makes
    # the fallback choice below reproducible across machines and runs.
    existing_csvs = sorted(glob.glob(pattern))

    if not existing_csvs and IN_COLAB:
        print(f"ðŸ“¥ No CSV found in {folder}. Please upload your file.")
        uploaded = files.upload()
        for filename in uploaded:
            # shutil.move also works when the upload location and the target
            # folder live on different filesystems, where os.rename fails.
            shutil.move(filename, os.path.join(folder, filename))
        # Refresh the list after upload
        existing_csvs = sorted(glob.glob(pattern))

    if not existing_csvs:
        return None

    # Priority: the exact expected name, otherwise the first CSV found.
    for csv_path in existing_csvs:
        if os.path.basename(csv_path) == expected_default_name:
            return csv_path
    return existing_csvs[0]

In [None]:
# Folders holding the Ramsay CSVs, anchored at BASE_DIR like TOXICITY_PATH so
# they do not depend on the current working directory in Colab.
# Despite its name, `test_folder_path` is the folder the training file
# ("test.csv") is read from; `val_folder_path` holds the validation file.
test_folder_path = os.path.join(BASE_DIR, "data_sets/Ramsay/test")
val_folder_path = os.path.join(BASE_DIR, "data_sets/Ramsay/val")

In [None]:
# Code I used to concatenate all *.csv files into one test.csv - executed only once

# all_csv_files = glob.glob(os.path.join(test_folder_path, "*.csv"))
# valid_dfs = []
# required_cols = ["Question", "Polite", "Ramsay"]

# for f in all_csv_files:
#     try:
#         # Try UTF-8 first, fallback to cp1252 if it fails
#         try:
#             temp_df = pd.read_csv(f, encoding='utf-8')
#         except UnicodeDecodeError:
#             temp_df = pd.read_csv(f, encoding='cp1252')

#         # Check if the required columns exist
#         if all(col in temp_df.columns for col in required_cols):
#             valid_dfs.append(temp_df[required_cols])
#         else:
#             print(f"Skipping {f}: Missing required columns. Found: {temp_df.columns.tolist()}")

#     except Exception as e:
#         print(f"Could not load {f} due to error: {e}")

# # Combine only the valid ones
# if valid_dfs:
#     all_colleagues_data = pd.concat(valid_dfs, ignore_index=True)
#     # Requirement 4: Save to test.csv
#     all_colleagues_data.to_csv("test.csv", index=False)

#     # Take 500 for training
#     train_df = all_colleagues_data.sample(n=min(500, len(all_colleagues_data)), random_state=42)
#     print(f"Successfully loaded {len(valid_dfs)} files.")
#     print(f"Total training rows available: {len(all_colleagues_data)}")
# else:
#     print("No valid CSV files were loaded!")

In [None]:
# Resolve the CSV used for training (looked up in the "test" folder) and the
# CSV used for validation; each is None when no CSV could be found.
train_file = verify_and_get_files(
    folder=test_folder_path,
    expected_default_name="test.csv",
)
val_file = verify_and_get_files(
    folder=val_folder_path,
    expected_default_name="mscaidl-0077_ramsay_dataset.csv",
)

In [None]:
# Sanity check: report which files were found and show the validation columns.
if not (train_file and val_file):
    print("Files are missing. If you are not in Colab, please place CSVs in the folders manually.")
else:
    print(f"Training file located: {train_file}")
    print(f"Validation file located: {val_file}")

    try:
        # sep=None lets the python engine sniff the delimiter; utf-8-sig drops a BOM.
        sample_df = pd.read_csv(
            val_file,
            sep=None,
            engine='python',
            encoding='utf-8-sig',
        )
        print("\nSuccessfully connected to data. Preview of columns:")
        print(sample_df.columns.tolist())
    except Exception as err:
        # Best-effort preview only: report the problem and carry on.
        print(f"Error reading file: {err}")

In [None]:
def load_any_ramsay_csv(file_path, limit=None, is_train=True):
    """Load a Ramsay CSV and convert it into a DPO preference dataset.

    The file is read as text, every double quote is removed (malformed quoting
    otherwise triggers pandas ParserErrors), the delimiter is sniffed, and
    lines that still cannot be parsed are skipped.

    Args:
        file_path: Path of the CSV file to read.
        limit: Maximum number of rows to keep. ``None`` keeps every row.
        is_train: When true, rows are randomly sampled (seeded with ``SEED``);
            otherwise the first ``limit`` rows are taken in file order.

    Returns:
        A ``(dataset, df)`` tuple: ``dataset`` is a ``datasets.Dataset`` with
        the columns ``prompt`` (Question), ``chosen`` (Polite) and ``rejected``
        (Ramsay); ``df`` is the selected pandas DataFrame holding the three
        original columns.

    Raises:
        KeyError: If one of the required columns is missing from the file.
    """
    # Imported here because the notebook's import cell does not import `io`.
    import io

    # 1. Read raw text and remove double quotes to prevent ParserErrors
    with open(file_path, 'r', encoding='utf-8-sig', errors='ignore') as f:
        content = f.read().replace('"', '')

    df = pd.read_csv(io.StringIO(content), sep=None, engine='python', on_bad_lines='skip')

    # Headers frequently carry stray whitespace around the names.
    df.columns = [str(c).strip() for c in df.columns]

    required_cols = ["Question", "Polite", "Ramsay"]
    missing_cols = [c for c in required_cols if c not in df.columns]
    if missing_cols:
        raise KeyError(
            f"{file_path} is missing required column(s) {missing_cols}; "
            f"found: {df.columns.tolist()}"
        )
    df = df[required_cols]

    if is_train:
        # Random sample of at most `limit` rows; all rows when limit is None.
        n_rows = len(df) if limit is None else min(limit, len(df))
        df = df.sample(n=n_rows, random_state=SEED)
    elif limit is not None:
        # First `limit` rows in file order.
        df = df.head(limit)

    # Hand back an independent frame: the evaluation cell adds columns to it,
    # which should not operate on a slice of the parsed data.
    df = df.copy()

    print(f"Successfully loaded {len(df)} rows from {file_path}")

    return Dataset.from_dict({
        "prompt":   df["Question"].astype(str).tolist(),
        "chosen":   df["Polite"].astype(str).tolist(),
        "rejected": df["Ramsay"].astype(str).tolist(),
    }), df

# Train dataset: random sample of up to 500 rows from the file located earlier
# as `train_file` (there is no `train_file_path` variable in this notebook).
train_dataset, _ = load_any_ramsay_csv(train_file, limit=500, is_train=True)

# Validation dataset: first 100 rows of `val_file`; the raw frame is kept for
# the evaluation cell, which appends the model outputs to it.
eval_dataset, eval_df_raw = load_any_ramsay_csv(val_file, limit=100, is_train=False)

### SLM from Unsloth (not Zephyr)



In [None]:
# Base checkpoint to align; the 1B variant is kept as a lighter alternative.
model_name = "unsloth/Llama-3.2-3B-Instruct"
#model_name = "unsloth/Llama-3.2-1B-Instruct"

max_seq_length = 2048  # longest sequence the loaded model is configured for
dtype = None           # None lets the loader pick Float16 or Bfloat16
load_in_4bit = True    # 4-bit weights keep DPO within GPU memory

# Load the quantized model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Attach LoRA adapters so only a small fraction of the weights is trained.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,  # value Unsloth is optimized for
    bias="none",     # value Unsloth is optimized for
    use_gradient_checkpointing="unsloth",  # lowers VRAM usage
    random_state=3407,
)

print(f"Model {model_name} loaded successfully with LoRA adapters.")

In [None]:
# Hyperparameters for the DPO run, collected in one place so the training cell
# can read them by key.
BASE_CONFIG = dict(
    # batching / schedule
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_ratio=0.1,
    num_train_epochs=3,
    learning_rate=5e-5,
    logging_steps=1,
    # optimizer
    optim="paged_adamw_32bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    # DPO-specific settings
    beta=0.2,
    max_prompt_length=512,
    max_length=1024,
)

In [None]:
# Apply Unsloth's DPO patches before the trainer is constructed.
PatchDPOTrainer()

# Mixed precision is only requested when a CUDA device is present: bf16 when
# the GPU supports it, fp16 otherwise. Without CUDA both flags stay False
# instead of forcing fp16 on hardware that cannot run it.
_cuda_available = torch.cuda.is_available()
_use_bf16 = _cuda_available and torch.cuda.is_bf16_supported()

training_args = DPOConfig(
    per_device_train_batch_size = BASE_CONFIG['per_device_train_batch_size'],
    gradient_accumulation_steps = BASE_CONFIG['gradient_accumulation_steps'],
    warmup_ratio                = BASE_CONFIG['warmup_ratio'],
    num_train_epochs            = BASE_CONFIG['num_train_epochs'],
    learning_rate               = BASE_CONFIG['learning_rate'],
    fp16                        = _cuda_available and not _use_bf16,
    bf16                        = _use_bf16,
    logging_steps               = BASE_CONFIG['logging_steps'],
    optim                       = BASE_CONFIG['optim'],
    weight_decay                = BASE_CONFIG['weight_decay'],
    lr_scheduler_type           = BASE_CONFIG['lr_scheduler_type'],
    seed                        = SEED,
    output_dir                  = "outputs",
    eval_strategy               = "steps",
    eval_steps                  = 10,
    report_to                   = "none",

    # DPO-specific settings live only here, read from BASE_CONFIG, so there is
    # a single source of truth for beta and the length limits.
    beta                        = BASE_CONFIG['beta'],
    max_prompt_length           = BASE_CONFIG['max_prompt_length'],
    max_length                  = BASE_CONFIG['max_length'],
)

dpo_trainer = DPOTrainer(
    model = model,
    # No explicit reference model is passed; with a LoRA model the trainer is
    # expected to derive the reference from the base weights - TODO confirm.
    ref_model = None,
    args = training_args,
    train_dataset = train_dataset,
    eval_dataset = eval_dataset,
    # `processing_class` is the trainer argument that receives the tokenizer.
    processing_class = tokenizer,
)

print("--- Training Started ---")
dpo_trainer.train()

In [None]:
# Switch the model to Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

sim_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

questions = eval_df_raw["Question"].tolist()
prompts = [f"Question: {q}\nResponse:" for q in questions]

# Generate in small batches so the whole validation set never has to fit on
# the accelerator at once.
GENERATION_BATCH_SIZE = 16

print(f"Generating responses for {len(questions)} questions...")

# Decoder-only models continue from the last position of each row, so batched
# prompts must be padded on the left. The original setting is restored
# afterwards because the tokenizer is saved to disk later in the notebook.
_original_padding_side = tokenizer.padding_side
tokenizer.padding_side = "left"
model_results = []
try:
    for start in range(0, len(prompts), GENERATION_BATCH_SIZE):
        batch_prompts = prompts[start:start + GENERATION_BATCH_SIZE]
        # Put the inputs on whatever device the model lives on instead of
        # assuming CUDA.
        inputs = tokenizer(batch_prompts, return_tensors="pt", padding=True).to(model.device)
        outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)

        # Keep only the newly generated tokens: with left padding every prompt
        # occupies exactly the first `prompt_len` positions of its row.
        prompt_len = inputs["input_ids"].shape[1]
        decoded = tokenizer.batch_decode(outputs[:, prompt_len:], skip_special_tokens=True)
        model_results.extend(text.strip() for text in decoded)
finally:
    tokenizer.padding_side = _original_padding_side

eval_df_raw["Model_Result"] = model_results

print("Calculating Cosine Similarity...")
model_embeddings = sim_model.encode(eval_df_raw["Model_Result"].tolist(), convert_to_tensor=True)
# astype(str) guards against empty cells, which pandas reads as float NaN.
polite_embeddings = sim_model.encode(eval_df_raw["Polite"].astype(str).tolist(), convert_to_tensor=True)

# how close the model got to the "Polite" target: the diagonal of the pairwise
# matrix pairs each generated answer with its own reference answer
cosine_scores = util.cos_sim(model_embeddings, polite_embeddings)
eval_df_raw["Similarity_Score"] = torch.diag(cosine_scores).cpu().tolist()

eval_df_raw.to_csv("mscaidl-0077_task_5_results.csv", index=False)

avg_sim = eval_df_raw['Similarity_Score'].mean()
print(f"âœ… Process Complete!")
print(f"ðŸ“Š Average Similarity Score: {avg_sim:.4f}")

In [None]:
# Write the full model and tokenizer to ./dpo_ramsay_model using the
# "merged_16bit" save method.
model.save_pretrained_merged(
    "dpo_ramsay_model",
    tokenizer,
    save_method="merged_16bit",
)

In [None]:
# this step takes ~15 mins because it saves the full model ~5GB
# we could save only LoRA (the changes we made at the DPO step)

# shutil.make_archive("dpo_ramsay_model", 'zip', "dpo_ramsay_model")

# files.download("dpo_ramsay_model.zip")

In [None]:
# LoRA save only: write the adapter and tokenizer to one folder and zip it.

model.save_pretrained("dpo_ramsay_lora_only")
tokenizer.save_pretrained("dpo_ramsay_lora_only")

shutil.make_archive("dpo_ramsay_lora_only", 'zip', "dpo_ramsay_lora_only")

# The browser download helper only exists in Colab; locally the archive is
# simply left in the working directory.
if IN_COLAB:
    files.download("dpo_ramsay_lora_only.zip")