In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, DataCollatorWithPadding 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from datasets import Dataset 
import pandas as pd
import numpy as np
import torch
import evaluate 
import os 

In [2]:
data = pd.read_csv('../data/data_preprocessed.csv')
dataset = Dataset.from_pandas(data)

In [3]:
# Loading pre-trained model and tokenizer 

model = AutoModelForSequenceClassification.from_pretrained('JamesH/Movie_review_sentiment_analysis_model')
tokenizer = AutoTokenizer.from_pretrained('JamesH/Movie_review_sentiment_analysis_model')
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [4]:
def tokenize_function(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True)

In [5]:
# Tokenizing the dataset before train/test splitting and selecting a small subset for model fine tuning 

tokenized_ds = dataset.map(tokenize_function, batched=True)
tokenized_ds = tokenized_ds.train_test_split(test_size=0.2)

small_train = tokenized_ds['train'].shuffle(seed=13).select(range(200))
small_test = tokenized_ds['test'].shuffle(seed=13).select(range(200))

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Asking to pad to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no padding.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [6]:
current_dir = os.getcwd()
main_dir = os.path.abspath(os.path.join(current_dir, '..'))
models_dir = os.path.abspath(os.path.join(main_dir, 'models')) # Creating a new directory for saving models 

In [7]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred 
    predictions = np.argmax(logits, axis=-1)
    
    # Evaluation metrics for classification
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average=None)
    acc = accuracy_score(labels, predictions)
    auc = roc_auc_score(labels, logits[:, 1])

    return {
        "accuracy": acc,
        "f1": f1,
        "precision": precision,
        "recall": recall,
        "auc": auc
        }

In [8]:
training_args = TrainingArguments(
    output_dir=models_dir,
    save_strategy='no',
    eval_strategy='epoch',
    num_train_epochs=4,
    learning_rate=2e-5,
    per_device_train_batch_size=2,                                                         
    per_device_eval_batch_size=2, 
    fp16=True,
    warmup_ratio=0.1,
    optim="adamw_8bit",
)

In [9]:
trainer = Trainer(
    model=model,                        # Your model (e.g., BertForSequenceClassification)
    args=training_args,                # TrainingArguments
    train_dataset=small_train,      # Your training dataset
    eval_dataset=small_test,        # Optional: for evaluation
    data_collator=data_collator,              # Optional: if using Hugging Face tokenizer
    compute_metrics=compute_metrics,  # Optional: your custom metric function
)

In [10]:
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Auc
1,No log,0.219215,0.95,[0.94897959 0.95098039],[0.93939394 0.96039604],[0.95876289 0.94174757],0.987088
2,No log,0.49138,0.915,[0.9178744 0.9119171],[0.86363636 0.97777778],[0.97938144 0.85436893],0.970473
3,No log,0.410881,0.94,[0.94 0.94],[0.91262136 0.96907216],[0.96907216 0.91262136],0.978381
4,No log,0.432977,0.935,[0.93532338 0.93467337],[0.90384615 0.96875 ],[0.96907216 0.90291262],0.97748


TrainOutput(global_step=400, training_loss=0.2230299186706543, metrics={'train_runtime': 188.5695, 'train_samples_per_second': 4.242, 'train_steps_per_second': 2.121, 'total_flos': 165157565750472.0, 'train_loss': 0.2230299186706543, 'epoch': 4.0})

In [11]:
trainer.evaluate()

{'eval_loss': 0.43297669291496277,
 'eval_accuracy': 0.935,
 'eval_f1': array([0.93532338, 0.93467337]),
 'eval_precision': array([0.90384615, 0.96875   ]),
 'eval_recall': array([0.96907216, 0.90291262]),
 'eval_auc': 0.9774797317585827,
 'eval_runtime': 10.5516,
 'eval_samples_per_second': 18.954,
 'eval_steps_per_second': 9.477,
 'epoch': 4.0}

In [12]:
def op_compute_metrics(eval_pred):
    logits, labels = eval_pred 
    predictions = np.argmax(logits, axis=-1)
    
    # Evaluation metrics for classification
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average=None)
    acc = accuracy_score(labels, predictions)

    return {
        "accuracy": float(acc),
        "precision_class_0": float(precision[0]),
        "precision_class_1": float(precision[1]),
        "recall_class_0": float(recall[0]),
        "recall_class_1": float(recall[1]),
        "f1_class_0": float(f1[0]),
        "f1_class_1": float(f1[1])
        }

In [14]:
op_training_args = TrainingArguments(
    output_dir=models_dir,
    save_strategy='no',
    eval_strategy='epoch',
    num_train_epochs=4, # no change from epoch 3 to epoch 4 during initial training
    learning_rate=2e-5,
    per_device_train_batch_size=2,                                                         
    per_device_eval_batch_size=2, 
    fp16=True,
    warmup_ratio=0.1,
    optim="adamw_8bit",
)

In [15]:
# Optimizing hyperparameters 

def model_init():
    return AutoModelForSequenceClassification.from_pretrained('JamesH/Movie_review_sentiment_analysis_model')

trainer = Trainer(
    model_init=model_init,
    args=op_training_args,
    train_dataset=small_train,
    eval_dataset=small_test,
    data_collator=data_collator,
    compute_metrics=op_compute_metrics,
)
# Define hyperparameter search space
def hp_space(trial):
    return {
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 5e-4, log=True),
        "warmup_ratio": trial.suggest_float("warmup_ratio", 0.0, 0.1),
        "num_train_epochs": trial.suggest_int("num_train_epochs", 2, 5),
        "gradient_accumulation_steps": trial.suggest_categorical("gradient_accumulation_steps", [1, 2, 4, 8])
        
    }


best_trial = trainer.hyperparameter_search(
    direction="maximize",  # Maximize metric 
    hp_space=hp_space,
    backend="optuna",
    n_trials=5# Number of trials to run
)

print(f"Best trial: {best_trial}")

[I 2025-07-06 00:07:27,088] A new study created in memory with name: no-name-a46bca1a-c203-4330-87f5-519b2f37fd71


Epoch,Training Loss,Validation Loss,Accuracy,Precision Class 0,Precision Class 1,Recall Class 0,Recall Class 1,F1 Class 0,F1 Class 1
1,No log,1.740802,0.57,0.530055,1.0,1.0,0.165049,0.692857,0.283333
2,No log,0.251927,0.925,0.886792,0.968085,0.969072,0.883495,0.926108,0.923858
3,No log,0.362987,0.92,0.878505,0.967742,0.969072,0.873786,0.921569,0.918367


[I 2025-07-06 00:12:04,034] Trial 0 finished with value: 6.449041155486267 and parameters: {'learning_rate': 7.200613184257478e-05, 'warmup_ratio': 0.06343566675585444, 'num_train_epochs': 3, 'gradient_accumulation_steps': 8}. Best is trial 0 with value: 6.449041155486267.


Epoch,Training Loss,Validation Loss,Accuracy,Precision Class 0,Precision Class 1,Recall Class 0,Recall Class 1,F1 Class 0,F1 Class 1
1,No log,0.768023,0.825,0.738462,0.985714,0.989691,0.669903,0.845815,0.797688
2,No log,0.303006,0.94,0.920792,0.959596,0.958763,0.92233,0.939394,0.940594
3,No log,0.319054,0.94,0.920792,0.959596,0.958763,0.92233,0.939394,0.940594
4,No log,0.329368,0.94,0.920792,0.959596,0.958763,0.92233,0.939394,0.940594


[I 2025-07-06 00:22:06,766] Trial 1 finished with value: 6.581469021289077 and parameters: {'learning_rate': 1.4373369541741355e-05, 'warmup_ratio': 0.002241635644796569, 'num_train_epochs': 4, 'gradient_accumulation_steps': 2}. Best is trial 1 with value: 6.581469021289077.


Epoch,Training Loss,Validation Loss,Accuracy,Precision Class 0,Precision Class 1,Recall Class 0,Recall Class 1,F1 Class 0,F1 Class 1
1,No log,1.042246,0.79,0.70073,0.984127,0.989691,0.601942,0.820513,0.746988
2,No log,0.321819,0.95,0.930693,0.969697,0.969072,0.932039,0.949495,0.950495
3,No log,0.362938,0.945,0.921569,0.969388,0.969072,0.92233,0.944724,0.945274


[I 2025-07-06 00:41:43,494] Trial 2 finished with value: 6.617355894520101 and parameters: {'learning_rate': 1.8298827638569346e-05, 'warmup_ratio': 0.06384813740224214, 'num_train_epochs': 3, 'gradient_accumulation_steps': 1}. Best is trial 2 with value: 6.617355894520101.


Epoch,Training Loss,Validation Loss,Accuracy,Precision Class 0,Precision Class 1,Recall Class 0,Recall Class 1,F1 Class 0,F1 Class 1
1,No log,0.426808,0.885,0.95122,0.838983,0.804124,0.961165,0.871508,0.895928
2,No log,0.262356,0.93,0.936842,0.92381,0.917526,0.941748,0.927083,0.932692


[I 2025-07-06 00:46:30,784] Trial 3 finished with value: 6.5097006161097335 and parameters: {'learning_rate': 7.084851738585837e-05, 'warmup_ratio': 0.06880538794097325, 'num_train_epochs': 2, 'gradient_accumulation_steps': 8}. Best is trial 2 with value: 6.617355894520101.


Epoch,Training Loss,Validation Loss,Accuracy,Precision Class 0,Precision Class 1,Recall Class 0,Recall Class 1,F1 Class 0,F1 Class 1
1,No log,0.21171,0.95,0.930693,0.969697,0.969072,0.932039,0.949495,0.950495
2,No log,0.255989,0.945,0.921569,0.969388,0.969072,0.92233,0.944724,0.945274
3,No log,0.270911,0.945,0.93,0.96,0.958763,0.932039,0.944162,0.945813
4,No log,0.24458,0.95,0.948454,0.951456,0.948454,0.951456,0.948454,0.951456


[I 2025-07-06 01:33:22,645] Trial 4 finished with value: 6.6497297567811025 and parameters: {'learning_rate': 1.2090707659718472e-05, 'warmup_ratio': 0.09356655990374085, 'num_train_epochs': 4, 'gradient_accumulation_steps': 4}. Best is trial 4 with value: 6.6497297567811025.


Best trial: BestRun(run_id='4', objective=6.6497297567811025, hyperparameters={'learning_rate': 1.2090707659718472e-05, 'warmup_ratio': 0.09356655990374085, 'num_train_epochs': 4, 'gradient_accumulation_steps': 4}, run_summary=None)


In [16]:
# Creating a subdirectory for the tuned model and saving it 

ft_model = os.path.abspath(os.path.join(models_dir, 'finetuned_model'))
  
os.makedirs(ft_model, exist_ok=True)

model.save_pretrained(ft_model)
tokenizer.save_pretrained(ft_model)         

Non-default generation parameters: {'max_length': 512}


('C:\\Users\\samhk\\LLM-Project\\models\\finetuned_model\\tokenizer_config.json',
 'C:\\Users\\samhk\\LLM-Project\\models\\finetuned_model\\special_tokens_map.json',
 'C:\\Users\\samhk\\LLM-Project\\models\\finetuned_model\\tokenizer.json')