In [None]:
!pip install evaluate

In [None]:
import torch
import gc
import random
import numpy as np
import pandas as pd
import evaluate
from typing import Dict, Any, List, Optional, Tuple
from dataclasses import dataclass, asdict

# Hugging Face Libraries
from datasets import load_dataset, DatasetDict
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
    EvalPrediction
)
from peft import LoraConfig, TaskType, get_peft_model

In [None]:
SEED = 42
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# ---------------------------------------------------------------------------
# Hyperparameter search space
# The learning rate is a continuous interval; every other hyperparameter is
# picked from one of the discrete option lists below.
# ---------------------------------------------------------------------------
LR_MIN, LR_MAX = 1e-5, 2e-4
WARMUP_OPTIONS = [0.0, 0.06, 0.1]
RANK_OPTIONS = [2, 4, 8, 16, 24]
ALPHA_OPTIONS = [8, 16, 32, 64, 96]
DROPOUT_OPTIONS = [0.0, 0.05, 0.1, 0.2]
TARGET_MODULE_OPTIONS = [
    ["q_lin", "v_lin"],
    ["q_lin", "v_lin", "ffn.lin1", "ffn.lin2"],
]  # exactly two alternatives

# ---------------------------------------------------------------------------
# Per-gene bounds, in gene order:
#   [learning rate, warmup idx, rank idx, alpha idx, dropout idx, modules idx]
# The learning-rate gene holds the real value. Each discrete gene holds a
# float "index" in [0, len(options) - 0.01], so truncating it with int()
# always gives a valid position in the matching option list.
# ---------------------------------------------------------------------------
MIN_BOUNDS = [LR_MIN] + [0] * 5
MAX_BOUNDS = [LR_MAX] + [
    len(options) - 0.01
    for options in (
        WARMUP_OPTIONS,
        RANK_OPTIONS,
        ALPHA_OPTIONS,
        DROPOUT_OPTIONS,
        TARGET_MODULE_OPTIONS,
    )
]

In [None]:
# Data Loading
dataset = load_dataset('dair-ai/emotion')

# Train on a fixed-size random subset to reduce compute time (recommendation
# from the cw brief). The shuffle uses the notebook-wide SEED rather than a
# second hard-coded copy of the same number, so changing SEED changes the
# subset together with every other seeded component.
TRAIN_SUBSET_SIZE = 3000
train_dataset = dataset['train'].shuffle(seed=SEED).select(range(TRAIN_SUBSET_SIZE))
val_dataset = dataset['validation']  # the full validation split is kept

In [None]:
# Tokenization
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

def tokenize_func(examples):
  """Tokenize a batch of examples, truncating each text to at most 128 tokens.

  Args:
      examples: A batch from `Dataset.map(..., batched=True)`; only its
          'text' column is read.

  Returns:
      The tokenizer output for the batch, left unpadded.

  Padding is intentionally not done here. The training code builds a
  `DataCollatorWithPadding` for the Trainer, which pads every mini-batch to
  the longest sequence in that mini-batch. Padding at map time as well would
  stretch each row to the longest text of its (much larger) map batch and
  spend compute on pad tokens.
  """
  return tokenizer(
      examples['text'],
      truncation=True,
      max_length=128
  )

tokenized_train = train_dataset.map(tokenize_func, batched=True)
tokenized_val = val_dataset.map(tokenize_func, batched=True)

In [None]:
# Helper functions
def set_global_seed(seed: int):
    """Seed Python's `random`, NumPy and PyTorch with the same value.

    Args:
        seed: Integer handed to every random number generator.

    When CUDA is available, the CUDA generator of the current device is
    seeded as well.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed(seed)


def cleanup_memory():
    """Collect unreachable Python objects, then release cached GPU memory.

    The garbage collector runs first so that tensors kept alive only by
    reference cycles are freed before the CUDA cache is emptied; emptying the
    cache first would leave the memory of those tensors cached. The CUDA call
    is skipped on machines without CUDA.
    """
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


def find_nearest(value, options):
    """Return the entry of `options` with the smallest absolute distance to `value`.

    When several entries are equally close, the one that comes first in
    `options` is returned. An empty `options` raises ValueError (from `min`).
    """
    def distance(candidate):
        return abs(candidate - value)

    return min(options, key=distance)


def random_individual() -> List[float]:
    """Sample one random genome of six floats.

    Gene 0 is a learning rate drawn uniformly from [LR_MIN, LR_MAX]. Genes 1-5
    are fractional indices, each drawn uniformly from [0, len(options) - 0.01],
    for warmup, rank, alpha, dropout and target modules, in that order.
    """
    discrete_spaces = (
        WARMUP_OPTIONS,         # warmup index
        RANK_OPTIONS,           # rank index
        ALPHA_OPTIONS,          # alpha index
        DROPOUT_OPTIONS,        # dropout index
        TARGET_MODULE_OPTIONS,  # modules index
    )

    genome = [random.uniform(LR_MIN, LR_MAX)]  # LR -> continuous
    genome.extend(random.uniform(0, len(space) - 0.01) for space in discrete_spaces)
    return genome


# takes in an individual and repairs it -> need to change name later
def repair_individual(pop_list: list) -> list:
    """Clip one genome to the search bounds and decode it into hyperparameters.

    Args:
        pop_list: Six raw gene values ordered as [learning rate, warmup index,
            rank index, alpha index, dropout index, target-module index].
            The input is not modified.

    Returns:
        A new list ``[learning_rate, warmup_ratio, rank, alpha, dropout,
        target_module_index]``. The first five entries are actual
        hyperparameter values. The last entry stays an integer index (0 or 1)
        because callers compare it against 0 to pick the module list.
    """
    def clipped_index(position: int) -> int:
        # Clip to [MIN_BOUNDS, MAX_BOUNDS] first, then truncate. The upper
        # bound is len(options) - 0.01, so the result is always a valid index.
        return int(np.clip(pop_list[position], MIN_BOUNDS[position], MAX_BOUNDS[position]))

    return [
        # LR - continuous, just clip
        float(np.clip(pop_list[0], MIN_BOUNDS[0], MAX_BOUNDS[0])),
        # discrete params - clip index then map to actual value
        WARMUP_OPTIONS[clipped_index(1)],
        RANK_OPTIONS[clipped_index(2)],
        ALPHA_OPTIONS[clipped_index(3)],
        DROPOUT_OPTIONS[clipped_index(4)],
        # Target-module choice: truncated exactly like the other index genes.
        # Rounding this gene instead would put the decision boundary at 0.5
        # rather than 1.0, so option 0 would be selected far less often than
        # option 1 for uniformly sampled genes.
        clipped_index(5),
    ]

In [None]:
# NSGA-II run settings

POPSIZE = 20  # number of individuals kept in the population
NGENS = 5     # generation budget (generation 0 is the initial population)
CXPB = 0.9    # probability that a selected pair is recombined

In [None]:
# DEAP imports
%pip install deap
from deap import base, benchmarks, algorithms, creator, tools
from deap.benchmarks.tools import diversity, convergence, hypervolume

In [None]:
import matplotlib.pyplot as plt
import os

class NSGA2_HyperparameterOptimizer:
    """NSGA-II search over LoRA fine-tuning hyperparameters for DistilBERT.

    Each individual is a six-gene list (see ``random_individual``) that
    ``repair_individual`` decodes into learning rate, warmup ratio, LoRA rank,
    alpha, dropout and a target-module choice. Two objectives are optimized
    together: validation accuracy (maximized) and the number of trainable
    parameters (minimized).
    """

    # Standard deviation of each gene's Gaussian mutation, expressed as a
    # fraction of that gene's [MIN_BOUNDS, MAX_BOUNDS] width.
    MUTATION_SIGMA_FRACTION = 0.1

    def __init__(self):
        """Load the accuracy metric and reset all bookkeeping for a new run."""
        self.metric = evaluate.load("accuracy")
        self.pop_size = POPSIZE
        self.ngens = NGENS
        self.cxpb = CXPB

        self.results = []  # one record (dict) per evaluated trial
        self.nfes = 0 # Number of function evaluations (trial_id)
        self.best_solution = None
        self.best_accuracy = 0.0
        self.all_individuals = {}

        self.final_population = None

    @staticmethod
    def _target_modules(module_index: int) -> List[str]:
        """Return a fresh copy of the module list selected by ``module_index``.

        Index 0 selects the first entry of TARGET_MODULE_OPTIONS; any other
        value selects the second entry.
        """
        chosen = TARGET_MODULE_OPTIONS[0] if module_index == 0 else TARGET_MODULE_OPTIONS[1]
        return list(chosen)

    @staticmethod
    def _mutation_sigmas(min_bounds, max_bounds, fraction: float) -> List[float]:
        """Return one mutation std-dev per gene: ``fraction`` of the gene's range."""
        return [(high - low) * fraction for low, high in zip(min_bounds, max_bounds)]

    def _compute_metrics(self, eval_pred: "EvalPrediction"):
        """Turn (logits, labels) into the accuracy dict returned by the metric."""
        preds, labels = eval_pred
        preds = np.argmax(preds, axis=1)
        return self.metric.compute(predictions=preds, references=labels)

    def evaluate_individual(self, individual: List[float], trial_id: int) -> Tuple[float, int]:
        """Train model with given hyperparameters and return validation accuracy

        Args:
            individual: Raw genome; it is decoded with ``repair_individual``.
            trial_id: Running trial number. It names the output directory and
                offsets the training seed.

        Returns:
            ``(accuracy, num_trainable_params)`` - validation accuracy after
            training and the number of parameters with ``requires_grad``.

        The model, PEFT wrapper and trainer are released in a ``finally``
        block, so GPU memory is also reclaimed when training raises.
        """
        params = repair_individual(individual)

        print(f"   > LR={params[0]:.2e}, Rank={params[2]}, "
              f"Alpha={params[3]}, Dropout={params[4]}")

        model = None
        peft_model = None
        trainer = None
        try:
            # Load fresh model each time
            model = AutoModelForSequenceClassification.from_pretrained(
              "distilbert-base-uncased",
              num_labels=6 # for 6 emotions
            )

            peft_config = LoraConfig(
                task_type=TaskType.SEQ_CLS,
                r=params[2],
                lora_alpha=params[3],
                lora_dropout=params[4],
                target_modules=self._target_modules(params[5]),
            )

            peft_model = get_peft_model(model, peft_config)
            args = TrainingArguments(
                output_dir=f"./results/trial_{trial_id}",
                learning_rate=params[0],
                per_device_train_batch_size=16,
                per_device_eval_batch_size=32,
                num_train_epochs=3,
                warmup_ratio=params[1],
                logging_steps = 100,
                weight_decay=0.01,
                eval_strategy="epoch",
                save_strategy="no",
                logging_strategy="epoch",
                seed=SEED + trial_id,
                report_to="none",
                load_best_model_at_end=False
            )

            data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
            trainer = Trainer(
                model=peft_model,
                args=args,
                train_dataset=tokenized_train,
                data_collator=data_collator,
                eval_dataset=tokenized_val,
                compute_metrics=self._compute_metrics
            )

            num_trainable_params = sum(p.numel() for p in peft_model.parameters() if p.requires_grad)
            print(f"Number of trainable params: {num_trainable_params}")

            trainer.train()
            eval_results = trainer.evaluate()
            accuracy = eval_results["eval_accuracy"]

            return accuracy, num_trainable_params
        finally:
            # Cleanup - runs on success and on failure alike
            del model, peft_model, trainer
            cleanup_memory()


    def get_pareto_front(self, population=None):
        """
        Extract the Pareto front (non-dominated solutions) from a population.
        Uses DEAP's sortNondominated to get the first front.

        Args:
            population: Individuals to sort; defaults to ``self.final_population``.

        Raises:
            ValueError: If no population is given and none has been stored yet.
        """
        if population is None:
            population = self.final_population

        if population is None:
            raise ValueError("No population available. Run optimization first.")

        # sortNondominated returns list of fronts; first_front_only=True gives just front 0
        pareto_fronts = tools.sortNondominated(population, len(population), first_front_only=True)
        return pareto_fronts[0]  # Return the first (Pareto-optimal) front


    def run_optimization(self):
        """Run NSGA-II and return the final population.

        Generation 0 evaluates ``self.pop_size`` random individuals. Each later
        generation creates offspring by tournament selection, two-point
        crossover and Gaussian mutation, evaluates them, and keeps the best
        ``self.pop_size`` of parents plus offspring. Every evaluation is
        appended to ``self.results``.

        Raises:
            ValueError: If more than one generation is requested and the
                population size is not a multiple of four.
        """
        # selTournamentDCD (used from generation 1 on) needs a population whose
        # size is a multiple of four. Check now instead of failing only after
        # the whole initial population has been trained.
        if self.ngens > 1 and self.pop_size % 4 != 0:
            raise ValueError(
                f"Population size must be a multiple of 4 for selTournamentDCD, got {self.pop_size}"
            )

        # Ensure creator classes exist
        if not hasattr(creator, "FitnessMulti"):
            # weights: maximize objective 0 (accuracy), minimize objective 1 (params)
            creator.create("FitnessMulti", base.Fitness, weights=(1.0,-1.0))
        if not hasattr(creator, "Individual"):
            creator.create("Individual", list, fitness=creator.FitnessMulti)

        toolbox = base.Toolbox()
        # Use initIterate so random_individual's list return value becomes the individual
        toolbox.register("individual", tools.initIterate, creator.Individual, random_individual)
        toolbox.register("population", tools.initRepeat, list, toolbox.individual)
        pop = toolbox.population(n=self.pop_size)

        toolbox.register("evaluate", self.evaluate_individual)

        toolbox.register("mate", tools.cxTwoPoint)
        # One sigma per gene, scaled to that gene's range. A single sigma for
        # every gene cannot suit both the learning-rate gene (range width
        # around 1.9e-4) and the index genes (range width of a few units): at
        # 0.1 any mutated learning rate lands far outside its bounds and is
        # clipped to LR_MIN or LR_MAX.
        gene_sigmas = self._mutation_sigmas(MIN_BOUNDS, MAX_BOUNDS, self.MUTATION_SIGMA_FRACTION)
        toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=gene_sigmas, indpb=0.1)
        toolbox.register("select", tools.selNSGA2)

        # the logbook stores the results
        logbook = tools.Logbook()
        # Add tracking columns to logbook
        logbook.header = ["gen", "evals"] + ["avg", "std", "min", "max"]

        stats = tools.Statistics(lambda ind: ind.fitness.values)
        stats.register("avg", np.mean, axis=0)
        stats.register("std", np.std, axis=0)
        stats.register("min", np.min, axis=0)
        stats.register("max", np.max, axis=0)

        def track_result(individual, fitness_val, gen):
            """Record one evaluated trial and update the best-accuracy tracker."""
            params = repair_individual(individual)
            param_dict = {
              "learning_rate": params[0],
              "warmup_ratio": params[1],
              "rank": params[2],
              "alpha": params[3],
              "dropout": params[4],
              "target_modules": self._target_modules(params[5])
            }

            # Check for best solution
            if fitness_val[0] > self.best_accuracy:
                self.best_accuracy = fitness_val[0]
                self.best_solution = param_dict

            record = param_dict.copy()
            record.update({
                "trial_id": self.nfes,
                "generation": gen,
                "accuracy": fitness_val[0],
                "trainable_params": fitness_val[1]
            })
            self.results.append(record)


        # INITIALISATION
        print(f"Generation 0/{self.ngens}...")
        for i, ind in enumerate(pop):
            self.nfes += 1
            print(f"Evaluating individual {i+1}/{self.pop_size} (Trial {self.nfes})...")

            fit = self.evaluate_individual(ind, self.nfes)
            ind.fitness.values = fit
            track_result(ind, fit, 0)

            print(f"Individual {i+1} acc: {fit[0]:.4f}, params: {fit[1]}")

        # adjust crowding distance (NSGA2 requirement)
        pop = toolbox.select(pop, len(pop))

        # get stats
        record = stats.compile(pop)
        logbook.record(gen=0, evals=len(pop), **record)
        print(logbook.stream)
        print(f"   >> BEST so far: {self.best_accuracy:.4%} | Params: {self.best_solution}")

        # MAIN LOOP -> begin generational process
        for generation in range(1, self.ngens):
            print(f"\nGeneration {generation}/{self.ngens}...")
            # selTournamentDCD() -> DCD means Dominance and Crowding Distance based
            offspring = tools.selTournamentDCD(pop, len(pop))
            offspring = [toolbox.clone(ind) for ind in offspring]

            for ind1, ind2 in zip(offspring[::2], offspring[1::2]):
                if random.random() <= self.cxpb:
                    toolbox.mate(ind1, ind2)

                toolbox.mutate(ind1)
                toolbox.mutate(ind2)
                del ind1.fitness.values, ind2.fitness.values

            # every individual whose fitness was invalidated above is re-evaluated
            invalid_ind = [ind for ind in offspring if not ind.fitness.valid]

            # Evaluate sequentially to track properly
            for ind in invalid_ind:
                self.nfes += 1
                print(f"Evaluating offspring (Trial {self.nfes})...")
                fit = self.evaluate_individual(ind, self.nfes)
                ind.fitness.values = fit
                track_result(ind, fit, generation)

            # Selecting population to be passed to next generation
            pop = toolbox.select(pop + offspring, self.pop_size)
            record = stats.compile(pop)
            logbook.record(gen=generation, evals=len(invalid_ind), **record)
            print(logbook.stream) # printed at the end of every generation
            print(f"   >> BEST so far: {self.best_accuracy:.4%} | Params: {self.best_solution}")


        self.final_population = pop


        try:
            # NOTE(review): the reference point assumes no individual has more
            # than 2,000,000 trainable parameters - confirm this holds for the
            # largest rank / target-module combination in the search space.
            print("Final population hypervolume (measures quality on Paretto front)", hypervolume(pop, [0.0, 2000000]) )
        except Exception as e:
            print(f"Hypervolume calc failed: {e}")

        return pop


    def plot_pareto_front(self, filename: str = "pareto_front.png"):
        """
        Visualize the Pareto front: accuracy vs trainable parameters.

        All evaluated trials are drawn in grey for context and the first
        non-dominated front of the final population is drawn in red. The
        figure is written to ``filename``; nothing is plotted when no final
        population exists yet.
        """
        if self.final_population is None:
            print("No final population available.")
            return

        pareto_front = self.get_pareto_front()

        # Extract fitness values
        accuracies = [ind.fitness.values[0] for ind in pareto_front]
        params = [ind.fitness.values[1] for ind in pareto_front]

        # Also plot all evaluated solutions for context
        all_acc = [r['accuracy'] for r in self.results]
        all_params = [r['trainable_params'] for r in self.results]

        plt.figure(figsize=(10, 7))

        # Plot all solutions (grey)
        plt.scatter(all_params, all_acc, c='lightgray', alpha=0.5,
                   label='All Evaluated', s=30)

        # Plot Pareto front (red, larger)
        plt.scatter(params, accuracies, c='red', s=100,
                   label='Pareto Front', edgecolors='black', zorder=5)

        # Connect Pareto front points with a line
        sorted_pairs = sorted(zip(params, accuracies))
        sorted_params, sorted_acc = zip(*sorted_pairs)
        plt.plot(sorted_params, sorted_acc, 'r--', alpha=0.7, linewidth=2)

        plt.xlabel('Number of Trainable Parameters', fontsize=12)
        plt.ylabel('Validation Accuracy', fontsize=12)
        plt.title('NSGA-II Pareto Front: Accuracy vs Model Size', fontsize=14)
        plt.legend(loc='lower right')
        plt.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig(filename, dpi=150)
        plt.close()
        print(f"Pareto front plot saved to {filename}")


    def save_pareto_front(self, filename: str = "pareto_front.csv"):
        """
        Save the Pareto front individuals to a CSV file.

        Returns:
            The saved DataFrame sorted by accuracy (descending), or ``None``
            when no final population exists yet.
        """
        if self.final_population is None:
            print("No final population available. Run optimization first.")
            return None

        pareto_front = self.get_pareto_front()

        pareto_results = []
        for i, ind in enumerate(pareto_front):
            params = repair_individual(ind)
            record = {
                "pareto_rank": 0,  # All are on the first front
                "solution_id": i,
                "learning_rate": params[0],
                "warmup_ratio": params[1],
                "rank": params[2],
                "alpha": params[3],
                "dropout": params[4],
                "target_modules": str(self._target_modules(params[5])),
                "accuracy": ind.fitness.values[0],
                "trainable_params": ind.fitness.values[1],
                # For convenience in plotting, also store objectives as error rate
                "error_rate": 1.0 - ind.fitness.values[0]
            }
            pareto_results.append(record)

        df = pd.DataFrame(pareto_results)
        # Sort by accuracy descending for readability
        df = df.sort_values("accuracy", ascending=False).reset_index(drop=True)
        df.to_csv(filename, index=False)

        print(f"\n{'='*60}")
        print(f"PARETO FRONT SAVED: {filename}")
        print(f"{'='*60}")
        print(f"Number of Pareto-optimal solutions: {len(pareto_front)}")
        print(f"\nPareto Front Summary:")
        print(f"  Accuracy range: [{df['accuracy'].min():.4f}, {df['accuracy'].max():.4f}]")
        print(f"  Params range: [{df['trainable_params'].min():.0f}, {df['trainable_params'].max():.0f}]")
        print(f"{'='*60}")

        return df


    def save_results(self, filename: str):
        """Write every recorded trial to ``filename`` (CSV) and print the most accurate one.

        Does nothing except print a notice when no trial has been recorded.
        """
        if not self.results:
            print("No results to save.")
            return

        df = pd.DataFrame(self.results)
        df.to_csv(filename, index=False)
        print(f"\nResults saved to {filename}")

        # Print best result
        if not df.empty:
            best_run = df.loc[df['accuracy'].idxmax()]
            print("\n" + "="*60)
            print("BEST CONFIGURATION FOUND (by Accuracy):")
            print("="*60)
            print(f"Accuracy: {best_run['accuracy']:.4%}")
            print(f"Trainable Params: {best_run['trainable_params']}")
            print(f"Learning Rate: {best_run['learning_rate']:.2e}")
            print(f"Warmup Ratio: {best_run['warmup_ratio']}")
            print(f"Rank: {best_run['rank']}")
            print(f"Alpha: {best_run['alpha']}")
            print(f"Dropout: {best_run['dropout']}")
            print(f"Target Modules: {best_run['target_modules']}")
            print("="*60)

In [None]:
if __name__ == "__main__":
    set_global_seed(SEED)

    # Reset these before the try block. In a notebook this cell can be run
    # more than once, and the `finally` clause below must never save results
    # from an `optimizer` left over from an earlier run when the constructor
    # fails this time.
    optimizer = None
    final_pop = None

    try:
        # 2. Initialize NSGA2 Optimizer
        optimizer = NSGA2_HyperparameterOptimizer()

        # 3. Run Optimization
        final_pop = optimizer.run_optimization()

    except KeyboardInterrupt:
        print("\n\nOptimization interrupted by user.")
    except Exception as e:
        print(f"\n\nCritical failure: {e}")
        import traceback
        traceback.print_exc()
    finally:
        # 4. Save results - also after an interruption or failure, so the
        # trials recorded so far are kept
        if optimizer is not None:
            optimizer.save_results("nsga2_optimization_results.csv")

            pareto_df = optimizer.save_pareto_front("pareto_front.csv")

            optimizer.plot_pareto_front("pareto_front.png")

        cleanup_memory()
        print("\nProcess Complete.")