In [None]:
# 1. Install evaluate
!pip install -q evaluate

# 2. THE FIX: Force install this specific version of protobuf
!pip install -q "protobuf==3.20.3"

In [None]:
import torch
import random
import numpy as np
import pandas as pd
import gc
import time

# Import Hugging Face libraries
import evaluate
from datasets import load_dataset, Dataset, DatasetDict, IterableDataset, IterableDatasetDict
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, DataCollatorWithPadding, EvalPrediction
from peft import LoraConfig, TaskType, get_peft_model

In [None]:
# Import the Python types
from typing import List, Dict, Any, Tuple, cast, Optional

from dataclasses import dataclass, asdict

In [None]:
# Base seed: seeds the global RNGs, fixes the training-subset shuffle, and
# (offset by the trial id) provides the default seed of each trial.
SEED = 42
# Number of training examples kept after shuffling the train split.
TRAIN_SAMPLE_SIZE = 3000
# Number of random-search trials handed to RandomSearchExperiment.
TOTAL_TRIALS = 20
# Number of target classes requested for the classification head.
NUM_LABELS = 6
# Maximum token length passed to the tokenizer.
MAX_LENGTH = 128
# Hugging Face checkpoint used for both the tokenizer and the base model.
MODEL = "distilbert-base-uncased"

In [None]:
def set_global_seed(seed: int):
  """
  Seed every random number generator the notebook relies on.

  Covers Python's `random`, NumPy's legacy global generator and PyTorch;
  the CUDA generator is seeded as well when a GPU is present.
  """
  for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(seed)

  # Nothing more to do on CPU-only machines
  if not torch.cuda.is_available():
    return

  torch.cuda.manual_seed(seed)

In [None]:
# Seed the global RNGs up front: hyperparameters are later drawn from Python's
# `random` module, so re-run this cell before repeating the search.
set_global_seed(SEED)

In [None]:
@dataclass(frozen=True, order=True)
class LoraHyperparameters:
  """
  One LoRA fine-tuning configuration sampled by the random search.

  The dataclass is frozen, so fields cannot be reassigned after creation.
  Note that `target_modules` is a list, so instances are not hashable even
  though the class is frozen.
  """
  learning_rate: float  # Optimizer learning rate
  warmup_ratio: float  # Warm-up ratio passed to the training arguments
  rank: int  # LoRA rank (r)
  alpha: int  # LoRA alpha
  dropout: float  # LoRA dropout probability
  target_modules: List[str]  # Names of the modules that receive LoRA adapters

  @staticmethod # The 'generate_random_hyperparameters' doesn't take an instance of 'self', hence why we use '@staticmethod'
  def generate_random_hyperparameters() -> 'LoraHyperparameters':
    """
    Draw one configuration from the search space.

    Sampling uses the global `random` module, so results are reproducible
    only relative to that module's current state.

    Returns:
      A new LoraHyperparameters instance.
    """
    # Target modules: (Attention Only) OR (Attention + Feedforward)
      # Option A: ["q_lin", "v_lin"]
      # Option B: ["q_lin", "v_lin", "ffn.lin1", "ffn.lin2"]
    module_choice = random.choice(["attn", "attn_ffn"])

    if module_choice == "attn":
      target_modules = ["q_lin", "v_lin"]
    else:
      target_modules = ["q_lin", "v_lin", "ffn.lin1", "ffn.lin2"]

    # The draws below stay in this exact order so that a given RNG state
    # always produces the same configuration.
    return LoraHyperparameters(
      learning_rate=random.uniform(1e-5, 2e-4), # Learning rate is a continuous value
      warmup_ratio=random.choice([0.0, 0.06, 0.1]), # Warm-up ratio is a discrete value
      rank=random.choice([2, 4, 8, 16, 24]), # LoRA rank is a discrete value
      alpha=random.choice([8, 16, 32, 64, 96]), # Alpha is a discrete value
      dropout=random.choice([0.0, 0.05, 0.1, 0.2]), # Dropout is a discrete value (all floats, matching the annotation)
      target_modules=target_modules
    )

In [None]:
class DataManager:
  """
  Loads the "dair-ai/emotion" dataset, tokenizes it and caches the result.

  The prepared bundle is stored on the instance, so repeated calls to
  `prepare_data` return the same object without reloading anything.
  """

  def __init__(self, model_name: str = MODEL):
    """
    Args:
      model_name: Checkpoint name whose tokenizer is loaded.
    """
    self.tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Filled by the first call to prepare_data()
    self.dataset: Optional[Dict[str, Any]] = None

  def prepare_data(self) -> Dict[str, Any]:
    """
    Loads the dataset and processes it.

    Returns:
      A dict with the keys "train" (tokenized subset of the train split),
      "validation" (tokenized validation split), "tokenizer" and
      "num_labels".
    """

    # Reuse the bundle if it has already been built
    if self.dataset is not None:
      return self.dataset

    print("Loading and processing data...")

    # Load full dataset
    full_dataset = cast(DatasetDict, load_dataset("dair-ai/emotion"))

    # Use seed to ensure every run uses the SAME subset of data
    train_subset = full_dataset["train"].shuffle(seed=SEED).select(range(TRAIN_SAMPLE_SIZE))

    # Private helper that converts raw text into token ids
    def _tokenize(examples):
      # Only truncate here. Padding is left to the data collator, which pads
      # each batch to its own longest sequence; padding every example to
      # MAX_LENGTH up front would make that dynamic padding a no-op and force
      # all batches to the full length.
      return self.tokenizer(
        examples["text"],
        truncation=True,
        max_length=MAX_LENGTH
      )

    tokenized_train_dataset = train_subset.map(_tokenize, batched=True)
    tokenized_validation_dataset = full_dataset["validation"].map(_tokenize, batched=True)

    self.dataset = {
      "train": tokenized_train_dataset,
      "validation": tokenized_validation_dataset,
      "tokenizer": self.tokenizer,
      "num_labels": NUM_LABELS
    }

    print("Data preparation complete.")

    return self.dataset

In [None]:
# Tokenize once here; the resulting bundle is handed to the experiment below
# and reused by every trial.
data_manager = DataManager()
data_bundle = data_manager.prepare_data()

In [None]:
class RandomSearchExperiment:
  """
  Random search over LoRA hyperparameters for a sequence-classification model.

  Each trial fine-tunes a fresh base model with one sampled configuration and
  records the validation accuracy and wall-clock duration in `self.results`.
  """

  # Seeds used by verify_top_trials when the caller does not pass any
  DEFAULT_VERIFICATION_SEEDS = (42, 43, 44)

  def __init__(self, data_bundle: Dict[str, Any], total_trials: int = 20):
    """
    Args:
      data_bundle: Dict providing the keys "train", "validation",
        "tokenizer" and "num_labels".
      total_trials: Number of trials executed by run_experiment.
    """
    self.data = data_bundle
    self.total_trials = total_trials
    self.results: List[Dict[str, Any]] = []
    self.metric = evaluate.load("accuracy")

  def _compute_metrics(self, eval_pred: EvalPrediction) -> Dict[str, float]:
    """
    Calculates accuracy during training.

    The predicted class is the argmax over axis 1 of the model outputs
    (assumes outputs are shaped (n_examples, n_classes) — TODO confirm).
    """
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)

    result = self.metric.compute(predictions=predictions, references=labels)

    return cast(Dict[str, float], result)

  def _cleanup_memory(self, model=None, trainer=None):
    """
    Runs the garbage collector and releases cached GPU memory.

    Args:
      model: Ignored; accepted so `_cleanup_memory(model, trainer)` calls
        keep working.
      trainer: Ignored; accepted for the same reason.

    Deleting the parameters only removes this function's own names, so the
    caller must drop its references to the model and trainer *before*
    calling, otherwise their memory cannot be reclaimed.
    """
    del model, trainer

    # Collect first so objects freed by the collector are already gone when
    # the CUDA caching allocator is asked to hand memory back.
    gc.collect()

    if torch.cuda.is_available():
      torch.cuda.empty_cache()

  def run_single_trial(self, trial_id: int, params: LoraHyperparameters, seed: Optional[int] = None) -> Tuple[float, float]:
    """
    Executes one training run with specific hyperparameters.

    Args:
      trial_id: Identifier used in log output and in the output directory.
      params: Hyperparameters for this run.
      seed: Training seed. Defaults to SEED + trial_id when omitted.

    Returns:
      (accuracy, duration_in_seconds), where the duration covers training
      plus the final evaluation.

    Raises:
      Whatever the underlying libraries raise; memory is cleaned up either
      way before the exception propagates.
    """
    print(f"\n[Trial {trial_id}/{self.total_trials}] Starting...")
    print(f"   Params: Rank={params.rank}, Alpha={params.alpha}, LR={params.learning_rate:.2e}")

    # Offset the base seed by the trial id unless an explicit seed is given
    current_seed = seed if seed is not None else SEED + trial_id

    model = None
    trainer = None

    try:
      # Initialize the base model
      model = AutoModelForSequenceClassification.from_pretrained(
        MODEL,
        num_labels=self.data["num_labels"]
      )

      # LoRA configuration
      peft_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        r=params.rank,
        lora_alpha=params.alpha,
        lora_dropout=params.dropout,
        target_modules=params.target_modules
      )

      model = get_peft_model(model, peft_config)

      args = TrainingArguments(
        output_dir=f"./results/trial_{trial_id}",
        learning_rate=params.learning_rate,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=3,
        warmup_ratio=params.warmup_ratio,
        weight_decay=0.01,
        eval_strategy="epoch", # Updated from 'evaluation_strategy'
        save_strategy="no", # Don't save checkpoints (saves disk space)
        logging_strategy="epoch",
        seed=current_seed,
        report_to="none", # Disable WANDB
        load_best_model_at_end=False,
        optim="adamw_torch"
      )

      # Initialize the Trainer
      trainer = Trainer(
        model=model,
        args=args,
        train_dataset=self.data["train"],
        eval_dataset=self.data["validation"],
        data_collator=DataCollatorWithPadding(tokenizer=self.data["tokenizer"]),
        compute_metrics=self._compute_metrics
      )

      start_time = time.time()

      # Train and Evaluate
      trainer.train()
      eval_results = trainer.evaluate()

      trial_duration = time.time() - start_time

      accuracy = eval_results["eval_accuracy"]

      print(f"   [Trial {trial_id}] Complete. Accuracy: {accuracy:.4%} | Time: {trial_duration:.2f}s")
    finally:
      # Drop this frame's references first; otherwise the model and trainer
      # stay alive and the cleanup below cannot reclaim their memory. Runs
      # on failure too, so a crashed trial does not starve the next one.
      model = None
      trainer = None
      self._cleanup_memory()

    return accuracy, trial_duration

  def verify_top_trials(self, top_k: int = 5, seeds: Optional[List[int]] = None):
    """
    Re-trains the best configurations under several seeds and saves a CSV.

    Args:
      top_k: Number of best trials (by accuracy) to re-run.
      seeds: Seeds used for each re-run. Defaults to
        DEFAULT_VERIFICATION_SEEDS when omitted.

    The summary is written to "robustness_verification_results.csv". The
    reported standard deviation comes from np.std with its default settings.
    """
    # A None default avoids sharing one mutable list between calls
    if seeds is None:
      seeds = list(self.DEFAULT_VERIFICATION_SEEDS)

    # Check if there are any results
    if not self.results:
      print("No results found in memory. Please run experiment first.")
      return

    # Without seeds there is nothing to run and the statistics would be NaN
    if not seeds:
      print("No seeds provided. Skipping robustness verification.")
      return

    print("\n" + "="*40)
    print(f"STARTING ROBUSTNESS VERIFICATION (Top {top_k} Models)")
    print("="*40)

    # Sort the results by accuracy in descending order and slice the top K
    sorted_results = sorted(self.results, key=lambda x: x["accuracy"], reverse=True)
    top_k_results = sorted_results[:top_k]

    robustness_data: List[Dict[str, Any]] = []

    for i, trial in enumerate(top_k_results, start=1):
      trial_id = trial["trial_id"]
      original_accuracy = trial["accuracy"] # Accuracy from the original search run

      print(f"\n>>> Verifying Rank {i}: Trial {trial_id} (Original Acc: {original_accuracy:.4%})")

      # Rebuild the hyperparameters from the flattened result entry
      params = LoraHyperparameters(
        learning_rate=trial["learning_rate"],
        warmup_ratio=trial["warmup_ratio"],
        rank=trial["rank"],
        alpha=trial["alpha"],
        dropout=trial["dropout"],
        target_modules=trial["target_modules"]
      )

      current_accuracies = []

      for seed in seeds:
        new_acc, _ = self.run_single_trial(trial_id, params, seed=seed)
        current_accuracies.append(new_acc)

        # The run's frame is gone now, so anything it still held can be freed
        self._cleanup_memory()

      mean_accuracy = float(np.mean(current_accuracies))
      std_accuracy = float(np.std(current_accuracies))

      print(f"    -> Result: {mean_accuracy:.4%} ± {std_accuracy:.4%}")

      entry = {
        "trial_id": trial_id,
        "original_accuracy": original_accuracy,
        "mean_accuracy": mean_accuracy,
        "std_accuracy": std_accuracy,
        "all_seed_accuracies": current_accuracies
      }

      entry.update(asdict(params))
      robustness_data.append(entry)

    df_robust = pd.DataFrame(robustness_data)
    filename = "robustness_verification_results.csv"
    df_robust.to_csv(filename, index=False)

    print(f"\nRobustness verification complete. Saved to {filename}")


  def run_experiment(self):
    """
    Main loop to execute the random search.

    A failing trial is reported and skipped so the remaining trials still
    run; only successful trials are appended to `self.results`.
    """
    print(f"Starting Random Search for {self.total_trials} trials...")

    for i in range(self.total_trials):
      trial_id = i + 1

      try:
        # Generate the random hyperparameters
        # NOTE(review): the Trainer presumably reseeds the global RNGs from
        # its `seed` argument, which would make draws after the first trial
        # depend on the previous trial's seed — confirm this is acceptable.
        params = LoraHyperparameters.generate_random_hyperparameters()

        # Run the current trial
        accuracy, trial_duration = self.run_single_trial(trial_id=trial_id, params=params)

        # Log the current result
        result_entry = {
          "trial_id": trial_id,
          "accuracy": accuracy,
          "trial_duration_in_seconds": trial_duration
        }

        # Flatten parameters into the dict for easier CSV saving
        result_entry.update(asdict(params))

        self.results.append(result_entry)

      except Exception as e:
        # Deliberate best-effort: report the failure and move on
        print(f"!!! CRITICAL ERROR in Trial {trial_id}: {e}")

        # Clean the memory
        self._cleanup_memory()

    print("\nExperiment Completed.")

  def save_results(self, filename="random_search_results.csv"):
    """
    Saves results to CSV and prints summary stats.

    Args:
      filename: Destination path of the CSV file.
    """
    if not self.results:
      print("No results to save.")
      return

    df = pd.DataFrame(self.results)

    # Summary
    best_idx = df['accuracy'].idxmax()

    # Scalar lookups avoid relying on how a mixed-dtype row stores its elements
    best_accuracy = float(df.loc[best_idx, 'accuracy'])
    best_trial_id = int(df.loc[best_idx, 'trial_id'])

    print("\n" + "="*40)
    print("RESULTS SUMMARY")
    print("="*40)
    print(f"Best Accuracy: {best_accuracy:.4%} (Trial {best_trial_id})")
    print(f"Mean Accuracy: {df['accuracy'].mean():.4%}")
    print(f"Std Dev      : {df['accuracy'].std():.4%}")

    # Export
    df.to_csv(filename, index=False)

    print(f"Results saved to {filename}")

In [None]:
# Create the Experiment
experiment = RandomSearchExperiment(data_bundle, total_trials=TOTAL_TRIALS)

start_experiment_time = time.time()

# Run the full random search (trial count comes from TOTAL_TRIALS)
experiment.run_experiment()

end_experiment_time = time.time()

total_duration = end_experiment_time - start_experiment_time

# Report the configured trial count rather than a hard-coded number, so the
# message stays correct when TOTAL_TRIALS changes.
print(f"Total time taken to complete {experiment.total_trials} experiments: {total_duration} seconds")

# Save the results from running the full experiment
experiment.save_results()

# Re-run the five best configurations under three seeds to check robustness
experiment.verify_top_trials(top_k=5, seeds=[42, 43, 44])