<a href="https://colab.research.google.com/github/jsl5710/greenland/blob/main/GREENLAND_Fine_tuning_mostupdated.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Step 1: Setup & Installation

In [None]:
# Install and upgrade necessary libraries
# !pip install --quiet --upgrade pip
# !pip install --quiet --upgrade transformers
# !pip install --quiet --upgrade datasets
# !pip install --quiet --upgrade wandb
# !pip install --quiet git+https://github.com/huggingface/peft.git peft


# First uninstall existing packages to avoid conflicts
# !pip uninstall -y transformers adapters adapter-transformers

# Now install in the correct order with specific versions
!pip install --quiet transformers==4.46.2
!pip install --quiet adapter-transformers
!pip install --quiet adapters
!pip install --quiet datasets
!pip install --quiet wandb
!pip install --quiet git+https://github.com/huggingface/peft.git
!pip install colorlog
# Verify installations
print("Installed versions:")
!pip show transformers | grep Version
!pip show adapter-transformers | grep Version


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m120.7 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
adapters 1.0.1 requires transformers~=4.45.2, but you have transformers 4.46.2 which is incompatible.[0m[31m
[0m  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Installed versions:
Version: 4.45.2
Version: 4.0.0


# Step 2: Import Libraries

In [None]:
# import os
# import torch
# import wandb
# import pandas as pd
# from typing import Dict, Optional
# from torch.utils.data import DataLoader
# from transformers import (
#     AutoTokenizer,
#     AutoModelForSequenceClassification,
#     Trainer,
#     TrainingArguments,
#     DataCollatorWithPadding,
# )

# from peft import (
#     LoraConfig,
#     PrefixTuningConfig,
#     PromptTuningConfig,
#     AdaLoraConfig,
#     IA3Config,
#     get_peft_model,
#     PeftModel,
#     TaskType,
#     PeftConfig
# )
# from datasets import load_dataset, Dataset
# from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, roc_auc_score
# from peft import get_peft_model, LoraConfig, TaskType, AutoPeftModelForSequenceClassification
# from google.colab import drive
# from requests.exceptions import HTTPError

import os
import torch
import wandb
import pickle
import numpy as np
import pandas as pd
from typing import Dict, List, Optional, Union, Callable
from dataclasses import dataclass
from torch.utils.data import DataLoader
from requests.exceptions import HTTPError
from google.colab import drive
from pathlib import Path
import logging

# Transformers imports
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding,
    TrainerCallback,
)

# Adapter imports
from adapters import (
    SeqBnConfig,              # Sequential bottleneck adapter
    DoubleSeqBnConfig,        # Double sequential bottleneck
    ParBnConfig,              # Parallel bottleneck
    SeqBnInvConfig,           # Sequential invertible adapter
    DoubleSeqBnInvConfig,     # Double sequential invertible
    CompacterConfig,          # Compacter
    CompacterPlusPlusConfig,  # Compacter++
    PrefixTuningConfig,       # Prefix tuning
    LoRAConfig,               # LoRA
    IA3Config,                # IA³
    MAMConfig,                # Mix-and-Match
    UniPELTConfig,            # UniPELT
    PromptTuningConfig,       # Prompt Tuning
    LoReftConfig,             # ReFT
    NoReftConfig,             # NoReFT
    DiReftConfig,             # DiReFT
    Stack,                    # For stacking adapters
    Parallel,                 # For parallel adapters
    Fuse,                     # For adapter fusion
    AdapterConfig,            # Base adapter config
    ConfigUnion,               # For combining configurations
    AutoAdapterModel,
    AdapterTrainer
)

# PEFT imports
from peft import (
    LoraConfig,
    PrefixTuningConfig,
    PromptTuningConfig,
    AdaLoraConfig,
    IA3Config,
    get_peft_model,
    PeftModel,
    TaskType,
    PeftConfig,
    AutoPeftModelForSequenceClassification
)

# Dataset and metrics imports
from datasets import (
    load_dataset,
    Dataset,
    DatasetDict
)
from sklearn.metrics import (
    f1_score,
    accuracy_score,
    precision_score,
    recall_score,
    roc_auc_score,
    classification_report,
    confusion_matrix
)

# Custom trainer components
class GatingScoreCallback(TrainerCallback):
    """Trainer callback that forwards adapter gating scores to Weights & Biases.

    At the end of every training step, if the model exposes
    ``get_adapter_gating_scores``, the scores are fetched and, when a W&B run
    is active, logged under the ``gating_scores`` key.
    """

    def on_step_end(self, args, state, control, model=None, **kwargs):
        # Models without gating support are silently ignored.
        if not hasattr(model, "get_adapter_gating_scores"):
            return

        gating = model.get_adapter_gating_scores()
        if wandb.run:
            wandb.log({"gating_scores": gating})

class ModuleNotFoundHandler:
    """Handler for gracefully managing missing optional dependencies."""

    # pip distribution name -> (importable module name, human description).
    # The two names are kept separate because they can differ: a hyphenated
    # distribution name such as "adapter-transformers" is never importable, so
    # probing it with __import__ always reported the package as missing.
    DEFAULT_DEPENDENCIES = {
        "wandb": ("wandb", "Weights & Biases for experiment tracking"),
        "adapters": ("adapters", "Adapter-based fine-tuning support"),
        "peft": ("peft", "Parameter-Efficient Fine-Tuning support"),
    }

    @staticmethod
    def check_optional_dependencies(dependencies=None):
        """Print an install hint for every optional dependency that cannot be imported.

        Args:
            dependencies: Optional mapping of pip distribution name to a
                ``(module_name, description)`` tuple. Defaults to
                ``DEFAULT_DEPENDENCIES``.

        Returns:
            None. Nothing is printed when every module imports successfully.
        """
        if dependencies is None:
            dependencies = ModuleNotFoundHandler.DEFAULT_DEPENDENCIES

        missing = []
        for package, (module, description) in dependencies.items():
            try:
                __import__(module)
            except ImportError:
                missing.append((package, description))

        if missing:
            print("Optional dependencies missing:")
            for package, description in missing:
                print(f"- {package} ({description})")
            print("\nYou can install them with:")
            print("pip install " + " ".join(package for package, _ in missing))

# Initialize dependency check
ModuleNotFoundHandler.check_optional_dependencies()

Optional dependencies missing:
- adapter-transformers (Adapter-based fine-tuning support)

You can install them with:
pip install adapter-transformers


# Step 3: Define Model Checkpoints

In [None]:
# Registry of encoder checkpoints to fine-tune.
# Key: short display name used to select a model (see tokenize_datasets).
# Value: "path" is the identifier handed to AutoTokenizer.from_pretrained,
#        "max_length" is the truncation length used when tokenizing.
# Entries that are commented out are disabled for the current run.
model_checkpoints = {
    "MBERT_uncased": {
        "path": "google-bert/bert-base-multilingual-uncased",
        "max_length": 512
    },
    # "XLM_100": {
    #     "path": "FacebookAI/xlm-mlm-100-1280",
    #     "max_length": 512
    # },
    # "XLM_17": {
    #     "path": "FacebookAI/xlm-mlm-17-1280",
    #     "max_length": 512
    # },
    # "XLM-RoBERTa_xxl": {
    #     "path": "facebook/xlm-roberta-xxl",
    #     "max_length": 512
    # },
    # "mDeBERTa_v3_base": {
    #     "path": "microsoft/mdeberta-v3-base",
    #     "max_length": 512
    # },
    # "S-BERT_LaBSE": {
    #     "path": "sentence-transformers/LaBSE",
    #     "max_length": 512
    # },
    # "S-BERT_distiluse": {
    #     "path": "sentence-transformers/distiluse-base-multilingual-cased",
    #     "max_length": 512
    # },
    # "XLM-R_bernice": {
    #     "path": "jhu-clsp/bernice",
    #     "max_length": 512
    # },
    # "XLM-T_twitter": {
    #     "path": "cardiffnlp/twitter-xlm-roberta-base",
    #     "max_length": 512
    # },
    # "XLM-E_align": {
    #     "path": "microsoft/xlm-align-base",
    #     "max_length": 512
    # },
    # "XLM-E_infoxlm_large": {
    #     "path": "microsoft/infoxlm-large",
    #     "max_length": 512
    # },
    # "XLM-V_base": {
    #     "path": "facebook/xlm-v-base",
    #     "max_length": 512
    # }
}


# model_checkpoints = {
#     "MBERT_uncased": "google-bert/bert-base-multilingual-uncased",
#     # "MBERT_cased": "google-bert/bert-base-multilingual-cased",
#     "XLM_100": "FacebookAI/xlm-mlm-100-1280",
#     "XLM_17": "FacebookAI/xlm-mlm-17-1280",
#     # "XLM-RoBERTa_large": "FacebookAI/xlm-roberta-large",
#     # "XLM-RoBERTa_base": "FacebookAI/xlm-roberta-base",
#     # "XLM-RoBERTa_xl": "facebook/xlm-roberta-xl",
#     "XLM-RoBERTa_xxl": "facebook/xlm-roberta-xxl",
#     "mDeBERTa_v3_base": "microsoft/mdeberta-v3-base",
#     # "M-distilBERT": "distilbert/distilbert-base-multilingual-cased",
#     "S-BERT_LaBSE": "sentence-transformers/LaBSE",
#     "S-BERT_distiluse": "sentence-transformers/distiluse-base-multilingual-cased",
#     "XLM-R_bernice": "jhu-clsp/bernice",
#     "XLM-T_twitter": "cardiffnlp/twitter-xlm-roberta-base",
#     "XLM-E_align": "microsoft/xlm-align-base",
#     # "XLM-E_infoxlm_base": "microsoft/infoxlm-base",
#     "XLM-E_infoxlm_large": "microsoft/infoxlm-large",
#     "XLM-V_base": "facebook/xlm-v-base"
# }

# model_checkpoints = {
    # "MBERT_uncased": "google-bert/bert-base-multilingual-uncased",
    # "XLM_100": "FacebookAI/xlm-mlm-100-1280",
    # "XLM_17": "FacebookAI/xlm-mlm-17-1280",
    # "XLM-RoBERTa_xxl": "facebook/xlm-roberta-xxl",
    # "mDeBERTa_v3_base": "microsoft/mdeberta-v3-base",
    # "S-BERT_LaBSE": "sentence-transformers/LaBSE",
    # "S-BERT_distiluse": "sentence-transformers/distiluse-base-multilingual-cased",
    # "XLM-R_bernice": "jhu-clsp/bernice",
    # "XLM-T_twitter": "cardiffnlp/twitter-xlm-roberta-base",
    # "XLM-E_align": "microsoft/xlm-align-base",
    #     "XLM-E_infoxlm_large": "microsoft/infoxlm-large",
    # "XLM-V_base": "facebook/xlm-v-base"
# }



# Step 4: Authenticate and Initialize

In [None]:
# Mount Google Drive
drive.mount('/content/drive', force_remount=True)

# Authenticate with Hugging Face
!huggingface-cli login --token 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

# Authenticate with W&B
def authenticate_wandb():
    """Log in to Weights & Biases and start a run in the ``greenland`` project.

    SECURITY: the API key is no longer hard-coded (a key committed to the
    notebook is public and must be revoked). It is read from the
    ``WANDB_API_KEY`` environment variable; when that is unset ``key=None`` is
    passed so that ``wandb.login`` uses its own credential lookup / prompt.

    Prints a success message, or the HTTP error if the request fails.
    """
    api_key = os.environ.get("WANDB_API_KEY")
    try:
        wandb.login(key=api_key)
        wandb.init(project="greenland")
        print("Authentication successful!")
    except HTTPError as e:
        print(f"Authentication failed: {e}")

authenticate_wandb()


Mounted at /content/drive
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
The token `greenland` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `greenland`


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Authentication successful!


# Step 5: Define Save Paths and Ensure Directories Exist

In [None]:
# Output locations: a local scratch copy of the best models, a persistent
# copy on Google Drive, and the directory that receives result files.
local_save_path = "/content/sample_data/best_models/"
drive_save_path = "/content/drive/MyDrive/GREENLAND/Modeling/Best_models/"
results_dir = "/content/drive/MyDrive/GREENLAND/Results/"

# Create every output directory up front (no-op when it already exists)
for _output_dir in (local_save_path, drive_save_path, results_dir):
    os.makedirs(_output_dir, exist_ok=True)


# Step 6: Load and Process the Dataset

In [None]:
# # Load datasets from CSV files in Google Drive
# train_df = pd.read_csv('/content/drive/MyDrive/GREENLAND/Datasets/Consolidated_Data/Experiment_Training_Splits/train_data.csv')
# val_df = pd.read_csv('/content/drive/MyDrive/GREENLAND/Datasets/Consolidated_Data/Experiment_Training_Splits/val_data.csv')
# test_df = pd.read_csv('/content/drive/MyDrive/GREENLAND/Datasets/Consolidated_Data/Experiment_Training_Splits/test_data.csv')

# # Convert to Hugging Face Dataset format
# train_dataset = Dataset.from_pandas(train_df)
# val_dataset = Dataset.from_pandas(val_df)
# test_dataset = Dataset.from_pandas(test_df)


# # Combine datasets into a dictionary for easy access
# dataset = {
#     "train": train_dataset,
#     "validation": val_dataset,
#     "test": test_dataset
# }


# # Load datasets from CSV files in Google Drive
# train_df = pd.read_csv('/content/drive/MyDrive/GREENLAND/Datasets/Consolidated_Data/Experiment_Training_Splits/train_data.csv')
# val_df = pd.read_csv('/content/drive/MyDrive/GREENLAND/Datasets/Consolidated_Data/Experiment_Training_Splits/val_data.csv')
# test_df = pd.read_csv('/content/drive/MyDrive/GREENLAND/Datasets/Consolidated_Data/Experiment_Training_Splits/test_data.csv')

# # Sample 1000 examples from each dataset with random seed for reproducibility
# train_df_sampled = train_df.sample(n=1000, random_state=42)
# val_df_sampled = val_df.sample(n=1000, random_state=42)
# test_df_sampled = test_df.sample(n=1000, random_state=42)

# # Convert to Hugging Face Dataset format
# train_dataset = Dataset.from_pandas(train_df_sampled)
# val_dataset = Dataset.from_pandas(val_df_sampled)
# test_dataset = Dataset.from_pandas(test_df_sampled)

# # Combine datasets into a dictionary for easy access
# dataset = {
#     "train": train_dataset,
#     "validation": val_dataset,
#     "test": test_dataset
# }

# print("Dataset sizes after sampling:")
# print(f"Train: {len(train_dataset)}")
# print(f"Validation: {len(val_dataset)}")
# print(f"Test: {len(test_dataset)}")

# Step 7: Define Dataset Processing Functions

In [None]:
def verify_dataset(dataset):
    """Print a sanity report for every split of ``dataset``.

    For each split the report shows the number of examples, the declared
    features, the first five values of the ``label`` column, the type of the
    label column and the types of those first five labels.

    Args:
        dataset: Mapping of split name to a dataset object that supports
            ``len()``, a ``features`` attribute and ``["label"]`` indexing.

    Returns:
        Always ``True``.
    """
    print("\nDataset Verification:")
    for split in dataset:
        subset = dataset[split]
        print(f"\n{split.capitalize()} set:")
        print("Number of examples:", len(subset))
        print("Features:", subset.features)

        label_column = subset["label"]
        first_labels = label_column[:5]
        print("Sample labels:", first_labels)
        # Type of the whole column container
        print("Label type:", type(label_column))
        # Types of the individual values, to reveal the underlying encoding
        print("Sample label types:", [type(value) for value in first_labels])
    return True

def tokenize_datasets(model_name, dataset):
    """Tokenize every split of ``dataset`` for the checkpoint ``model_name``.

    The tokenizer path and maximum sequence length are looked up in
    ``model_checkpoints``. Each split is mapped in batches; the original
    columns are dropped and replaced by the tokenizer output plus an integer
    ``labels`` column (``1`` for truthy labels, ``0`` otherwise).

    Args:
        model_name: Key into ``model_checkpoints``.
        dataset: Mapping of split name to a dataset with ``text`` and
            ``label`` columns; must contain a ``"train"`` split.

    Returns:
        Dict mapping each split name to its tokenized dataset.
    """
    checkpoint = model_checkpoints[model_name]
    tokenizer = AutoTokenizer.from_pretrained(checkpoint["path"])
    max_length = checkpoint["max_length"]

    print(f"Using max_length={max_length} for model {model_name}")

    def preprocess_function(examples):
        # Collapse any label representation (bool, int, ...) to 0/1
        binary_labels = [int(bool(raw)) for raw in examples["label"]]

        encoded = tokenizer(
            examples["text"],
            truncation=True,
            padding=True,
            max_length=max_length,
            return_tensors=None
        )

        encoded["labels"] = binary_labels
        return encoded

    # Show what the labels look like before they are converted
    print("\nBefore tokenization:")
    print("Sample of original labels:", dataset["train"]["label"][:5])
    print("Original label type:", type(dataset["train"]["label"][0]))

    tokenized_data = {}
    for split, data in dataset.items():
        tokenized_data[split] = data.map(
            preprocess_function,
            batched=True,
            batch_size=1000,
            num_proc=4,
            remove_columns=data.column_names,
            desc=f"Tokenizing {split} set"
        )

    # Show the converted labels for comparison
    print("\nAfter tokenization:")
    print("Sample of processed labels:", tokenized_data["train"]["labels"][:5])
    print("Processed label type:", type(tokenized_data["train"]["labels"][0]))

    return tokenized_data

def analyze_text_lengths(dataset):
    """Report word- and character-length statistics of the training texts.

    Only ``dataset["train"]["text"]`` is inspected; no tokenizer is involved.
    Words are whitespace-separated chunks. The median is the element at index
    ``n // 2`` of the sorted lengths and the 95th percentile the element at
    index ``int(n * 0.95)``.

    Args:
        dataset: Mapping with a ``"train"`` entry exposing a ``"text"`` column.

    Returns:
        Dict with the word-level statistics at the top level, the bucket
        counts and percentages under ``length_distribution`` /
        ``length_distribution_percent`` and the character-level statistics
        under ``char_stats``.
    """

    def _summarize(values):
        # Sort once and read both order statistics from the same list
        ordered = sorted(values)
        count = len(values)
        return {
            "average_length": sum(values) / count,
            "max_length": max(values),
            "median_length": ordered[count // 2],
            "95th_percentile": ordered[int(count * 0.95)],
        }

    texts = dataset["train"]["text"]

    # Word-level view
    lengths = [len(text.split()) for text in texts]
    stats = _summarize(lengths)

    buckets = {
        "< 128 words": 0,
        "128-256 words": 0,
        "256-512 words": 0,
        "> 512 words": 0,
    }
    for n_words in lengths:
        if n_words < 128:
            buckets["< 128 words"] += 1
        elif n_words < 256:
            buckets["128-256 words"] += 1
        elif n_words < 512:
            buckets["256-512 words"] += 1
        else:
            buckets["> 512 words"] += 1
    stats["length_distribution"] = buckets

    # Share of the corpus that falls into each bucket
    total_samples = len(lengths)
    stats["length_distribution_percent"] = {
        label: (count / total_samples * 100) for label, count in buckets.items()
    }

    print("\nText Length Analysis (word-based):")
    print(f"Average length: {stats['average_length']:.1f} words")
    print(f"Median length: {stats['median_length']} words")
    print(f"Max length: {stats['max_length']} words")
    print(f"95th percentile: {stats['95th_percentile']} words")
    print("\nLength Distribution:")
    for label, count in buckets.items():
        share = stats["length_distribution_percent"][label]
        print(f"{label}: {count} texts ({share:.1f}%)")

    # Character-level view
    char_stats = _summarize([len(text) for text in texts])
    stats["char_stats"] = char_stats

    print("\nCharacter-based Analysis:")
    print(f"Average length: {char_stats['average_length']:.1f} characters")
    print(f"Median length: {char_stats['median_length']} characters")
    print(f"Max length: {char_stats['max_length']} characters")
    print(f"95th percentile: {char_stats['95th_percentile']} characters")

    return stats



# Step 8: Define Loss Functions

In [None]:
class WeightedFocalLoss(torch.nn.Module):
    """Focal loss computed on the positive-class probability of a class head.

    The logits are soft-maxed over dim 1 and the probability in column 1 is
    scored against the float labels with an element-wise binary cross entropy.
    Each term is then scaled by ``alpha * (1 - p_t) ** gamma`` where
    ``p_t = exp(-BCE)``, and the mean over the batch is returned.
    """

    def __init__(self, alpha=0.25, gamma=2):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, logits, labels):
        # Column 1 of the softmax is the positive-class probability
        positive_prob = torch.softmax(logits, dim=1)[:, 1]
        targets = labels.float()

        per_sample_bce = torch.nn.functional.binary_cross_entropy(
            positive_prob, targets, reduction='none'
        )
        # exp(-BCE) recovers the probability assigned to the true class
        true_class_prob = torch.exp(-per_sample_bce)
        focal_terms = self.alpha * (1 - true_class_prob) ** self.gamma * per_sample_bce
        return focal_terms.mean()

class SymmetricCrossEntropyLoss(torch.nn.Module):
    """Symmetric cross entropy: ``alpha * CE + beta * RCE``.

    ``CE`` is the standard cross entropy ``-sum_k q_k * log(p_k)`` and ``RCE``
    the reverse cross entropy ``-sum_k p_k * log(q_k)``, where ``p`` is the
    softmax prediction and ``q`` the one-hot label. Because ``log(0)`` is
    undefined, the one-hot label is clamped from below before the logarithm.

    The previous implementation computed ``-log(p_y)`` for the reverse term,
    which is exactly the ordinary cross entropy again, so the loss collapsed
    to ``(alpha + beta) * CE`` and lost its noise-robust component.

    Args:
        alpha: Weight of the standard cross-entropy term.
        beta: Weight of the reverse cross-entropy term.
    """

    # Lower bounds that keep the logarithms finite.
    _PRED_FLOOR = 1e-7
    _LABEL_FLOOR = 1e-4

    def __init__(self, alpha=0.1, beta=1.0):
        super().__init__()
        self.alpha = alpha
        self.beta = beta

    def forward(self, logits, labels):
        """Return the scalar loss for ``logits`` (N, C) and integer ``labels`` (N,)."""
        ce_loss = torch.nn.functional.cross_entropy(logits, labels)

        probs = torch.softmax(logits, dim=1).clamp(min=self._PRED_FLOOR, max=1.0)
        labels_one_hot = torch.nn.functional.one_hot(
            labels, num_classes=logits.size(-1)
        ).to(probs.dtype)
        # log of the clamped one-hot: 0 for the true class, log(floor) elsewhere
        log_targets = labels_one_hot.clamp(min=self._LABEL_FLOOR, max=1.0).log()
        rce_loss = -(probs * log_targets).sum(dim=-1).mean()

        return self.alpha * ce_loss + self.beta * rce_loss


class ModifiedBCEWithLogitsLoss(torch.nn.Module):
    """Binary cross entropy with logits applied to the positive-class logit.

    Accepts class-head logits of shape (N, C) with C >= 2 (column 1 is used),
    single-logit heads of shape (N, 1), and flat logits of shape (N,). The
    previous version unsqueezed 1-D logits to (N, 1) and then indexed column
    1, which raised ``IndexError``.
    """

    def forward(self, logits, labels):
        """Return the mean BCE-with-logits loss between the logit and ``labels``."""
        labels = labels.float().view(-1)

        if len(logits.shape) > 1:
            # Column 1 is the positive class; a single-column head has only column 0
            logits = logits[:, 1] if logits.shape[1] > 1 else logits[:, 0]

        return torch.nn.functional.binary_cross_entropy_with_logits(
            logits, labels, reduction='mean'
        )

class ModifiedSquaredBCEWithLogitsLoss(torch.nn.Module):
    """Mean squared error between ``sigmoid(logit)`` and the binary labels.

    For two-column logits (N, 2) the positive-class column is used. A
    single-column head (N, 1) is flattened to (N,) first; without that step
    ``probs - labels`` broadcast (N, 1) against (N,) into an N x N matrix and
    silently averaged over every prediction/label pair.
    """

    def forward(self, logits, labels):
        """Return the scalar mean of ``(sigmoid(logit) - label) ** 2``."""
        # Convert labels to float and ensure correct shape
        labels = labels.float().view(-1)

        # Reduce the logits to one value per sample
        if len(logits.shape) > 1:
            if logits.shape[1] == 2:
                logits = logits[:, 1]  # Take the logit for positive class
            elif logits.shape[1] == 1:
                logits = logits[:, 0]  # Single-logit head: avoid (N, 1) vs (N,) broadcast

        # Apply sigmoid to get probabilities
        probs = torch.sigmoid(logits)
        return torch.mean((probs - labels) ** 2)

class ModifiedWeightedBinaryCrossEntropy(torch.nn.Module):
    """BCE-with-logits on the positive-class logit with a positive-class weight.

    Args:
        pos_weight: Tensor passed as ``pos_weight`` to
            ``binary_cross_entropy_with_logits``. It is stored as a plain
            attribute, so it must already live on the same device as the
            logits.

    Two-column logits (N, 2) contribute their positive-class column. A
    single-column head (N, 1) is flattened to (N,) so that it matches the
    flattened labels instead of failing the loss function's shape check.
    """

    def __init__(self, pos_weight):
        super().__init__()
        self.pos_weight = pos_weight

    def forward(self, logits, labels):
        """Return the mean weighted BCE-with-logits loss."""
        # Convert labels to float and ensure correct shape
        labels = labels.float().view(-1)

        # Reduce the logits to one value per sample
        if len(logits.shape) > 1:
            if logits.shape[1] == 2:
                logits = logits[:, 1]  # Take the logit for positive class
            elif logits.shape[1] == 1:
                logits = logits[:, 0]  # Single-logit head: match the (N,) labels

        return torch.nn.functional.binary_cross_entropy_with_logits(
            logits, labels, pos_weight=self.pos_weight, reduction='mean'
        )

class ModifiedSupervisedContrastiveCrossEntropyLoss(torch.nn.Module):
    """Blend of cross entropy and a label-supervised contrastive term.

    The contrastive part works directly on the L2-normalised logits: pairwise
    similarities are divided by ``temperature`` and turned into row-wise log
    probabilities, which are then averaged over all samples sharing the
    anchor's label. The diagonal is not masked out, so every sample also
    counts as its own positive. The result is
    ``lam * CE + (1 - lam) * contrastive``.

    Args:
        temperature: Divisor applied to the cosine similarities.
        lam: Weight of the cross-entropy term; the contrastive term gets
            ``1 - lam``.
    """

    def __init__(self, temperature=0.07, lam=0.5):
        super().__init__()
        self.temperature = temperature
        self.lam = lam
        self.ce_loss = torch.nn.CrossEntropyLoss()

    def forward(self, logits, labels):
        # Supervised term
        supervised = self.ce_loss(logits, labels.long())

        # Temperature-scaled cosine similarity between all pairs of samples
        unit_logits = torch.nn.functional.normalize(logits, dim=1)
        similarity = torch.matmul(unit_logits, unit_logits.t()) / self.temperature

        # same_class[i, j] == 1 when samples i and j carry the same label
        label_column = labels.view(-1, 1)
        same_class = (label_column == label_column.t()).float()

        # Row-wise log-softmax of the similarities
        row_normaliser = torch.log(torch.exp(similarity).sum(dim=1, keepdim=True))
        log_prob = similarity - row_normaliser

        # Average log-probability over each anchor's positives
        positives_per_row = torch.clamp(same_class.sum(dim=1), min=1e-8)  # Avoid division by zero
        mean_positive_log_prob = (same_class * log_prob).sum(dim=1) / positives_per_row
        contrastive = -mean_positive_log_prob.mean()

        return self.lam * supervised + (1 - self.lam) * contrastive

class HuberLoss(torch.nn.Module):
    """Huber loss between ``sigmoid(logit)`` and the binary labels.

    Residuals with magnitude up to ``delta`` are penalised quadratically
    (``0.5 * diff ** 2``), larger ones linearly
    (``delta * (|diff| - 0.5 * delta)``).

    For two-column logits (N, 2) the positive-class column is used. A
    single-column head (N, 1) is flattened to (N,) first; without that step
    ``probs - labels`` broadcast (N, 1) against (N,) into an N x N matrix and
    silently averaged over every prediction/label pair.

    Args:
        delta: Threshold between the quadratic and the linear regime.
    """

    def __init__(self, delta=1.0):
        super().__init__()
        self.delta = delta

    def forward(self, logits, labels):
        """Return the mean Huber loss over the batch."""
        # Convert labels to float and ensure correct shape
        labels = labels.float().view(-1)

        # Reduce the logits to one value per sample
        if len(logits.shape) > 1:
            if logits.shape[1] == 2:
                logits = logits[:, 1]  # Take the logit for the positive class
            elif logits.shape[1] == 1:
                logits = logits[:, 0]  # Single-logit head: avoid (N, 1) vs (N,) broadcast

        # Apply sigmoid to get probabilities
        probs = torch.sigmoid(logits)

        # Piecewise quadratic / linear penalty on the residual
        diff = probs - labels
        abs_diff = torch.abs(diff)
        per_sample = torch.where(
            abs_diff <= self.delta,
            0.5 * diff ** 2,
            self.delta * (abs_diff - 0.5 * self.delta),
        )
        return per_sample.mean()


# Step 9: Loss Functions Factory

In [None]:
def get_loss_functions(device=None):
    """Build the loss functions to experiment with, keyed by display name.

    Args:
        device: Device the loss modules are moved to. When ``None``, CUDA is
            used if available and the CPU otherwise.

    Returns:
        Dict mapping a loss name to an instantiated loss module. Only the
        Huber loss is currently enabled.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Disabled alternatives -- move an entry into the dict below to enable it:
    # "CrossEntropyLoss": torch.nn.CrossEntropyLoss().to(device),
    # "BCEWithLogitsLoss": ModifiedBCEWithLogitsLoss().to(device),
    # "SquaredBCEWithLogitsLoss": ModifiedSquaredBCEWithLogitsLoss().to(device),
    # "WeightedBinaryCrossEntropy": ModifiedWeightedBinaryCrossEntropy(
    #     pos_weight=torch.tensor([3.0]).to(device)
    # ).to(device),
    # "WeightedFocalLoss": WeightedFocalLoss(
    #     alpha=0.25,
    #     gamma=2
    # ).to(device),
    # "SymmetricCrossEntropy": SymmetricCrossEntropyLoss(
    #     alpha=0.1,
    #     beta=1.0
    # ).to(device),
    # "SupervisedContrastiveCrossEntropyLoss": ModifiedSupervisedContrastiveCrossEntropyLoss(
    #     temperature=0.07,
    #     lam=0.5
    # ).to(device),
    enabled_losses = {}
    enabled_losses["HuberLoss"] = HuberLoss(delta=1.0).to(device)
    return enabled_losses

# Step 10: Evaluation Metrics

In [None]:
# Custom metrics function with debugging statements
def compute_metrics(pred):
    """Compute binary classification metrics from a prediction object.

    Args:
        pred: Object exposing ``label_ids`` (true labels) and ``predictions``
            (class logits, or a tuple whose first element holds them), such
            as the evaluation output a Hugging Face ``Trainer`` passes in.

    Returns:
        Dict with ``eval_accuracy``, ``eval_f1``, ``eval_precision``,
        ``eval_recall`` and ``eval_roc_auc``. ROC AUC is reported as ``0``
        when it cannot be computed (e.g. only one class in ``labels``).
    """
    print("Running compute_metrics...")  # Debugging print
    labels = pred.label_ids
    logits = pred.predictions
    if isinstance(logits, tuple):
        # Some models return extra arrays; the logits come first
        logits = logits[0]
    preds = logits.argmax(-1)

    # ROC AUC needs a continuous ranking score. Scoring the hard argmax
    # predictions leaves a single operating point and discards the ranking
    # information. The logit margin is a monotonic function of the softmax
    # positive-class probability, so it yields the same AUC.
    if logits.ndim == 2 and logits.shape[1] == 2:
        scores = logits[:, 1] - logits[:, 0]
    else:
        scores = preds

    try:
        roc_auc = roc_auc_score(labels, scores)
    except ValueError:
        roc_auc = 0

    metrics = {
        'eval_accuracy': accuracy_score(labels, preds),
        'eval_f1': f1_score(labels, preds, average='binary'),
        'eval_precision': precision_score(labels, preds, average='binary'),
        'eval_recall': recall_score(labels, preds, average='binary'),
        'eval_roc_auc': roc_auc
    }
    # print("Computed metrics:", metrics)  # Debugging print
    return metrics

# Step 11: Custom Trainer

Model FT (Full and layer-wise)

In [None]:
# Trainer variant with a pluggable loss function and an evaluation banner
class CustomTrainer(Trainer):
    """``Trainer`` that can replace the model's built-in loss.

    Args:
        loss_func: Optional callable ``loss_func(logits, labels)``. When
            ``None`` the loss returned by the model is used.
        compute_metrics: Metrics callback; stored on the instance after the
            parent constructor has run.
        *args, **kwargs: Forwarded unchanged to ``Trainer``.
    """

    def __init__(self, *args, loss_func=None, compute_metrics=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.loss_func = loss_func
        self.compute_metrics = compute_metrics  # Accept compute_metrics

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Run the forward pass and return the loss (optionally with the outputs)."""
        # Labels that are not tensors yet are materialised on the training device
        if "labels" in inputs and not isinstance(inputs["labels"], torch.Tensor):
            inputs["labels"] = torch.tensor(inputs["labels"], device=self.args.device)

        # Forward pass
        outputs = model(**inputs)

        if self.loss_func is None:
            loss = outputs.loss
        else:
            labels = inputs["labels"]
            # These binary losses convert the labels themselves; every other
            # loss is handed integer class ids.
            label_preserving = (
                ModifiedSquaredBCEWithLogitsLoss,
                ModifiedWeightedBinaryCrossEntropy,
                ModifiedBCEWithLogitsLoss,
            )
            if not isinstance(self.loss_func, label_preserving):
                labels = labels.long()
            loss = self.loss_func(outputs.logits, labels)

        if return_outputs:
            return (loss, outputs)
        return loss

    def evaluate(self, eval_dataset=None, **kwargs):
        """Announce the evaluation on stdout, then defer to ``Trainer.evaluate``."""
        print("Performing evaluation...")
        return super().evaluate(eval_dataset=eval_dataset, **kwargs)

# Memory housekeeping between experiments
def cleanup():
    """Empty PyTorch's CUDA cache; does nothing when CUDA is unavailable."""
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()

Adapter FT

In [None]:
class CustomAdapterTrainer(AdapterTrainer):
    """
    Custom AdapterTrainer that supports custom loss functions and additional logging
    Inherits from AdapterTrainer to maintain adapter-specific functionality

    Args:
        loss_func: Optional callable ``loss_func(logits, labels)``. When
            ``None`` the loss returned by the model is used.
        compute_metrics: Metrics callback; stored on the instance after the
            parent constructor has run.
        *args, **kwargs: Forwarded unchanged to ``AdapterTrainer``.
    """
    def __init__(self, *args, loss_func=None, compute_metrics=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.loss_func = loss_func
        self.compute_metrics = compute_metrics

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """
        Custom loss computation that supports various loss functions
        Handles both adapter-specific behavior and custom loss functions

        Returns the loss, or ``(loss, outputs)`` when ``return_outputs`` is set.
        """
        if "labels" in inputs:
            # Ensure labels are on the correct device
            if not isinstance(inputs["labels"], torch.Tensor):
                inputs["labels"] = torch.tensor(inputs["labels"], device=self.args.device)

        # Forward pass
        outputs = model(**inputs)

        if self.loss_func is not None:
            logits = outputs.logits
            labels = inputs["labels"]

            # Binary classification losses need special handling
            if isinstance(self.loss_func, (ModifiedSquaredBCEWithLogitsLoss,
                                         ModifiedWeightedBinaryCrossEntropy,
                                         ModifiedBCEWithLogitsLoss)):
                loss = self.loss_func(logits, labels)
            else:
                # For cross entropy based losses
                labels = labels.long()
                loss = self.loss_func(logits, labels)

            # Store loss for adapter-specific logging
            if hasattr(outputs, "loss") and outputs.loss is not None:
                outputs.loss = loss
        else:
            loss = outputs.loss

        return (loss, outputs) if return_outputs else loss

    def evaluate(self, eval_dataset=None, **kwargs):
        """
        Enhanced evaluation method with additional logging

        Logs the active adapters and the metrics, mirrors the metrics to the
        active W&B run (if any) and re-raises any evaluation error after
        logging it.
        """
        logger = logging.getLogger(__name__)
        logger.info("Starting evaluation...")

        try:
            results = super().evaluate(eval_dataset=eval_dataset, **kwargs)

            # Log adapter-specific information
            if hasattr(self.model, "active_adapters"):
                active_adapters = self.model.active_adapters
                logger.info(f"Active adapters during evaluation: {active_adapters}")

            # Log evaluation results
            if self.compute_metrics is not None:
                logger.info(f"Evaluation metrics: {results}")

                # Log to wandb if available
                if wandb.run is not None:
                    wandb.log({
                        "eval/loss": results.get("eval_loss", 0),
                        "eval/accuracy": results.get("eval_accuracy", 0),
                        "eval/f1": results.get("eval_f1", 0),
                        "eval/precision": results.get("eval_precision", 0),
                        "eval/recall": results.get("eval_recall", 0),
                        "eval/roc_auc": results.get("eval_roc_auc", 0)
                    })
            else:
                logger.warning("No compute_metrics function provided for evaluation")

        except Exception as e:
            logger.error(f"Error during evaluation: {str(e)}")
            raise

        return results

    def log(self, logs: Dict[str, float], *args, **kwargs) -> None:
        """
        Enhanced logging method to include adapter-specific information

        Extra positional/keyword arguments are passed through untouched so the
        override keeps working with Trainer versions whose ``log`` accepts
        more than the ``logs`` dict.
        """
        if hasattr(self.model, "active_adapters"):
            logs["active_adapters"] = str(self.model.active_adapters)

        # Add adapter parameter counts if available
        if hasattr(self.model, "get_adapter_parameter_counts"):
            param_counts = self.model.get_adapter_parameter_counts()
            for adapter_name, count in param_counts.items():
                logs[f"adapter_params/{adapter_name}"] = count

        super().log(logs, *args, **kwargs)

    def save_model(self, output_dir: Optional[str] = None, _internal_call: bool = False):
        """
        Enhanced save method to handle both model and adapter saving

        Each active adapter is additionally written to
        ``<output_dir>/adapters/<adapter_name>``; failures for individual
        adapters are logged and do not abort the save.
        """
        # First, save the model using parent class method
        super().save_model(output_dir, _internal_call)

        # The parent falls back to args.output_dir when no directory is given;
        # mirror that here, otherwise os.path.join(None, ...) raises TypeError.
        if output_dir is None:
            output_dir = self.args.output_dir

        # Then save active adapters separately
        if hasattr(self.model, "active_adapters") and self.model.active_adapters:
            adapter_dir = os.path.join(output_dir, "adapters")
            os.makedirs(adapter_dir, exist_ok=True)

            for adapter_name in self.model.active_adapters:
                try:
                    adapter_path = os.path.join(adapter_dir, adapter_name)
                    self.model.save_adapter(adapter_path, adapter_name)
                    logging.info(f"Saved adapter {adapter_name} to {adapter_path}")
                except Exception as e:
                    logging.error(f"Failed to save adapter {adapter_name}: {str(e)}")

# Step 12: Model Save/Load Functions

In [None]:
# Import necessary modules
import os
import sys
import time
import logging
from pathlib import Path
from typing import Dict, Any, Optional, Callable

# Create logs directory if it doesn't exist
os.makedirs('logs', exist_ok=True)

# Build the format and the timestamped file path once, so the root logger and
# the experiment logger always write to the same file.
LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
LOG_FILE_PATH = os.path.join('logs', f'training_{time.strftime("%Y%m%d_%H%M%S")}.log')

# Configure the root logger. basicConfig does nothing when the root logger
# already has handlers, so the file handler is created with delay=True: the
# file is only opened if the handler is really used.
logging.basicConfig(
    level=logging.INFO,
    format=LOG_FORMAT,
    handlers=[
        logging.StreamHandler(sys.stdout),  # Print to console
        logging.FileHandler(LOG_FILE_PATH, delay=True)  # Save to file with timestamp
    ]
)

# Create logger instance
logger = logging.getLogger("greenland_experiments")

# Set log level
logger.setLevel(logging.INFO)

# getLogger returns the same object every time this cell runs; drop handlers
# from earlier runs so each message is emitted once, and close their files.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# Create handlers for both console and file output
console_handler = logging.StreamHandler(sys.stdout)
file_handler = logging.FileHandler(LOG_FILE_PATH)

# Create formatter
formatter = logging.Formatter(LOG_FORMAT)

# Set formatter for handlers
console_handler.setFormatter(formatter)
file_handler.setFormatter(formatter)

# Add handlers to logger
logger.addHandler(console_handler)
logger.addHandler(file_handler)

# Ensure logger doesn't propagate to root logger
logger.propagate = False

# Optional helper: switch the console handler to a colored format
def get_colored_logger():
    """
    Return the module logger, coloring console output when colorlog is available.

    If ``colorlog`` can be imported, the shared ``console_handler`` gets a
    colored formatter; otherwise a notice is logged and the handler keeps its
    current formatter. The same ``logger`` object is returned either way.
    """
    colored_format = '%(log_color)s%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    level_colors = {
        'DEBUG': 'cyan',
        'INFO': 'green',
        'WARNING': 'yellow',
        'ERROR': 'red',
        'CRITICAL': 'red,bg_white',
    }

    try:
        import colorlog

        console_handler.setFormatter(
            colorlog.ColoredFormatter(colored_format, log_colors=level_colors)
        )
    except ImportError:
        logger.info("colorlog not installed. Using standard logging output.")

    return logger

# Create the colored logger (optional)
logger = get_colored_logger()

# For each supported training type: (label used in success messages,
# label used in the Hub-specific warning or None when there is none).
_SAVE_LABELS = {
    "full_ft": ("Full fine-tuned model", None),
    "adapter": ("Adapter base model", "adapter"),
    "peft": ("PEFT model", "PEFT"),
    "bitfit": ("BitFit model", "BitFit"),
}


def _first_active_adapter(model):
    """Return the first active adapter of ``model``, or None when there is none."""
    active_adapters = getattr(model, 'active_adapters', None)
    if not active_adapters:
        # Covers a missing attribute, None and an empty list/tuple
        return None
    if isinstance(active_adapters, (list, tuple)):
        return active_adapters[0]
    if hasattr(active_adapters, 'first'):  # Handle Stack type
        return active_adapters.first()
    return active_adapters


def _save_to_hub(trainer, training_type, hub_path):
    """Run the Hub-targeted save calls for one training type; raises on failure."""
    label = _SAVE_LABELS[training_type][0]

    # NOTE(review): these calls are kept exactly as before. Trainer.push_to_hub
    # appears to take a commit message (not a repo id) as its first argument,
    # and save_model/save_pretrained write to a local directory named
    # ``hub_path`` - confirm that they reach the intended Hub repository.
    if training_type == "full_ft":
        trainer.push_to_hub(hub_path)
    elif training_type == "peft":
        trainer.model.save_pretrained(hub_path)
    else:  # "adapter" and "bitfit"
        trainer.save_model(hub_path)
    logger.info(f"✅ {label} saved to Hugging Face Hub as {hub_path}")

    if training_type == "adapter":
        active_adapter = _first_active_adapter(trainer.model)
        if active_adapter:
            trainer.model.push_adapter_to_hub(repo_id=hub_path, adapter_name=active_adapter)
            # Report the repo id that was actually used for the push
            logger.info(f"✅ Adapter weights saved to Hugging Face Hub as {hub_path}")


def _save_to_directory(trainer, training_type, target_dir, where):
    """Save into ``target_dir``; ``where`` is the phrase used in log messages."""
    label = _SAVE_LABELS[training_type][0]
    os.makedirs(target_dir, exist_ok=True)

    if training_type == "peft":
        trainer.model.save_pretrained(target_dir)
    else:  # "full_ft", "adapter" and "bitfit"
        trainer.save_model(target_dir)
    logger.info(f"✅ {label} saved {where} {target_dir}")

    if training_type == "adapter":
        active_adapter = _first_active_adapter(trainer.model)
        if active_adapter:
            adapter_path = os.path.join(target_dir, "adapter")
            os.makedirs(adapter_path, exist_ok=True)
            trainer.model.save_adapter(adapter_path, active_adapter)
            logger.info(f"✅ Adapter weights saved {where} {adapter_path}")


def save_model_with_fallback(trainer, model_name, training_type="full_ft", relation_type=None, objective=None):
    """
    Attempt to save model with multiple fallback options based on training type

    The save is tried at the Hugging Face Hub path, then under
    ``drive_save_path``, then under ``local_save_path``; the first attempt
    that does not raise wins. When a wandb run is active, the outcome
    (timestamp, training type, success flag, location and last error text) is
    logged under ``save_attempt_<model_name>``.

    Args:
        trainer: The trainer instance
        model_name: Base name for the model/adapter/peft
        training_type: Type of training ('full_ft', 'adapter', 'peft', 'bitfit')
        relation_type: Type of linguistic relationship (genetic, script, word_order)
        objective: Training objective (head_to_tail, head_and_tail, tail_to_tail)

    Returns:
        True when the model was saved at one of the locations, False when the
        model name is empty, the training type is unsupported, or every
        location failed.
    """
    if not model_name:
        logger.error("❌ Model name cannot be empty")
        return False

    # Create timestamped version of model name with relationship and objective info
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    relationship_info = f"{relation_type}_{objective}" if relation_type and objective else ""
    if relationship_info:
        full_model_name = f"{model_name}_{relationship_info}_{timestamp}"
    else:
        full_model_name = f"{model_name}_{timestamp}"

    # Outcome is tracked in plain variables. Names bound by ``except ... as``
    # are removed when their handler exits, so they cannot be inspected from
    # the ``finally`` block below.
    location = "none"
    last_error = None

    try:
        if training_type not in _SAVE_LABELS:
            logger.error(f"❌ Unsupported training type: {training_type}")
            return False

        # First try to save to HuggingFace Hub
        try:
            _save_to_hub(trainer, training_type, f"jslai/{full_model_name}")
            location = "hub"
            return True
        except Exception as hub_error:
            last_error = str(hub_error)
            hub_error_label = _SAVE_LABELS[training_type][1]
            if hub_error_label:
                logger.warning(f"⚠️ Hub {hub_error_label} save error: {last_error}")
            logger.warning(f"⚠️ Failed to save to Hugging Face Hub: {last_error}")

        # Try saving to Google Drive
        try:
            _save_to_directory(
                trainer,
                training_type,
                os.path.join(drive_save_path, full_model_name),
                "to Google Drive at",
            )
            location = "drive"
            return True
        except Exception as drive_error:
            last_error = str(drive_error)
            logger.warning(f"⚠️ Failed to save to Google Drive: {last_error}")

        # Finally try saving locally
        try:
            _save_to_directory(
                trainer,
                training_type,
                os.path.join(local_save_path, full_model_name),
                "locally at",
            )
            location = "local"
            return True
        except Exception as local_error:
            last_error = str(local_error)
            logger.error(f"❌ Failed to save model anywhere: {last_error}")
            return False

    finally:
        # Log save attempt to wandb
        if wandb.run is not None:
            wandb.log({
                f"save_attempt_{model_name}": {
                    "timestamp": timestamp,
                    "training_type": training_type,
                    "success": location != "none",
                    "location": location,
                    "error": last_error
                }
            })



def load_best_model(model_name):
    """
    Load a sequence-classification model, trying three locations in order.

    The Hub id ``jslai/<model_name>`` is tried first, then the directory
    ``<drive_save_path>/<model_name>``, then ``<local_save_path>/<model_name>``.
    Each failure is printed before the next location is tried.

    Raises:
        FileNotFoundError: when none of the three locations yields a model.
    """
    def _load_from_directory(directory, place, missing_where):
        # A missing directory is reported as OSError so the caller's handler
        # treats it like any other failed load.
        if not os.path.isdir(directory):
            raise OSError(f"Directory {directory} does not exist {missing_where}.")
        print(f"Attempting to load {model_name} from {place}.")
        return AutoModelForSequenceClassification.from_pretrained(directory)

    try:
        print(f"Attempting to load {model_name} from Hugging Face Hub.")
        return AutoModelForSequenceClassification.from_pretrained(f"jslai/{model_name}")
    except (OSError, HTTPError) as hub_failure:
        print(f"Failed to load {model_name} from Hugging Face Hub: {hub_failure}")
        try:
            return _load_from_directory(
                os.path.join(drive_save_path, model_name), "Google Drive", "on Google Drive"
            )
        except (OSError, HTTPError) as drive_failure:
            print(f"Failed to load {model_name} from Google Drive: {drive_failure}")
            try:
                return _load_from_directory(
                    os.path.join(local_save_path, model_name), "local storage", "in local storage"
                )
            except (OSError, HTTPError) as local_failure:
                print(f"Failed to load {model_name} from local storage: {local_failure}")
                raise FileNotFoundError(f"Model {model_name} could not be found in any location.")

# Step 13: Training Functions

Full FT and PEFT across loss functions

In [None]:

def full_fine_tune_all_models(model_checkpoints, dataset, loss_functions=None, relation_type=None, objective=None):
    """
    Fully fine-tune every checkpoint with every loss function and save each result.

    Args:
        model_checkpoints: Mapping of model name to an info dict; its "path"
            entry is passed to ``from_pretrained``.
        dataset: Dataset handed to ``tokenize_datasets`` once per model.
        loss_functions: Mapping of loss name to loss object. Defaults to
            ``get_loss_functions(device)``.
        relation_type: Forwarded to ``save_model_with_fallback``.
        objective: Forwarded to ``save_model_with_fallback``.

    A failure for one model or one loss function is printed and skipped, so
    the remaining combinations still run.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    if loss_functions is None:
        loss_functions = get_loss_functions(device)

    for model_name, model_info in model_checkpoints.items():
        try:
            # Tokenize dataset for this model
            tokenized_data = tokenize_datasets(model_name, dataset)

            for loss_fn_name, loss_fn in loss_functions.items():
                print(f"\nTraining {model_name} with {loss_fn_name}")
                print(f"Using device: {device}")

                # Bound up front so the cleanup below never depends on how far
                # the attempt got.
                model = None
                trainer = None

                try:
                    # Clear CUDA cache
                    if use_cuda:
                        torch.cuda.empty_cache()

                    # Initialize model
                    model = AutoModelForSequenceClassification.from_pretrained(
                        model_info["path"],
                        num_labels=2,
                        problem_type="single_label_classification"
                    ).to(device)

                    # Training arguments
                    training_args = TrainingArguments(
                        output_dir=f"{local_save_path}/{model_name}_{loss_fn_name}_full_ft",
                        eval_strategy="epoch",  # Updated from evaluation_strategy
                        save_strategy="epoch",
                        learning_rate=2e-5,
                        per_device_train_batch_size=8,
                        per_device_eval_batch_size=8,
                        num_train_epochs=3,
                        weight_decay=0.01,
                        load_best_model_at_end=True,
                        metric_for_best_model="f1",
                        report_to="wandb",
                        logging_steps=100,
                        # Mixed precision needs a GPU; requesting it on the
                        # CPU fallback would make every run fail at setup.
                        fp16=use_cuda,
                        fp16_backend="auto",
                        gradient_checkpointing=True,
                        gradient_accumulation_steps=2,
                        warmup_ratio=0.1,
                        dataloader_num_workers=4,
                        dataloader_pin_memory=True,
                        seed=42,
                        remove_unused_columns=False
                    )

                    # Initialize trainer
                    trainer = CustomTrainer(
                        model=model,
                        args=training_args,
                        train_dataset=tokenized_data["train"],
                        eval_dataset=tokenized_data["validation"],
                        compute_metrics=compute_metrics,
                        loss_func=loss_fn
                    )

                    # Train, then save with relationship and objective info
                    trainer.train()
                    save_model_with_fallback(
                        trainer,
                        f"{model_name}_{loss_fn_name}_full_ft",
                        training_type="full_ft",
                        relation_type=relation_type,
                        objective=objective
                    )

                except Exception as e:
                    print(f"Error training {model_name} with {loss_fn_name}: {str(e)}")

                finally:
                    # Drop references before the next model is loaded
                    trainer = None
                    model = None
                    if use_cuda:
                        torch.cuda.empty_cache()

        except Exception as e:
            print(f"Error processing model {model_name}: {str(e)}")
            continue

    print("\nFull Fine-Tuning completed!")
    return


def peft_fine_tune_all_models(model_checkpoints, dataset, loss_functions=None, peft_methods=None, relation_type=None, objective=None):
    """
    Fine-tune every checkpoint with every PEFT method and loss function.

    Args:
        model_checkpoints: Mapping of model name to an info dict; its "path"
            entry is passed to ``from_pretrained`` and used to pick target
            modules.
        dataset: Dataset handed to ``tokenize_datasets`` once per model.
        loss_functions: Mapping of loss name to loss object. Defaults to
            ``get_loss_functions(device)``.
        peft_methods: Mapping of method name to a callable
            ``(model_path, target_modules) -> PEFT config``. Defaults to
            LoRA, AdaLoRA and IA3 configurations.
        relation_type: Forwarded to ``save_model_with_fallback``.
        objective: Forwarded to ``save_model_with_fallback``.

    A failure for one combination is printed and skipped, so the remaining
    combinations still run.
    """
    # Initialize device
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Get loss functions if not provided
    if loss_functions is None:
        loss_functions = get_loss_functions(device)

    def get_target_modules(model_path):
        """Get target modules based on model architecture"""
        path = model_path.lower()
        # "bert" is a substring of "roberta" and "deberta", so the more
        # specific names have to be tested first.
        if "deberta" in path:
            return ["query_proj", "key_proj", "value_proj", "dense"]
        if "roberta" in path:
            # Hugging Face RoBERTa layers name their linear modules
            # query/key/value/dense; the names are matched as suffixes.
            return ["query", "key", "value", "intermediate.dense", "output.dense"]
        if "bert" in path:
            # assumes a 12-layer model whose backbone attribute is "bert"
            # - TODO confirm for every checkpoint in use
            return [
                f"bert.encoder.layer.{i}.{suffix}"
                for suffix in (
                    "attention.self.query",
                    "attention.self.key",
                    "attention.self.value",
                    "intermediate.dense",
                    "output.dense",
                )
                for i in range(12)
            ]
        return ["query", "key", "value", "dense"]

    # Define PEFT methods with IA3 configuration for encoder-only models
    if peft_methods is None:
        peft_methods = {
            "lora": lambda path, modules: LoraConfig(
                task_type=TaskType.SEQ_CLS,
                r=16,
                lora_alpha=32,
                lora_dropout=0.1,
                bias="none",
                inference_mode=False,
                target_modules=modules,
                modules_to_save=["classifier"]
            ),
            "adalora": lambda path, modules: AdaLoraConfig(
                task_type=TaskType.SEQ_CLS,
                init_r=12,
                target_r=8,
                beta1=0.85,
                beta2=0.95,
                tinit=200,
                tfinal=1000,
                deltaT=10,
                lora_alpha=32,
                target_modules=modules,
                lora_dropout=0.1,
                inference_mode=False
            ),
            "ia3": lambda path, modules: IA3Config(
                task_type=TaskType.SEQ_CLS,
                target_modules=modules,  # Includes both attention and feedforward layers
                feedforward_modules=[module for module in modules if "intermediate.dense" in module or "fc1" in module],  # Feedforward subset
                modules_to_save=["classifier"],  # Keep the classifier head trainable
                inference_mode=False
            )
        }

    for model_name, model_info in model_checkpoints.items():
        try:
            # Get target modules for this model
            target_modules = get_target_modules(model_info["path"])

            # Tokenize dataset specific to model
            tokenized_data = tokenize_datasets(model_name, dataset)

            for peft_name, peft_config_fn in peft_methods.items():
                for loss_fn_name, loss_fn in loss_functions.items():
                    # Bound up front so the cleanup below always works
                    base_model = None
                    model = None
                    trainer = None

                    try:
                        print(f"\nFine-tuning {model_name} with PEFT ({peft_name}) using {loss_fn_name}")
                        print(f"Using device: {device}")

                        # Initialize tokenizer and base model
                        tokenizer = AutoTokenizer.from_pretrained(model_info["path"])
                        base_model = AutoModelForSequenceClassification.from_pretrained(
                            model_info["path"],
                            num_labels=2,
                            problem_type="single_label_classification"
                        )

                        # Get PEFT configuration
                        peft_config = peft_config_fn(model_info["path"], target_modules)

                        # Update encoder_hidden_size for prefix tuning
                        if peft_name == "prefix":
                            peft_config.encoder_hidden_size = base_model.config.hidden_size

                        # Get PEFT model
                        model = get_peft_model(base_model, peft_config)
                        print(f"\nTrainable parameters for {peft_name}:")
                        model.print_trainable_parameters()

                        # Move model to device
                        model = model.to(device)

                        # Move the loss to the same device when it supports it
                        # (a plain callable has no ``to`` method)
                        if hasattr(loss_fn, "to"):
                            loss_fn = loss_fn.to(device)

                        # Define training arguments
                        training_args = TrainingArguments(
                            output_dir=f"{local_save_path}/{model_name}_{loss_fn_name}_{peft_name}",
                            eval_strategy="epoch",
                            save_strategy="epoch",
                            learning_rate=2e-5,
                            per_device_train_batch_size=8,
                            per_device_eval_batch_size=8,
                            num_train_epochs=3,
                            weight_decay=0.01,
                            load_best_model_at_end=True,
                            metric_for_best_model="f1",
                            logging_dir="./logs",
                            report_to="wandb",
                            logging_steps=100,
                            # Mixed precision needs a GPU; requesting it on
                            # the CPU fallback would fail at setup.
                            fp16=use_cuda,
                            fp16_backend="auto",
                            gradient_checkpointing=True,
                            gradient_accumulation_steps=4,
                            optim="adamw_torch",
                            warmup_ratio=0.1,
                            dataloader_num_workers=4,
                            dataloader_pin_memory=True,
                            seed=42
                        )

                        # Initialize trainer
                        trainer = CustomTrainer(
                            model=model,
                            args=training_args,
                            train_dataset=tokenized_data["train"],
                            eval_dataset=tokenized_data["validation"],
                            data_collator=DataCollatorWithPadding(tokenizer),
                            compute_metrics=compute_metrics,
                            loss_func=loss_fn
                        )

                        # Train the model
                        trainer.train()

                        # Save with relationship and objective info
                        save_model_with_fallback(
                            trainer,
                            f"{model_name}_{loss_fn_name}_{peft_name}",
                            training_type="peft",
                            relation_type=relation_type,
                            objective=objective
                        )

                    except Exception as e:
                        print(f"Error training {model_name} with {peft_name} and {loss_fn_name}: {str(e)}")

                    finally:
                        # Release memory after failures too, not only after
                        # a successful run
                        trainer = None
                        model = None
                        base_model = None
                        if use_cuda:
                            torch.cuda.empty_cache()

        except Exception as e:
            print(f"Error processing model {model_name}: {str(e)}")
            continue

    print("\nPEFT fine-tuning completed!")
    return


BitFit, U-BitFit, and S-BitFit PEFT

In [None]:
def apply_standard_bitfit(model):
    """
    Freeze every parameter except bias terms and the classifier.

    A parameter stays trainable when its name contains 'bias' or
    'classifier'. Keeping the classifier weight trainable matches
    ``apply_u_bitfit`` and ``apply_s_bitfit``, which also leave the whole
    classifier trainable.
    """
    for name, param in model.named_parameters():
        param.requires_grad = 'bias' in name or 'classifier' in name

def apply_u_bitfit(model, train_dataloader, k=100):
    """
    Keep only the k highest-scoring bias parameters (plus the classifier) trainable.

    Each non-classifier bias is scored as sum(|param * grad|) from a single
    backward pass over the first batch of ``train_dataloader``. The batch must
    provide 'input_ids', 'attention_mask' and 'labels'. All classifier
    parameters are made trainable; every other parameter is trainable only if
    it is one of the k best-scored biases. Gradients are cleared on exit.
    """
    model.zero_grad()

    # Classifier stays trainable; every other bias starts with a zero score
    scores = {}
    for param_name, tensor in model.named_parameters():
        if 'classifier' in param_name:
            tensor.requires_grad = True
        elif 'bias' in param_name:
            scores[param_name] = 0.0

    model.train()

    # Score from the first batch only
    first_batch = next(iter(train_dataloader), None)
    if first_batch is not None:
        target_device = model.device
        result = model(
            input_ids=first_batch['input_ids'].to(target_device),
            attention_mask=first_batch['attention_mask'].to(target_device),
            labels=first_batch['labels'].to(target_device)
        )
        result.loss.backward()

        for param_name, tensor in model.named_parameters():
            if 'bias' not in param_name or 'classifier' in param_name:
                continue
            if tensor.grad is None:
                continue
            scores[param_name] += (tensor * tensor.grad).abs().sum().item()

    # Names of the k best-scored biases (stable order for ties)
    selected = set(sorted(scores, key=scores.get, reverse=True)[:k])

    # Freeze or unfreeze everything outside the classifier accordingly
    for param_name, tensor in model.named_parameters():
        if 'classifier' in param_name:
            continue
        tensor.requires_grad = param_name in selected

    model.zero_grad()

def apply_s_bitfit(model, train_dataloader, k=2):
    """
    Keep only the bias parameters of the k highest-scoring layers trainable.

    A "layer" is a parameter name with its last two dotted components
    removed. Each layer is scored as the sum of sum(|param * grad|) over its
    non-classifier biases, from a single backward pass over the first batch of
    ``train_dataloader`` (which must provide 'input_ids', 'attention_mask' and
    'labels'). Biases without a gradient are skipped. All classifier
    parameters stay trainable; outside the classifier only biases that belong
    to a selected layer are trainable. Gradients are cleared on exit.
    """
    model.zero_grad()
    # Ensure classifier parameters are always trainable
    for name, param in model.named_parameters():
        if 'classifier' in name:
            param.requires_grad = True

    layer_importance = {}
    model.train()

    # Process one batch to calculate layer importance
    first_batch = next(iter(train_dataloader), None)
    if first_batch is not None:
        outputs = model(
            input_ids=first_batch['input_ids'].to(model.device),
            attention_mask=first_batch['attention_mask'].to(model.device),
            labels=first_batch['labels'].to(model.device)
        )
        outputs.loss.backward()

        for name, param in model.named_parameters():
            if 'bias' not in name or 'classifier' in name:
                continue
            # Frozen or unused parameters have no gradient; multiplying by
            # None would raise, so they contribute nothing (as in U-BitFit).
            if param.grad is None:
                continue
            layer_name = name.rsplit('.', 2)[0]
            score = (param * param.grad).abs().sum().item()
            layer_importance[layer_name] = layer_importance.get(layer_name, 0.0) + score

    # Select top k layers
    top_k_layer_names = set(
        sorted(layer_importance, key=layer_importance.get, reverse=True)[:k]
    )

    # Set requires_grad
    for name, param in model.named_parameters():
        if 'classifier' in name:  # Don't modify classifier params
            continue
        layer_name = name.rsplit('.', 2)[0]
        # BitFit trains biases only: weights stay frozen even inside a
        # selected layer.
        param.requires_grad = 'bias' in name and layer_name in top_k_layer_names

    model.zero_grad()

def bitfit_peft_fine_tune_all_models(model_checkpoints, dataset, loss_functions=None, peft_method=None, k=100, relation_type=None, objective=None):
    """
    Fine-tune every checkpoint with one BitFit variant and every loss function.

    Args:
        model_checkpoints: Mapping of model name to an info dict; its "path"
            entry is passed to ``from_pretrained``.
        dataset: Dataset handed to ``tokenize_datasets`` once per model.
        loss_functions: Mapping of loss name to loss object. Defaults to
            ``get_loss_functions(device)``.
        peft_method: "BitFit Full Fine-Tuning", "U-BitFit" or "S-BitFit". Any
            other value applies no freezing (a warning is printed).
        k: Number of bias parameters kept by U-BitFit.
        relation_type: Forwarded to ``save_model_with_fallback``.
        objective: Forwarded to ``save_model_with_fallback``.

    A failure for one model or one loss function is printed and skipped, so
    the remaining combinations still run.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    if loss_functions is None:
        loss_functions = get_loss_functions(device)

    for model_name, model_info in model_checkpoints.items():
        try:
            print(f"\nProcessing {model_name} with {peft_method}")
            tokenized_data = tokenize_datasets(model_name, dataset)
            tokenizer = AutoTokenizer.from_pretrained(model_info["path"])

            for loss_fn_name, loss_fn in loss_functions.items():
                # Bound up front so the cleanup below always works
                model = None
                trainer = None

                try:
                    print(f"\nTraining with {loss_fn_name}")
                    if use_cuda:
                        torch.cuda.empty_cache()

                    model = AutoModelForSequenceClassification.from_pretrained(
                        model_info["path"],
                        num_labels=2,
                        problem_type="single_label_classification"
                    ).to(device)

                    if peft_method == "BitFit Full Fine-Tuning":
                        apply_standard_bitfit(model)
                    elif peft_method in ("U-BitFit", "S-BitFit"):
                        # Only the scored variants need a batch of data, so
                        # the loader is built only for them.
                        dataloader = DataLoader(
                            tokenized_data["train"],
                            batch_size=8,
                            shuffle=True,
                            num_workers=4,
                            pin_memory=True,
                            collate_fn=DataCollatorWithPadding(tokenizer)
                        )
                        if peft_method == "U-BitFit":
                            apply_u_bitfit(model, dataloader, k)
                        else:
                            # NOTE(review): S-BitFit always keeps 2 layers and
                            # ignores ``k`` - confirm this is intended.
                            apply_s_bitfit(model, dataloader, k=2)
                    else:
                        print(f"Warning: unrecognized peft_method {peft_method!r}; no BitFit freezing applied")

                    training_args = TrainingArguments(
                        output_dir=f"{local_save_path}/{model_name}_{peft_method}_{loss_fn_name}",
                        eval_strategy="epoch",
                        save_strategy="epoch",
                        learning_rate=2e-5,
                        per_device_train_batch_size=8,
                        per_device_eval_batch_size=8,
                        num_train_epochs=3,
                        weight_decay=0.01,
                        load_best_model_at_end=True,
                        metric_for_best_model="f1",
                        report_to="wandb",
                        logging_steps=100,
                        # Mixed precision needs a GPU; requesting it on the
                        # CPU fallback would fail at setup.
                        fp16=use_cuda,
                        fp16_backend="auto",
                        gradient_checkpointing=True,
                        gradient_accumulation_steps=2,
                        warmup_ratio=0.1,
                        dataloader_num_workers=4,
                        dataloader_pin_memory=True,
                        seed=42,
                        remove_unused_columns=False
                    )

                    trainer = CustomTrainer(
                        model=model,
                        args=training_args,
                        train_dataset=tokenized_data["train"],
                        eval_dataset=tokenized_data["validation"],
                        tokenizer=tokenizer,
                        data_collator=DataCollatorWithPadding(tokenizer),
                        compute_metrics=compute_metrics,
                        loss_func=loss_fn
                    )

                    # Train, then save with relationship and objective info
                    trainer.train()
                    save_model_with_fallback(
                        trainer,
                        f"{model_name}_{peft_method}_{loss_fn_name}",
                        training_type="bitfit",
                        relation_type=relation_type,
                        objective=objective
                    )

                except Exception as e:
                    print(f"Error training with {loss_fn_name}: {str(e)}")

                finally:
                    # Drop references before the next model is loaded
                    trainer = None
                    model = None
                    if use_cuda:
                        torch.cuda.empty_cache()

        except Exception as e:
            print(f"Error processing {model_name}: {str(e)}")
            continue

    print(f"\n{peft_method} completed!")
    return

Adapter PEFT

In [None]:
import os
import torch
import wandb
import logging
from pathlib import Path
from typing import Dict, Any, Optional, Callable
from transformers import TrainingArguments, EarlyStoppingCallback


def _adapter_path_size_mb(adapter_path: str) -> float:
    """Return the on-disk size of ``adapter_path`` in MiB.

    A directory is measured as the sum of every regular file beneath it
    (``stat().st_size`` on the directory itself only reports the size of the
    directory entry, not of its contents). A plain file is measured directly.

    Raises:
        FileNotFoundError: If ``adapter_path`` does not exist.
    """
    root = Path(adapter_path)
    if root.is_file():
        total_bytes = root.stat().st_size
    elif root.is_dir():
        total_bytes = sum(
            entry.stat().st_size for entry in root.rglob("*") if entry.is_file()
        )
    else:
        raise FileNotFoundError(f"Adapter path not found: {adapter_path}")
    return total_bytes / (1024 * 1024)


def adapter_fine_tune_all_models(
    model_checkpoints: Dict[str, Dict[str, Any]],
    dataset: Any,
    loss_functions: Optional[Dict[str, Callable]] = None,
    local_save_path: str = "./adapter_checkpoints",
    relation_type=None, objective=None
) -> None:
    """Fine-tune every model with every supported adapter configuration.

    For each (model, adapter config, loss function) combination a fresh
    ``AutoAdapterModel`` is loaded, the adapter and a 2-label classification
    head are added, the adapter is trained, and the result is saved through
    ``save_model_with_fallback`` plus a separate ``save_adapter`` call.
    A failure in one combination is logged and the loop moves on.

    Args:
        model_checkpoints: Maps a model name to its info dict; ``info["path"]``
            is passed to ``AutoAdapterModel.from_pretrained``.
        dataset: Raw dataset handed to ``tokenize_datasets`` once per model.
        loss_functions: Maps a loss name to a loss callable. Defaults to
            ``{"default": None}``.
        local_save_path: Root directory for checkpoints and adapter weights.
        relation_type: Forwarded to ``save_model_with_fallback``.
        objective: Forwarded to ``save_model_with_fallback``.
    """
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    loss_functions = loss_functions or {"default": None}

    # Define comprehensive adapter configurations
    adapter_configs = {
        "seq_bn": SeqBnConfig(),  # Sequential bottleneck adapter
        "double_seq_bn": DoubleSeqBnConfig(),  # Double sequential bottleneck
        "par_bn": ParBnConfig(),  # Parallel bottleneck
        "scaled_par_bn": ParBnConfig(scaling="learned"),  # Scaled parallel bottleneck
        "seq_bn_inv": SeqBnInvConfig(),  # Sequential invertible adapter
        "double_seq_bn_inv": DoubleSeqBnInvConfig(),  # Double sequential invertible
        "compacter": CompacterConfig(),  # Compacter
        "compacter++": CompacterPlusPlusConfig(),  # Compacter++
        "prefix_tuning": PrefixTuningConfig(),  # Standard prefix tuning
        # "prefix_tuning_flat": PrefixTuningConfig(flat=True),  # Flat prefix tuning
        "lora": LoRAConfig(),  # LoRA
        "ia3": IA3Config(),  # IA³
        "mam": MAMConfig(),  # Mix-and-Match
        "unipelt": UniPELTConfig(),  # UniPELT
        "prompt_tuning": PromptTuningConfig(),  # Prompt Tuning
        "loreft": LoReftConfig(),  # ReFT
        "noreft": NoReftConfig(),  # NoReFT
        "direft": DiReftConfig()  # DiReFT
    }

    for model_name, model_info in model_checkpoints.items():
        logger.info(f"\nProcessing {model_name}")

        try:
            tokenized_data = tokenize_datasets(model_name, dataset)

            for adapter_name, adapter_config in adapter_configs.items():
                for loss_fn_name, loss_fn in loss_functions.items():
                    # Bound up front so the cleanup in ``finally`` is always valid,
                    # even when loading the model itself fails.
                    model = None
                    trainer = None
                    try:
                        logger.info(f"\nTraining with {adapter_name} adapter and {loss_fn_name}")

                        if use_cuda:
                            torch.cuda.empty_cache()

                        # Initialize model with AutoAdapterModel
                        model = AutoAdapterModel.from_pretrained(
                            model_info["path"],
                            num_labels=2,
                            trust_remote_code=True
                        ).to(device)

                        # Add adapter with specific configuration
                        adapter_id = f"{model_name}_{adapter_name}"
                        model.add_adapter(adapter_id, config=adapter_config)

                        # Add classification head
                        model.add_classification_head(
                            adapter_id,
                            num_labels=2,
                            id2label={0: "False", 1: "True"}
                        )

                        # Activate adapter for training
                        model.train_adapter(adapter_id)

                        # Training arguments with adapter-specific settings
                        training_args = TrainingArguments(
                            output_dir=f"{local_save_path}/{model_name}_{adapter_name}_{loss_fn_name}",
                            learning_rate=1e-4,
                            num_train_epochs=6,
                            per_device_train_batch_size=32,
                            per_device_eval_batch_size=32,
                            logging_steps=200,
                            save_strategy="epoch",
                            eval_strategy="epoch",
                            load_best_model_at_end=True,
                            metric_for_best_model="eval_loss",
                            greater_is_better=False,
                            remove_unused_columns=False,
                            # Mixed precision needs a GPU; requesting it on a
                            # CPU-only runtime makes every run fail at setup.
                            fp16=use_cuda,
                            gradient_checkpointing=False,
                            warmup_ratio=0.1,
                            weight_decay=0.01,
                            report_to="wandb"
                        )

                        # NOTE(review): ``loss_fn`` is only used for naming here and is
                        # not handed to the trainer, so every loss variant trains with
                        # the trainer's own loss. If CustomAdapterTrainer accepts a
                        # ``loss_func`` argument (as CustomTrainer does), pass it - confirm.
                        trainer = CustomAdapterTrainer(
                            model=model,
                            args=training_args,
                            train_dataset=tokenized_data["train"],
                            eval_dataset=tokenized_data["validation"],
                            compute_metrics=compute_metrics,
                            callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
                        )

                        # Train
                        trainer.train()

                        # Save with relationship and objective info
                        model_save_name = f"{model_name}_{adapter_name}_{loss_fn_name}"
                        save_model_with_fallback(
                            trainer,
                            model_save_name,
                            training_type="adapter",
                            relation_type=relation_type,
                            objective=objective
                        )

                        # Also save the adapter weights separately (best effort)
                        adapter_save_path = os.path.join(local_save_path, f"{model_save_name}_adapter")
                        try:
                            model.save_adapter(adapter_save_path, adapter_id)
                            logger.info(f"Adapter weights saved to {adapter_save_path}")
                        except Exception as e:
                            logger.warning(f"Failed to save adapter weights: {str(e)}")

                        # Log adapter size to wandb if available (best effort)
                        try:
                            adapter_size = _adapter_path_size_mb(adapter_save_path)  # Size in MB
                            wandb.log({f"{adapter_name}_size_mb": adapter_size})
                        except Exception as e:
                            logger.warning(f"Failed to log adapter size: {str(e)}")

                    except Exception as e:
                        logger.error(f"Error training {model_name} with {adapter_name} and {loss_fn_name}: {str(e)}")

                    finally:
                        # Drop both references before the next model is loaded; the
                        # trainer holds the model, so releasing only ``model`` would
                        # keep its weights alive.
                        trainer = None
                        model = None
                        if use_cuda:
                            torch.cuda.empty_cache()

        except Exception as e:
            logger.error(f"Error processing {model_name}: {str(e)}")

    logger.info("\nAdapter fine-tuning completed!")


Mixmatch_Adapter

In [None]:
# def mixmatch_adapter_fine_tune(model_checkpoints, dataset, loss_functions=None):
#     """Fine-tune models using Mix-and-Match adapter configurations"""
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#     if loss_functions is None:
#         loss_functions = get_loss_functions(device)

#     # Define Mix-and-Match configurations
#     mixmatch_configs = {
#         "mam_basic": lambda: MAMConfig(
#             bottleneck_size=800
#         ),
#         "mam_custom": lambda: ConfigUnion(
#             PrefixTuningConfig(bottleneck_size=800, prefix_length=30),
#             ParBnConfig(reduction_factor=16)
#         ),
#         "unipelt_basic": lambda: UniPELTConfig(),
#         "unipelt_custom": lambda: ConfigUnion(
#             LoRAConfig(r=8, alpha=2, use_gating=True),
#             PrefixTuningConfig(prefix_length=30, use_gating=True),
#             SeqBnConfig(reduction_factor=16, use_gating=True)
#         )
#     }

#     for model_name, model_info in model_checkpoints.items():
#         try:
#             print(f"\nProcessing {model_name}")
#             tokenized_data = tokenize_datasets(model_name, dataset)

#             for config_name, config_fn in mixmatch_configs.items():
#                 for loss_fn_name, loss_fn in loss_functions.items():
#                     try:
#                         print(f"\nTraining with {config_name} configuration and {loss_fn_name}")

#                         # Initialize model
#                         model = AutoModelForSequenceClassification.from_pretrained(
#                             model_info["path"],
#                             num_labels=2
#                         ).to(device)

#                         # Add adapter with mix-match configuration
#                         adapter_config = config_fn()
#                         model.add_adapter(f"{config_name}_adapter", config=adapter_config)
#                         model.train_adapter(f"{config_name}_adapter")

#                         # Add gating callback for UniPELT configurations
#                         callbacks = []
#                         if "unipelt" in config_name:
#                             callbacks.append(GatingScoreCallback)

#                         # Training arguments
#                         training_args = TrainingArguments(
#                             output_dir=f"{local_save_path}/{model_name}_{config_name}_{loss_fn_name}",
#                             eval_strategy="epoch",
#                             save_strategy="epoch",
#                             learning_rate=2e-5,
#                             per_device_train_batch_size=8,
#                             per_device_eval_batch_size=8,
#                             num_train_epochs=3,
#                             weight_decay=0.01,
#                             load_best_model_at_end=True,
#                             metric_for_best_model="f1",
#                             logging_dir="./logs",
#                             report_to="wandb",
#                             logging_steps=100,
#                             fp16=True,
#                             gradient_checkpointing=True,
#                             gradient_accumulation_steps=4,
#                             warmup_ratio=0.1
#                         )

#                         # Initialize trainer
#                         trainer = CustomTrainer(
#                             model=model,
#                             args=training_args,
#                             train_dataset=tokenized_data["train"],
#                             eval_dataset=tokenized_data["validation"],
#                             compute_metrics=compute_metrics,
#                             loss_func=loss_fn,
#                             callbacks=callbacks
#                         )

#                         # Train
#                         trainer.train()

#                         # Save adapter and additional data
#                         output_dir = f"{local_save_path}/{model_name}_{config_name}_{loss_fn_name}"
#                         model.save_all_adapters(output_dir)

#                         # Save gating scores for UniPELT
#                         if "unipelt" in config_name:
#                             outputs = trainer.model(
#                                 **trainer.model.dummy_inputs,
#                                 output_adapter_gating_scores=True
#                             )
#                             if hasattr(outputs, "adapter_gating_scores"):
#                                 with open(f"{output_dir}/gating_scores.pkl", "wb") as f:
#                                     pickle.dump(outputs.adapter_gating_scores, f)

#                     except Exception as e:
#                         print(f"Error training {model_name} with {config_name} and {loss_fn_name}: {str(e)}")
#                         continue

#                     finally:
#                         cleanup()

#         except Exception as e:
#             print(f"Error processing {model_name}: {str(e)}")
#             continue

#     print("\nMix-Match adapter fine-tuning completed!")
#     return

In [None]:
                        # Save the model and adapter
                        # output_dir = f"{local_save_path}/{model_name}_{loss_fn_name}_{peft_name}"
                        # save_model_with_fallback(trainer, output_dir)
                        # model.save_pretrained(f"{output_dir}/adapter")


# Step 14: Inference Function

In [None]:
# def run_inference_and_save_results(model_checkpoints, test_df, results_dir):
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     predictions_df_list = []

#     # Get list of all trained models
#     loss_functions = get_loss_functions(device)

#     for model_name, model_info in model_checkpoints.items():
#         # For each training method (full fine-tuning with different loss functions)
#         for loss_fn_name in loss_functions.keys():
#             try:
#                 # Full fine-tuning model
#                 full_ft_model_name = f"{model_name}_{loss_fn_name}_full_ft"
#                 model = load_best_model(full_ft_model_name).to(device)
#                 tokenizer = AutoTokenizer.from_pretrained(model_info["path"])

#                 inputs = tokenizer(
#                     list(test_df["text"]),
#                     truncation=True,
#                     padding=True,
#                     max_length=model_info["max_length"],
#                     return_tensors="pt"
#                 ).to(device)

#                 with torch.no_grad():
#                     outputs = model(**inputs)
#                     preds = outputs.logits.argmax(dim=-1).cpu().numpy()

#                 result_df = test_df.copy()
#                 result_df["prediction"] = preds
#                 predictions_df_list.append((full_ft_model_name, result_df))

#                 # PEFT model
#                 peft_model_name = f"{model_name}_{loss_fn_name}_peft_lora"
#                 peft_model_path = os.path.join(local_save_path, peft_model_name, "adapter")

#                 if os.path.exists(peft_model_path):
#                     model = load_best_model(peft_model_name).to(device)

#                     with torch.no_grad():
#                         outputs = model(**inputs)
#                         preds = outputs.logits.argmax(dim=-1).cpu().numpy()

#                     result_df = test_df.copy()
#                     result_df["prediction"] = preds
#                     predictions_df_list.append((peft_model_name, result_df))

#                 # Clear memory
#                 del model
#                 torch.cuda.empty_cache()

#             except Exception as e:
#                 print(f"Error during inference for {model_name}: {e}")
#                 continue

#     # Save all predictions
#     for model_name, result_df in predictions_df_list:
#         result_file_path = os.path.join(results_dir, f"{model_name}_predictions.csv")
#         result_df.to_csv(result_file_path, index=False)
#         print(f"Saved predictions for {model_name} to {result_file_path}")


In [None]:
def _predict_labels(model, inputs, device, batch_size):
    """Return the argmax class id for every row of ``inputs``.

    The tokenized inputs are fed to ``model`` in slices of ``batch_size`` rows so
    that the whole test set never has to sit on the device at once.

    # assumes ``inputs`` maps names to tensors whose first dimension is the row
    # index and that it contains "input_ids" - TODO confirm for custom tokenizers
    """
    model.eval()
    n_rows = inputs["input_ids"].shape[0]
    predicted = []
    with torch.no_grad():
        for start in range(0, n_rows, batch_size):
            batch = {
                name: tensor[start:start + batch_size].to(device)
                for name, tensor in inputs.items()
            }
            logits = model(**batch).logits
            predicted.append(logits.argmax(dim=-1).cpu())
    return torch.cat(predicted).numpy()


def run_inference_and_save_results(model_checkpoints, test_df, results_dir, batch_size=32):
    """Run every saved model variant on ``test_df`` and write one CSV per variant.

    For each base model the test texts are tokenized once, then each training
    variant (full fine-tuning, LoRA, the three BitFit flavours, adapter) is
    loaded, used for prediction, and released. A variant that cannot be loaded
    or run is reported on stdout and skipped.

    Args:
        model_checkpoints: Maps a model name to an info dict with ``"path"``
            (tokenizer / base model location) and ``"max_length"``.
        test_df: DataFrame with a ``"text"`` column; it is copied per variant and
            a ``"prediction"`` column is added to the copy.
        results_dir: Directory that receives ``<variant>_predictions.csv`` files.
            Created if missing.
        batch_size: Number of rows per forward pass.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    predictions_df_list = []

    # Fail early if the output location is unusable, before any inference runs.
    os.makedirs(results_dir, exist_ok=True)

    # Suffix (or list of suffixes) appended to the model name for each method.
    # NOTE(review): these suffixes and the "custom_adapter"/"fusion" sub-folders
    # must match what the training code saved - confirm against the save paths.
    training_methods = {
        "full_ft": "_full_ft",
        "lora": "_peft_lora",
        "bitfit": ["_BitFit_Full_Fine-Tuning", "_U-BitFit", "_S-BitFit"],
        "adapter": "_combined_adapter"
    }

    for model_name, model_info in model_checkpoints.items():
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_info["path"])
            # Kept on the CPU; individual batches are moved to the device later.
            inputs = tokenizer(
                list(test_df["text"]),
                truncation=True,
                padding=True,
                max_length=model_info["max_length"],
                return_tensors="pt"
            )
        except Exception as e:
            print(f"Error processing model {model_name}: {e}")
            continue

        for method, suffix in training_methods.items():
            # Treat a single suffix as a one-element list so both cases share code.
            variant_suffixes = suffix if isinstance(suffix, list) else [suffix]

            for variant_suffix in variant_suffixes:
                model_path = f"{model_name}{variant_suffix}"
                model = None
                try:
                    if method == "adapter":
                        # Load adapter model
                        model = AutoAdapterModel.from_pretrained(
                            model_info["path"],
                            num_labels=2
                        ).to(device)
                        model.load_adapter(f"{local_save_path}/{model_path}/custom_adapter")
                        model.load_adapter_fusion(f"{local_save_path}/{model_path}/fusion")
                        model.set_active_adapters(["custom_adapter"])
                    else:
                        # Load regular model
                        model = load_best_model(model_path).to(device)

                    preds = _predict_labels(model, inputs, device, batch_size)

                    result_df = test_df.copy()
                    result_df["prediction"] = preds
                    predictions_df_list.append((model_path, result_df))

                except Exception as e:
                    print(f"Error during inference for {model_path}: {e}")

                finally:
                    # Release this variant before the next one is loaded.
                    model = None
                    if torch.cuda.is_available():
                        torch.cuda.empty_cache()

    # Save all predictions
    for saved_name, result_df in predictions_df_list:
        result_file_path = os.path.join(results_dir, f"{saved_name}_predictions.csv")
        result_df.to_csv(result_file_path, index=False)
        print(f"Saved predictions for {saved_name} to {result_file_path}")

# Step 15: Main Execution

In [None]:
# def main():
#     """Main execution function for adapter fine-tuning experiments"""
#     # Set up logging configuration first
#     logging.basicConfig(
#         level=logging.INFO,
#         format='%(asctime)s - %(levelname)s - %(message)s',
#         handlers=[
#             logging.StreamHandler(),  # Print to console
#             logging.FileHandler('experiment.log')  # Save to file
#         ]
#     )
#     # Create logger instance
#     logger = logging.getLogger("greenland_experiments")

#     # Initialize device
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     logger.info(f"Using device: {device}")

#     # Log GPU information if available
#     if torch.cuda.is_available():
#         logger.info(f"GPU: {torch.cuda.get_device_name()}")
#         logger.info(f"Available GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

#     # Initialize loss functions
#     loss_functions = get_loss_functions(device)
#     logger.info(f"Initialized loss functions: {list(loss_functions.keys())}")

#     # Define experiment configurations FIRST
#     experiment_config = {
#         "full_fine_tuning": {
#             "enabled": False,
#             "name": "Full Fine-Tuning",
#             "function": full_fine_tune_all_models,
#             "args": {
#                 "model_checkpoints": model_checkpoints,
#                 "dataset": dataset,
#                 "loss_functions": loss_functions
#             }
#         },
#         "adapter": {
#             "enabled": True,
#             "name": "Adapter Fine-Tuning",
#             "function": adapter_fine_tune_all_models,
#             "args": {
#                 "model_checkpoints": model_checkpoints,
#                 "dataset": dataset,
#                 "loss_functions": loss_functions,
#             }
#         },
#         "hugging_face_peft": {
#             "enabled": False,
#             "name": "LoRA Adaptation",
#             "function": peft_fine_tune_all_models,
#             "args": {
#                 "model_checkpoints": model_checkpoints,
#                 "dataset": dataset,
#                 "loss_functions": loss_functions,
#             }
#         },
#         "bitfit": {
#             "enabled": False,
#             "name": "BitFit Variants",
#             "variants": ["BitFit Full Fine-Tuning", "U-BitFit", "S-BitFit"],
#             "function": bitfit_peft_fine_tune_all_models,
#             "args": {
#                 "model_checkpoints": model_checkpoints,
#                 "dataset": dataset,
#                 "loss_functions": loss_functions,
#                 "k": 100
#             }
#         }
#     }

#     # THEN calculate total combinations for each approach
#     total_count = {
#         "full_fine_tuning": len(model_checkpoints) * len(loss_functions),
#         "adapter": len(model_checkpoints) * len(loss_functions) * 18,  # 18 adapter techniques
#         "hugging_face_peft": len(model_checkpoints) * len(loss_functions) * 4,  # 4 PEFT techniques
#         "bitfit": len(model_checkpoints) * len(loss_functions) * 3  # 3 BitFit variants
#     }

#     # Print experiment plan
#     logger.info("\n" + "="*50)
#     logger.info("EXPERIMENT PLAN")
#     logger.info("="*50)

#     # Log enabled experiments
#     enabled_experiments = [config["name"] for name, config in experiment_config.items() if config["enabled"]]
#     logger.info(f"Enabled experiments: {enabled_experiments}")

#     try:
#         # Initialize wandb run with configuration
#         wandb.init(
#             project="greenland",
#             config={
#                 "device": str(device),
#                 "enabled_experiments": enabled_experiments,
#                 "loss_functions": list(loss_functions.keys()),
#                 "models": list(model_checkpoints.keys())
#             }
#         )

#         # Analyze and log dataset characteristics
#         logger.info("Analyzing dataset characteristics...")
#         dataset_stats = analyze_text_lengths(dataset)
#         wandb.log({"dataset_stats": dataset_stats})

#         total_experiments = len([exp for exp in experiment_config.values() if exp["enabled"]])
#         completed_experiments = 0

#         # Run enabled experiments
#         for exp_name, exp_config in experiment_config.items():
#             if not exp_config["enabled"]:
#                 logger.info(f"Skipping {exp_config['name']} (disabled)")
#                 continue

#             try:
#                 logger.info(f"\nStarting {exp_config['name']} experiments...")
#                 start_time = time.time()

#                 if exp_name == "bitfit":
#                     # Special handling for BitFit variants
#                     for variant in exp_config["variants"]:
#                         logger.info(f"\nRunning {variant}...")
#                         exp_config["function"](
#                             peft_method=variant,
#                             **exp_config["args"]
#                         )
#                 else:
#                     # Run standard experiment
#                     exp_config["function"](**exp_config["args"])

#                 # Log experiment completion time
#                 duration = time.time() - start_time
#                 logger.info(f"Completed {exp_config['name']} in {duration:.2f} seconds")
#                 wandb.log({f"{exp_config['name']}_duration": duration})

#                 completed_experiments += 1
#                 logger.info(f"Progress: {completed_experiments}/{total_experiments} experiments completed")

#             except Exception as exp_error:
#                 logger.error(f"Error in {exp_config['name']}: {str(exp_error)}")
#                 wandb.log({f"{exp_config['name']}_error": str(exp_error)})
#                 continue

#             finally:
#                 # Cleanup after each experiment
#                 cleanup()

#         # Run inference if specified
#         if results_dir:
#             pass  # Commented out inference for now
#             # logger.info("\nRunning inference on best models...")
#             # inference_results = run_inference_and_save_results(
#             #     model_checkpoints=model_checkpoints,
#             #     test_df=test_df,
#             #     results_dir=results_dir
#             # )
#             # wandb.log({"inference_results": inference_results})

#     except Exception as e:
#         logger.error(f"Critical error in experiment execution: {str(e)}")
#         wandb.log({"critical_error": str(e)})
#         raise

#     finally:
#         # Cleanup and finalize
#         cleanup()

#         # Log final status to wandb
#         if wandb.run is not None:
#             wandb.log({
#                 "completed_experiments": completed_experiments,
#                 "total_experiments": total_experiments,
#                 "completion_rate": completed_experiments / total_experiments if total_experiments > 0 else 0
#             })
#             wandb.finish()

#         logger.info("\nExperiment suite completed!")

# if __name__ == "__main__":
#     # Import required modules
#     import logging
#     import time
#     import sys
#     import torch
#     import wandb
#     from pathlib import Path

#     try:
#         # Set up wandb authentication
#         authenticate_wandb()

#         # Run main function
#         main()
#     except KeyboardInterrupt:
#         logging.getLogger("greenland_experiments").info("Experiment interrupted by user")
#         cleanup()
#         sys.exit(0)
#     except Exception as e:
#         logging.getLogger("greenland_experiments").error(f"Unhandled exception: {str(e)}")
#         cleanup()
#         raise

Linguistic Configuration

In [None]:
# Training configurations, keyed by linguistic relation type. Every relation
# carries its own description and its own copy of the same three objectives.
LINGUISTIC_RELATIONS = {
    relation: {
        "description": description,
        "objectives": {
            "head_to_tail": "Cross-lingual transfer from high-resource to low-resource languages",
            "head_and_tail": "Multilingual training with both high and low resource languages",
            "tail_to_tail": "Low-resource transfer between similar low-resource languages",
        },
    }
    for relation, description in (
        ("genetic", "Language relationships based on genetic/family groupings"),
        ("script", "Language relationships based on writing systems"),
        ("word_order", "Language relationships based on syntactic structure"),
    )
}


def load_linguistic_datasets(
    relation_type,
    objective,
    sample_size=1000,
    data_root='/content/drive/MyDrive/GREENLAND/Datasets/Consolidated_Data/Experiment_Training_Splits',
):
    """
    Load the train/validation/test splits for one linguistic relation and objective.

    The three CSV files ``train.csv``, ``valid.csv`` and ``test.csv`` are read from
    ``<data_root>/<relation_type>/<objective>/``, optionally down-sampled, and
    converted to HuggingFace ``Dataset`` objects.

    Args:
        relation_type (str): One of 'genetic', 'script', or 'word_order'
        objective (str): One of 'head_to_tail', 'head_and_tail', or 'tail_to_tail'
        sample_size (int): Maximum number of rows kept per split (for testing);
            any falsy value such as None keeps the full dataset
        data_root (str): Directory containing the per-relation split folders

    Returns:
        dict: ``{"train": Dataset, "validation": Dataset, "test": Dataset}``

    Raises:
        FileNotFoundError: If a split file is missing (logged, then re-raised).
        Exception: Any other loading/conversion error (logged, then re-raised).
    """
    base_path = f'{data_root}/{relation_type}/{objective}'

    try:
        frames = {
            "train": pd.read_csv(f'{base_path}/train.csv'),
            "validation": pd.read_csv(f'{base_path}/valid.csv'),
            "test": pd.read_csv(f'{base_path}/test.csv'),
        }

        logger.info(f"Successfully loaded data from {base_path}")
        logger.info(
            f"Original sizes - Train: {len(frames['train'])}, "
            f"Val: {len(frames['validation'])}, Test: {len(frames['test'])}"
        )

        # Sample data if sample_size is provided
        if sample_size:
            frames = {
                split: frame.sample(n=min(sample_size, len(frame)), random_state=42)
                for split, frame in frames.items()
            }
            logger.info(
                f"Sampled sizes - Train: {len(frames['train'])}, "
                f"Val: {len(frames['validation'])}, Test: {len(frames['test'])}"
            )

        # Convert to HuggingFace datasets. After sampling the frames carry a
        # shuffled, non-default index; without preserve_index=False it would be
        # stored as an extra "__index_level_0__" column in each dataset.
        return {
            split: Dataset.from_pandas(frame, preserve_index=False)
            for split, frame in frames.items()
        }

    except FileNotFoundError as e:
        logger.error(f"Could not find data files in {base_path}: {str(e)}")
        raise
    except Exception as e:
        logger.error(f"Error loading datasets: {str(e)}")
        raise

def get_save_paths(relation_type, objective, model_name=None):
    """
    Build (and create on disk) the output directories for one configuration.

    The folder name is ``<relation_type>_<objective>``, extended with
    ``_<model_name>`` when a model name is given. That folder is placed under
    each of the module-level roots ``local_save_path``, ``drive_save_path`` and
    ``results_dir``.

    Args:
        relation_type (str): Linguistic relationship type
        objective (str): Training objective
        model_name (str, optional): Model name for specific model saves

    Returns:
        dict: Paths keyed by "local", "drive" and "results".
    """
    model_suffix = f"_{model_name}" if model_name else ""
    folder = f"{relation_type}_{objective}{model_suffix}"

    roots = (
        ("local", local_save_path),
        ("drive", drive_save_path),
        ("results", results_dir),
    )
    # All paths are computed first; directories are only created afterwards.
    paths = {label: os.path.join(root, folder) for label, root in roots}

    for directory in paths.values():
        os.makedirs(directory, exist_ok=True)

    return paths

def train_linguistic_configuration(relation_type, objective, model_checkpoints, training_method="adapter"):
    """
    Train models for a specific linguistic relation and objective.

    Loads the dataset splits for the relation/objective pair, opens a wandb run
    named "<relation_type>_<objective>_<training_method>" in the "greenland"
    project, and hands the data to the fine-tuning routine selected by
    ``training_method``. For "bitfit" the three variants "BitFit Full
    Fine-Tuning", "U-BitFit" and "S-BitFit" are trained one after another with
    k=100. The wandb run is finished and ``cleanup()`` is called whether
    training succeeds or fails.

    Args:
        relation_type (str): Linguistic relationship type
        objective (str): Training objective
        model_checkpoints (dict): Model configurations; its keys are recorded
            in the wandb run config as the model list
        training_method (str): One of 'adapter', 'full_ft', 'peft', 'bitfit'

    Raises:
        ValueError: If ``training_method`` is not one of the supported names.
        Exception: Any error raised while loading data or training is logged
            and re-raised unchanged.
    """
    supported_methods = ("adapter", "full_ft", "peft", "bitfit")
    # Fail fast on an unknown method. Without this check the if/elif chain
    # below matches nothing: the data is loaded, a wandb run is opened and
    # closed, no model is trained, and the caller sees a successful return.
    if training_method not in supported_methods:
        raise ValueError(
            f"Unknown training_method {training_method!r}; "
            f"expected one of: {', '.join(supported_methods)}"
        )

    # Initialize configuration name
    config_name = f"{relation_type}_{objective}_{training_method}"
    logger.info(f"\nStarting training for configuration: {config_name}")

    try:
        dataset = load_linguistic_datasets(relation_type, objective)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        loss_functions = get_loss_functions(device)

        wandb.init(
            project="greenland",
            name=config_name,
            config={
                "relation_type": relation_type,
                "objective": objective,
                "training_method": training_method,
                "models": list(model_checkpoints.keys()),
                "device": str(device)
            }
        )

        # The trainer names are resolved only inside the selected branch, so a
        # trainer that has not been defined yet (e.g. a notebook cell that was
        # not run) does not break the other methods.
        if training_method == "adapter":
            adapter_fine_tune_all_models(
                model_checkpoints=model_checkpoints,
                dataset=dataset,
                loss_functions=loss_functions,
                relation_type=relation_type,
                objective=objective
            )
        elif training_method == "full_ft":
            full_fine_tune_all_models(
                model_checkpoints=model_checkpoints,
                dataset=dataset,
                loss_functions=loss_functions,
                relation_type=relation_type,
                objective=objective
            )
        elif training_method == "peft":
            peft_fine_tune_all_models(
                model_checkpoints=model_checkpoints,
                dataset=dataset,
                loss_functions=loss_functions,
                relation_type=relation_type,
                objective=objective
            )
        elif training_method == "bitfit":
            for bitfit_variant in ["BitFit Full Fine-Tuning", "U-BitFit", "S-BitFit"]:
                bitfit_peft_fine_tune_all_models(
                    model_checkpoints=model_checkpoints,
                    dataset=dataset,
                    loss_functions=loss_functions,
                    peft_method=bitfit_variant,
                    k=100,
                    relation_type=relation_type,
                    objective=objective
                )

    except Exception as e:
        logger.error(f"Error in configuration {config_name}: {str(e)}")
        raise
    finally:
        # Nested so that cleanup() still runs when wandb.finish() itself raises
        # or is interrupted with Ctrl-C while syncing.
        try:
            wandb.finish()
        finally:
            cleanup()

def main():
    """Main execution function for all configurations.

    Authenticates with wandb, then trains every (relation, objective, method)
    combination in turn using the module-level ``model_checkpoints``. A failure
    in one configuration is logged and the sweep moves on to the next one;
    Ctrl-C stops the sweep. In every case a final summary with the number of
    successful configurations is logged and ``cleanup()`` is called.
    """
    # Define configurations
    configurations = {
        "genetic": ["head_to_tail", "head_and_tail", "tail_to_tail"],
        "script": ["head_to_tail", "head_and_tail", "tail_to_tail"],
        "word_order": ["head_to_tail", "head_and_tail", "tail_to_tail"]
    }

    # Define training methods
    training_methods = ["full_ft"]  # Add other methods as needed

    # Initialize wandb
    authenticate_wandb()

    # Flatten the sweep in the same order as nested relation -> objective ->
    # method loops. The total is derived from the actual lists rather than an
    # assumed "3 objectives per relation", so it stays right when they change.
    runs = [
        (relation_type, objective, method)
        for relation_type, objectives in configurations.items()
        for objective in objectives
        for method in training_methods
    ]
    total_configs = len(runs)
    completed_configs = 0

    try:
        # run_index counts attempts; completed_configs counts successes. Using
        # the success counter for the "Starting" line would repeat the same
        # number after every failed configuration.
        for run_index, (relation_type, objective, method) in enumerate(runs, start=1):
            try:
                logger.info(f"\nStarting configuration {run_index}/{total_configs}")
                logger.info(f"Relation: {relation_type}, Objective: {objective}, Method: {method}")

                train_linguistic_configuration(
                    relation_type=relation_type,
                    objective=objective,
                    model_checkpoints=model_checkpoints,
                    training_method=method
                )

                completed_configs += 1
                logger.info(f"Completed {completed_configs}/{total_configs} configurations")

            except Exception as e:
                # One failed configuration must not abort the whole sweep.
                logger.error(f"Error in configuration - Relation: {relation_type}, "
                             f"Objective: {objective}, Method: {method}")
                logger.error(str(e))

            finally:
                cleanup()

    except KeyboardInterrupt:
        logger.info("\nExperiment interrupted by user")
    except Exception as e:
        logger.error(f"Unhandled exception in main execution: {str(e)}")
    finally:
        logger.info(f"\nExperiment completed. Successful configurations: {completed_configs}/{total_configs}")
        cleanup()

# Entry point: start the full experiment sweep only when this code is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Authentication successful!
[32m2024-11-29 07:44:47,072 - greenland_experiments - INFO - 
Starting configuration 1/9[0m
[32m2024-11-29 07:44:47,072 - greenland_experiments - INFO - 
Starting configuration 1/9[0m
[32m2024-11-29 07:44:47,076 - greenland_experiments - INFO - Relation: genetic, Objective: head_to_tail, Method: full_ft[0m
[32m2024-11-29 07:44:47,076 - greenland_experiments - INFO - Relation: genetic, Objective: head_to_tail, Method: full_ft[0m
[32m2024-11-29 07:44:47,080 - greenland_experiments - INFO - 
Starting training for configuration: genetic_head_to_tail_full_ft[0m
[32m2024-11-29 07:44:47,080 - greenland_experiments - INFO - 
Starting training for configuration: genetic_head_to_tail_full_ft[0m


  test_df = pd.read_csv(f'{base_path}/test.csv')


[32m2024-11-29 07:45:18,409 - greenland_experiments - INFO - Successfully loaded data from /content/drive/MyDrive/GREENLAND/Datasets/Consolidated_Data/Experiment_Training_Splits/genetic/head_to_tail[0m
[32m2024-11-29 07:45:18,409 - greenland_experiments - INFO - Successfully loaded data from /content/drive/MyDrive/GREENLAND/Datasets/Consolidated_Data/Experiment_Training_Splits/genetic/head_to_tail[0m
[32m2024-11-29 07:45:18,414 - greenland_experiments - INFO - Original sizes - Train: 386127, Val: 42903, Test: 293969[0m
[32m2024-11-29 07:45:18,414 - greenland_experiments - INFO - Original sizes - Train: 386127, Val: 42903, Test: 293969[0m
[32m2024-11-29 07:45:18,627 - greenland_experiments - INFO - Sampled sizes - Train: 1000, Val: 1000, Test: 1000[0m
[32m2024-11-29 07:45:18,627 - greenland_experiments - INFO - Sampled sizes - Train: 1000, Val: 1000, Test: 1000[0m


VBox(children=(Label(value='0.014 MB of 0.014 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Using max_length=512 for model MBERT_uncased

Before tokenization:
Sample of original labels: [True, True, False, True, False]
Original label type: <class 'bool'>


Tokenizing train set (num_proc=4):   0%|          | 0/1000 [00:00<?, ? examples/s]

Tokenizing validation set (num_proc=4):   0%|          | 0/1000 [00:00<?, ? examples/s]

Tokenizing test set (num_proc=4):   0%|          | 0/1000 [00:00<?, ? examples/s]


After tokenization:
Sample of processed labels: [1, 1, 0, 1, 0]
Processed label type: <class 'int'>

Training MBERT_uncased with HuberLoss
Using device: cuda


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Roc Auc
0,No log,0.070252,0.679,0.806042,0.681307,0.986686,0.511862
2,0.077500,0.051614,0.859,0.898195,0.877292,0.920118,0.8258


Performing evaluation...
Running compute_metrics...
Performing evaluation...
Running compute_metrics...
Performing evaluation...
Running compute_metrics...


training_args.bin:   0%|          | 0.00/5.30k [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/669M [00:00<?, ?B/s]

[32m2024-11-29 07:47:00,183 - greenland_experiments - INFO - ✅ Full fine-tuned model saved to Hugging Face Hub as jslai/MBERT_uncased_HuberLoss_full_ft_genetic_head_to_tail_20241129-074625[0m
[32m2024-11-29 07:47:00,183 - greenland_experiments - INFO - ✅ Full fine-tuned model saved to Hugging Face Hub as jslai/MBERT_uncased_HuberLoss_full_ft_genetic_head_to_tail_20241129-074625[0m

Full Fine-Tuning completed!


0,1
eval/accuracy,▁██
eval/f1,▁██
eval/loss,█▁▁
eval/precision,▁█▇
eval/recall,█▁▃
eval/roc_auc,▁██
eval/runtime,█▄▁
eval/samples_per_second,▁▅█
eval/steps_per_second,▁▅█
train/epoch,▁▃▅██

0,1
eval/accuracy,0.859
eval/f1,0.89819
eval/loss,0.05161
eval/precision,0.87729
eval/recall,0.92012
eval/roc_auc,0.8258
eval/runtime,2.3757
eval/samples_per_second,420.92
eval/steps_per_second,52.615
total_flos,783018500751360.0


[32m2024-11-29 07:47:02,302 - greenland_experiments - INFO - Completed 1/9 configurations[0m
[32m2024-11-29 07:47:02,302 - greenland_experiments - INFO - Completed 1/9 configurations[0m
[32m2024-11-29 07:47:02,306 - greenland_experiments - INFO - 
Starting configuration 2/9[0m
[32m2024-11-29 07:47:02,306 - greenland_experiments - INFO - 
Starting configuration 2/9[0m
[32m2024-11-29 07:47:02,308 - greenland_experiments - INFO - Relation: genetic, Objective: head_and_tail, Method: full_ft[0m
[32m2024-11-29 07:47:02,308 - greenland_experiments - INFO - Relation: genetic, Objective: head_and_tail, Method: full_ft[0m
[32m2024-11-29 07:47:02,311 - greenland_experiments - INFO - 
Starting training for configuration: genetic_head_and_tail_full_ft[0m
[32m2024-11-29 07:47:02,311 - greenland_experiments - INFO - 
Starting training for configuration: genetic_head_and_tail_full_ft[0m
[32m2024-11-29 07:48:00,896 - greenland_experiments - INFO - Successfully loaded data from /content

Using max_length=512 for model MBERT_uncased

Before tokenization:
Sample of original labels: [False, False, True, True, False]
Original label type: <class 'bool'>


Tokenizing train set (num_proc=4):   0%|          | 0/1000 [00:00<?, ? examples/s]

Tokenizing validation set (num_proc=4):   0%|          | 0/1000 [00:00<?, ? examples/s]

Tokenizing test set (num_proc=4):   0%|          | 0/1000 [00:00<?, ? examples/s]


After tokenization:
Sample of processed labels: [0, 0, 1, 1, 0]
Processed label type: <class 'int'>

Training MBERT_uncased with HuberLoss
Using device: cuda


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Roc Auc
0,No log,0.066529,0.74,0.850575,0.74,1.0,0.5
2,0.074400,0.054303,0.852,0.899183,0.906593,0.891892,0.815177


Performing evaluation...
Running compute_metrics...
Performing evaluation...
Running compute_metrics...


VBox(children=(Label(value='0.023 MB of 0.023 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

[34m[1mwandb[0m: [32m[41mERROR[0m Control-C detected -- Run data was not synced


[32m2024-11-29 07:49:00,578 - greenland_experiments - INFO - 
Experiment interrupted by user[0m
[32m2024-11-29 07:49:00,578 - greenland_experiments - INFO - 
Experiment interrupted by user[0m
[32m2024-11-29 07:49:00,585 - greenland_experiments - INFO - 
Experiment completed. Successful configurations: 1/9[0m
[32m2024-11-29 07:49:00,585 - greenland_experiments - INFO - 
Experiment completed. Successful configurations: 1/9[0m
