In [1]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import precision_score, recall_score, f1_score, matthews_corrcoef, confusion_matrix, roc_auc_score, roc_curve
import numpy as np
import evaluate
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AlbertModel, AutoModel

2025-05-20 05:12:24.541060: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747717944.554528    1550 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747717944.558659    1550 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1747717944.570268    1550 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1747717944.570279    1550 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1747717944.570281    1550 computation_placer.cc:177] computation placer alr

In [2]:
# Read the training and validation splits from their CSV files.
train, dev = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/home/ubuntu/pan25/task1/train.csv',
        '/home/ubuntu/pan25/task1/val.csv',
    )
)

In [3]:
# Per-column count of missing values for each split, displayed as a tuple.
tuple(frame.isnull().sum() for frame in (train, dev))

(id       0
 text     0
 model    0
 label    0
 genre    0
 dtype: int64,
 id       0
 text     0
 model    0
 label    0
 genre    0
 dtype: int64)

In [4]:
# Frequency of each value in the 'label' column per split, displayed as a tuple.
tuple(frame['label'].value_counts() for frame in (train, dev))

(label
 1    14606
 0     9101
 Name: count, dtype: int64,
 label
 1    2312
 0    1277
 Name: count, dtype: int64)

In [5]:
# Names of the DataFrame columns that hold the input text and the target label.
text_column, y_label_column = "text", "label"

class CustomDataset(Dataset):
    """Map-style dataset that tokenizes one text per item, lazily on access.

    Args:
        df: DataFrame supplying the text and label columns.
        tokenizer: Callable invoked as
            ``tokenizer(text, truncation=True, padding='max_length',
            max_length=..., return_tensors='pt')``; its result is indexed
            with ``'input_ids'`` and ``'attention_mask'``.
        text_column: Name of the column in ``df`` containing the texts.
        label_column: Name of the column in ``df`` containing the labels.
        max_length: Length passed to the tokenizer for truncation/padding.
    """

    def __init__(self, df, tokenizer, text_column, label_column, max_length=512):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.texts = df[text_column].tolist()
        # Labels are converted once, up front, into a single int64 tensor.
        self.labels = torch.tensor(df[label_column].values, dtype=torch.long)

    def __len__(self):
        """Return the number of texts in the dataset."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize the text at ``idx`` and return it with its label.

        Returns:
            dict with keys ``'input_ids'``, ``'attention_mask'`` (both cast
            to int64 with the leading axis squeezed away) and ``'label'``.
        """
        encoded = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt',
        )
        # squeeze(0) drops the leading axis of the single-text encoding so the
        # DataLoader can stack items into a batch itself.
        item = {
            key: encoded[key].squeeze(0).long()
            for key in ('input_ids', 'attention_mask')
        }
        item['label'] = self.labels[idx]
        return item

# Tokenizer loaded from the pretrained 'albert-base-v2' checkpoint.
tokenizer = AutoTokenizer.from_pretrained('albert-base-v2')

# Wrap both splits in datasets that share the tokenizer and column names.
train_dataset, val_dataset = (
    CustomDataset(frame, tokenizer, text_column, y_label_column)
    for frame in (train, dev)
)

In [6]:
# Mini-batch size shared by both loaders.
batch_size = 16

# Only the training loader shuffles; validation keeps the dataset order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=batch_size,
    shuffle=False,
)

class HardMoEClassifier(nn.Module):
    """Pretrained encoder followed by a hard (top-1) mixture of linear experts.

    The first-token hidden state of the encoder is passed through dropout,
    a linear gate scores every expert, and each sample is classified by the
    single expert with the highest gate score.

    Args:
        num_labels: Number of output classes produced by every expert.
        dropout_prob: Dropout probability applied to the first-token vector.
        num_experts: Number of linear expert heads (default 6).
        model_name: Checkpoint name handed to ``AutoModel.from_pretrained``
            (default ``'albert-base-v2'``).

    NOTE(review): the expert is selected with ``argmax``, which yields integer
    indices, so the loss propagates no gradient into ``self.gate`` through
    this forward pass. Routing only changes as the encoder representation
    changes. Confirm this is intended before relying on a "learned" router.
    """

    def __init__(self, num_labels=2, dropout_prob=0.1, num_experts=6,
                 model_name='albert-base-v2'):
        super().__init__()
        self.base_model = AutoModel.from_pretrained(model_name, num_labels=num_labels)
        # Read the encoder width from its config instead of hard-coding 768,
        # so other checkpoints can be plugged in via ``model_name``.
        hidden_size = self.base_model.config.hidden_size
        self.dropout = nn.Dropout(p=dropout_prob)
        self.experts = nn.ModuleList([
            nn.Linear(hidden_size, num_labels) for _ in range(num_experts)
        ])
        self.gate = nn.Linear(hidden_size, len(self.experts))

    def forward(self, input_ids, attention_mask):
        """Return per-sample class logits from the selected expert.

        Args:
            input_ids: Token-id tensor forwarded to the encoder; its first
                dimension is used as the batch size.
            attention_mask: Attention mask forwarded to the encoder.

        Returns:
            Tensor of shape ``(batch, num_labels)`` with the same dtype and
            device as the gate output.
        """
        encoded = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
        cls_token = self.dropout(encoded.last_hidden_state[:, 0, :])

        gate_logits = self.gate(cls_token)
        expert_choice = torch.argmax(gate_logits, dim=1)

        # Allocate the result with the gate output's dtype/device. A plain
        # float32 ``torch.zeros`` buffer makes the masked assignment below
        # fail whenever the linear layers emit another dtype (autocast/half/
        # double), because index assignment requires matching dtypes.
        logits = gate_logits.new_zeros(
            (input_ids.shape[0], self.experts[0].out_features)
        )
        for i, expert in enumerate(self.experts):
            mask = expert_choice == i
            # Skip experts that received no sample in this batch.
            if mask.any():
                logits[mask] = expert(cls_token[mask])
        return logits

In [7]:
import time
from sklearn.metrics import f1_score, recall_score
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Run on CUDA when PyTorch reports it as available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the classifier and move it to the device before the optimizer
# is handed its parameters.
model = HardMoEClassifier(num_labels=2)
model = model.to(device)

optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=1e-5,
    weight_decay=0.01,
)
criterion = nn.CrossEntropyLoss()
classification_report_metric = evaluate.load("bstrai/classification_report")

# Scheduler configured for a quantity that should decrease (mode='min').
scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=1,
)

In [8]:
import os
import time
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import (
    f1_score, recall_score, confusion_matrix,
    roc_auc_score, roc_curve, brier_score_loss, precision_score
)
import json

# Training / validation driver.
# Each epoch runs one optimisation pass over train_loader and one no-grad pass
# over val_loader, computes the validation metrics, writes them to
# metrics_logs/epoch_<n>_metrics.json, steps the LR scheduler on the validation
# loss, and checkpoints / early-stops on the mean of the five metrics.
os.makedirs("metrics_logs", exist_ok=True)

epochs = 10            # upper bound on the number of epochs
best_val_mean = 0      # best validation mean metric seen so far
patience = 2           # non-improving epochs tolerated before stopping
epochs_no_improve = 0
num_classes = 2

for epoch in range(epochs):
    start = time.time()

    # ---- training pass ----
    model.train()
    epoch_train_loss = 0
    correct_train_preds = 0
    total_train_preds = 0

    for batch in train_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        epoch_train_loss += loss.item()
        correct_train_preds += (outputs.argmax(1) == labels).sum().item()
        total_train_preds += labels.size(0)

    train_loss = epoch_train_loss / len(train_loader)
    train_accuracy = correct_train_preds / total_train_preds

    # ---- validation pass ----
    model.eval()
    epoch_val_loss = 0
    correct_val_preds = 0
    total_val_preds = 0
    val_predicted_labels = []
    val_true_labels = []
    val_probabilities = []

    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            outputs = model(input_ids, attention_mask)
            loss = criterion(outputs, labels)
            probs = torch.softmax(outputs, dim=1)
            # Predicted class per sample, computed once and reused below.
            preds = outputs.argmax(1)

            epoch_val_loss += loss.item()
            correct_val_preds += (preds == labels).sum().item()
            total_val_preds += labels.size(0)
            val_predicted_labels.extend(preds.cpu().numpy())
            val_true_labels.extend(labels.cpu().numpy())
            val_probabilities.extend(probs.cpu().numpy())

    # The timer is stopped after validation, so the duration covers both passes.
    end = time.time()
    epoch_duration = end - start
    val_loss = epoch_val_loss / len(val_loader)
    val_accuracy = correct_val_preds / total_val_preds

    val_true_labels = np.array(val_true_labels)
    val_predicted_labels = np.array(val_predicted_labels)
    val_probabilities = np.array(val_probabilities)

    val_f1 = f1_score(val_true_labels, val_predicted_labels, average='macro')
    val_recall = recall_score(val_true_labels, val_predicted_labels, average='macro')

    # ROC-AUC against a one-hot encoding of the true labels.
    val_true_one_hot = np.zeros((val_true_labels.size, num_classes))
    val_true_one_hot[np.arange(val_true_labels.size), val_true_labels] = 1
    roc_auc_macro = roc_auc_score(val_true_one_hot, val_probabilities, multi_class='ovr', average='macro')

    # Brier score averaged over the one-vs-rest problems; reported as 1 - brier
    # so that higher is better, like the other metrics.
    brier = np.mean([
        brier_score_loss((val_true_labels == i).astype(int), val_probabilities[:, i])
        for i in range(num_classes)
    ])
    brier_complement = 1 - brier

    # c@1 = (n_correct + n_unanswered * n_correct / n) / n. The model answers
    # every sample, so `unanswered` is all zeros here.
    correct = (val_predicted_labels == val_true_labels).astype(int)
    unanswered = np.zeros_like(correct)
    n_samples = len(correct)
    n_correct = correct.sum()
    c_at_1 = (n_correct + unanswered.sum() * n_correct / n_samples) / n_samples

    precision = precision_score(val_true_labels, val_predicted_labels, average='macro')
    # Same quantity as val_recall; reuse it instead of recomputing.
    recall = val_recall
    # F-beta with beta = 0.5; the 1e-10 term guards against a zero denominator.
    # NOTE(review): built from macro-averaged precision/recall. Confirm this
    # matches the intended "f05u" definition.
    f05u = (1 + 0.5 ** 2) * (precision * recall) / ((0.5 ** 2 * precision) + recall + 1e-10)
    confusion = confusion_matrix(val_true_labels, val_predicted_labels).tolist()
    mean_metric = float(np.mean([roc_auc_macro, brier_complement, c_at_1, val_f1, f05u]))

    # Cast to built-in floats so the logged/printed dict holds plain numbers
    # rather than numpy scalars (which print as np.float64(...)).
    json_data = {
        "roc-auc": round(float(roc_auc_macro), 4),
        "brier": round(float(brier_complement), 4),
        "c@1": round(float(c_at_1), 4),
        "f1": round(float(val_f1), 4),
        "f05u": round(float(f05u), 4),
        "mean": round(mean_metric, 4),
        "confusion": confusion
    }

    with open(f"metrics_logs/epoch_{epoch+1}_metrics.json", "w") as f:
        json.dump(json_data, f, indent=4)

    print(f"\nEpoch {epoch+1}/{epochs}")
    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}, Val Macro F1: {val_f1:.4f}, Val Macro Recall: {val_recall:.4f}")
    print(f"Time taken: {epoch_duration:.2f}s")
    print("Metrics JSON:", json_data)

    # The scheduler monitors the validation loss; model selection below uses
    # the mean metric instead.
    scheduler.step(val_loss)

    if mean_metric > best_val_mean:
        best_val_mean = mean_metric
        epochs_no_improve = 0
        # Keep only the weights of the best epoch so far.
        torch.save(model.state_dict(), '/home/ubuntu/pan25/task1/ALBERT_HardMoE_Task1.pth')
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print(f"Early stopping at epoch {epoch+1} (No improvement in mean metric)")
            break

print(f"\nBest validation mean metric: {best_val_mean:.4f}")



Epoch 1/10
Train Loss: 0.0924, Train Acc: 0.9658
Val Loss: 0.0900, Val Acc: 0.9691, Val Macro F1: 0.9656, Val Macro Recall: 0.9574
Time taken: 427.18s
Metrics JSON: {'roc-auc': np.float64(0.999), 'brier': np.float64(0.9759), 'c@1': np.float64(0.9691), 'f1': 0.9656, 'f05u': 0.9722, 'mean': np.float64(0.9764), 'confusion': [[1171, 106], [5, 2307]]}

Epoch 2/10
Train Loss: 0.0323, Train Acc: 0.9908
Val Loss: 0.0280, Val Acc: 0.9897, Val Macro F1: 0.9888, Val Macro Recall: 0.9915
Time taken: 424.88s
Metrics JSON: {'roc-auc': np.float64(0.9996), 'brier': np.float64(0.9923), 'c@1': np.float64(0.9897), 'f1': 0.9888, 'f05u': 0.9874, 'mean': np.float64(0.9915), 'confusion': [[1274, 3], [34, 2278]]}

Epoch 3/10
Train Loss: 0.0222, Train Acc: 0.9932
Val Loss: 0.0179, Val Acc: 0.9947, Val Macro F1: 0.9942, Val Macro Recall: 0.9948
Time taken: 426.91s
Metrics JSON: {'roc-auc': np.float64(0.9997), 'brier': np.float64(0.9957), 'c@1': np.float64(0.9947), 'f1': 0.9942, 'f05u': 0.9939, 'mean': np.float