In [1]:
!pip install -q transformers datasets accelerate peft nlpaug nltk scikit-learn matplotlib
!pip install -q sentencepiece

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/410.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.5/410.5 kB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import os
import json
import zipfile
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from peft import get_peft_model, LoraConfig, TaskType
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold
import nlpaug.augmenter.word as naw
import nltk

In [3]:
import os
import nltk
import logging

# 1. Download NLTK data ONCE explicitly
print("Downloading NLTK data...")
for _nltk_resource in (
    'averaged_perceptron_tagger',
    # Newer NLTK releases look the English POS tagger up under this name.
    # Because nltk.download is stubbed out below, a missing tagger could not be
    # fetched later and the resulting LookupError would be swallowed by the
    # best-effort augmentation in the dataset, silently disabling synonym
    # augmentation. Requesting it here is harmless on older NLTK versions.
    'averaged_perceptron_tagger_eng',
    'wordnet',
    'omw-1.4',
    'stopwords',
):
    nltk.download(_nltk_resource, quiet=True)
print("✓ NLTK Data Ready.")

# 2. THE NUCLEAR FIX: Disable nltk.download completely
# This prevents nlpaug from checking or printing anything inside the loop
def dummy_download(*args, **kwargs):
    """No-op replacement for ``nltk.download``.

    Accepts and ignores any arguments and always reports success (``True``),
    so later calls to ``nltk.download`` neither hit the network nor print.
    """
    return True

# NOTE: this monkeypatch is global for the kernel session. Every later
# nltk.download(...) call (including a re-run of this cell) becomes a no-op,
# so all required resources must be listed in the loop above.
nltk.download = dummy_download
print("✓ NLTK Downloader disabled to prevent training logs.")

# Disable library logging (Extra silence)
logging.getLogger("transformers").setLevel(logging.ERROR)
logging.getLogger("nlpaug").setLevel(logging.ERROR)

Downloading NLTK data...
✓ NLTK Data Ready.
✓ NLTK Downloader disabled to prevent training logs.


In [4]:
import sys
import os
from contextlib import contextmanager

def set_seed(seed=42):
    """Seed every random number generator this notebook draws from.

    Seeds Python's built-in ``random`` module, NumPy's legacy global generator
    and PyTorch (CPU, plus all CUDA devices when CUDA is available).

    Args:
        seed: Integer seed applied to all generators.
    """
    # Local import keeps the function self-contained; the built-in generator was
    # previously left unseeded, so any library drawing from it was not reproducible.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

# Seed the RNGs once, before the augmenters are built and before any training.
set_seed(42)

# Augmenters (Synonym and Random Insertion)
# Both are module-level singletons consumed by augment_text(); aug_p=0.1 is the
# per-word augmentation probability passed to nlpaug.
aug_syn = naw.SynonymAug(aug_p=0.1)
# NOTE(review): nlpaug documents RandomWordAug actions as substitute / swap /
# delete / crop. Confirm that action="insert" is actually supported: if
# .augment() raises, the error is swallowed by the try/except in
# PolarizationDataset.__getitem__ and this branch silently returns the
# un-augmented text.
aug_insert = naw.RandomWordAug(action="insert", aug_p=0.1)

def augment_text(text):
    """Return one augmented variant of ``text``.

    Draws a single uniform number from NumPy's global generator: below 0.5 the
    synonym augmenter is used, otherwise the random-word augmenter. The first
    element of the augmenter's result is returned.
    """
    chosen_augmenter = aug_syn if np.random.rand() < 0.5 else aug_insert
    return chosen_augmenter.augment(text)[0]

# Context manager to silence stderr
@contextmanager
def silence_stderr():
    """Temporarily redirect ``sys.stderr`` to the null device.

    On exit (normal or via exception) the stream that was active on entry is
    restored and the null-device handle opened here is closed. Exceptions
    raised inside the ``with`` body still propagate; only text written to
    stderr is discarded.
    """
    saved_stderr = sys.stderr
    devnull = open(os.devnull, 'w')
    sys.stderr = devnull
    try:
        yield
    finally:
        # Restore first, then close *our own* handle. Closing whatever
        # sys.stderr currently points at could close a stream installed by the
        # body (or by another library) while leaking the devnull handle.
        sys.stderr = saved_stderr
        devnull.close()

# ------------------------------------------------------------------
# DATASET CLASS
# ------------------------------------------------------------------

class PolarizationDataset(Dataset):
    """Map-style dataset that tokenizes one text per item, optionally augmenting it.

    Args:
        texts: Indexable collection of raw texts (each item is passed through ``str``).
        labels: Indexable collection of labels aligned with ``texts``; may be
            ``None`` only when ``is_test`` is true.
        tokenizer: Callable tokenizer invoked as
            ``tokenizer(text, truncation=True, padding=False, max_length=..., return_tensors='pt')``.
        max_length: Truncation length forwarded to the tokenizer.
        is_multilabel: If true, labels are emitted as float tensors, otherwise as long tensors.
        augment: If true, ``augment_text`` is applied to every text on access (best effort).
        is_test: If true, items carry no ``labels`` entry.

    Raises:
        ValueError: If ``labels`` is ``None`` while ``is_test`` is false.
    """

    # Set once the first augmentation failure has been reported, so a
    # systematically failing augmenter does not flood the output.
    _augment_failure_reported = False

    def __init__(self, texts, labels, tokenizer, max_length=128, is_multilabel=False, augment=False, is_test=False):
        # Fail at construction time instead of with an opaque TypeError on first item access.
        if labels is None and not is_test:
            raise ValueError("labels must be provided unless is_test=True")

        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.is_multilabel = is_multilabel
        self.augment = augment
        self.is_test = is_test

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])

        # Augmentation is best effort: on failure the original text is used.
        # Only ``Exception`` is caught so Ctrl-C / SystemExit still interrupt training.
        if self.augment:
            try:
                text = augment_text(text)
            except Exception as exc:
                if not PolarizationDataset._augment_failure_reported:
                    PolarizationDataset._augment_failure_reported = True
                    import warnings
                    warnings.warn(
                        f"Text augmentation failed ({type(exc).__name__}: {exc}); "
                        "falling back to the original text. Further failures are not reported.",
                        RuntimeWarning,
                    )

        encoding = self.tokenizer(
            text,
            truncation=True,
            padding=False,
            max_length=self.max_length,
            return_tensors='pt'
        )

        # Drop only the leading batch axis added by return_tensors='pt'. A bare
        # squeeze() would also collapse a length-1 sequence into a 0-d tensor,
        # which cannot be padded into a batch.
        item = {key: encoding[key].squeeze(0) for key in encoding.keys()}

        # ONLY return labels if we are NOT in test mode
        if not self.is_test:
            label_dtype = torch.float if self.is_multilabel else torch.long
            item['labels'] = torch.tensor(self.labels[idx], dtype=label_dtype)

        return item

# ------------------------------------------------------------------
# METRICS
# ------------------------------------------------------------------

def compute_metrics_binary(p):
    """Compute macro-F1 for single-label classification.

    Args:
        p: Evaluation output exposing ``predictions`` (logits, or a tuple whose
            first element holds the logits) and ``label_ids``.

    Returns:
        ``{'f1_macro': <macro-averaged F1>}`` using the argmax class per row.
    """
    predictions = p.predictions
    # Same guard as compute_metrics_multilabel: some models return a tuple of outputs.
    if isinstance(predictions, tuple):
        predictions = predictions[0]

    preds = np.argmax(predictions, axis=1)
    return {'f1_macro': f1_score(p.label_ids, preds, average='macro')}

def compute_metrics_multilabel(p):
    """Compute macro-F1 for multi-label classification.

    The logits (first element when ``p.predictions`` is a tuple) are passed
    through a sigmoid and a label counts as predicted when its probability
    exceeds 0.35.

    Returns:
        ``{'f1_macro': <macro-averaged F1>}``.
    """
    raw_output = p.predictions
    logits = raw_output[0] if isinstance(raw_output, tuple) else raw_output

    # Element-wise sigmoid, then threshold into boolean label indicators.
    probabilities = 1 / (1 + np.exp(-logits))
    predicted_labels = probabilities > 0.35
    return {'f1_macro': f1_score(p.label_ids, predicted_labels, average='macro')}

# 2. UPDATED TRAINING CONFIG
def run_training(train_df, test_df, model_name, task_type, label_cols, output_dir):
    """Train LoRA adapters with 5-fold cross-validation and ensemble the test predictions.

    For every fold a fresh base model is loaded, wrapped with a LoRA adapter,
    trained with on-the-fly text augmentation, and then used to predict
    ``test_df``. The per-fold test logits are averaged before the final decision.

    Args:
        train_df: Training frame with a ``'text'`` column plus the label
            column(s): ``'polarization'`` for ``task_type == 'task1'``,
            otherwise the columns named in ``label_cols``.
        test_df: Frame with a ``'text'`` column to predict on.
        model_name: Hugging Face model id or path of the base model.
        task_type: ``'task1'`` selects binary single-label classification; any
            other value selects multi-label classification.
        label_cols: Label column names (only used for the multi-label tasks).
        output_dir: Root directory; fold ``k`` writes its checkpoints to
            ``{output_dir}/fold{k}``.

    Returns:
        For ``task1`` a 1-D array of predicted class indices; otherwise a 0/1
        integer array with one column per entry in ``label_cols``.
    """
    print(f"\n{'='*40}")
    print(f"TRAINING: {task_type} | Model: {model_name}")
    print(f"{'='*40}")

    n_splits = 5
    # Decision threshold applied to the averaged multi-label probabilities.
    # It must equal the threshold used in compute_metrics_multilabel: the best
    # checkpoint of every fold is selected with that metric, so thresholding the
    # final predictions differently (previously 0.5) evaluates the models at an
    # operating point they were not selected for.
    multilabel_threshold = 0.35

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Config based on Task
    if task_type == 'task1':
        num_labels = 2
        problem_type = "single_label_classification"
        train_labels = train_df['polarization'].values
        stratify_labels = train_df['polarization'].values
        is_multilabel = False
    else:
        num_labels = len(label_cols)
        problem_type = "multi_label_classification"
        train_labels = train_df[label_cols].values
        # A single dummy class: the splitter then cannot stratify on labels and
        # just produces shuffled folds.
        stratify_labels = np.zeros(len(train_df))
        is_multilabel = True

    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    test_preds_folds = []

    # Built once and shared by all folds; carries no labels (is_test=True).
    test_dataset = PolarizationDataset(
        test_df['text'].values, None, tokenizer,
        is_multilabel=is_multilabel, is_test=True
    )

    for fold, (train_idx, val_idx) in enumerate(skf.split(train_df, stratify_labels)):
        print(f"\n--- FOLD {fold+1}/{n_splits} ---")

        train_sub = train_df.iloc[train_idx]
        val_sub = train_df.iloc[val_idx]

        # Augment only the training split; validation must see the original text.
        train_ds = PolarizationDataset(
            train_sub['text'].values, train_labels[train_idx], tokenizer,
            is_multilabel=is_multilabel, augment=True
        )
        val_ds = PolarizationDataset(
            val_sub['text'].values, train_labels[val_idx], tokenizer,
            is_multilabel=is_multilabel, augment=False
        )

        model = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=num_labels, problem_type=problem_type
        )

        # The attention projection layers are named differently in DeBERTa.
        target_modules = ["query_proj", "value_proj"] if "deberta" in model_name.lower() else ["query", "value"]

        peft_config = LoraConfig(
            task_type=TaskType.SEQ_CLS,
            inference_mode=False,
            r=16,
            lora_alpha=32,
            lora_dropout=0.2,
            target_modules=target_modules
        )
        model = get_peft_model(model, peft_config)

        training_args = TrainingArguments(
            output_dir=f"{output_dir}/fold{fold}",
            # NOTE(review): an earlier comment called this a "higher LR for LoRA",
            # but 2e-5 is a typical full fine-tuning rate. Confirm the intended value.
            learning_rate=2e-5,
            per_device_train_batch_size=16,
            per_device_eval_batch_size=16,
            num_train_epochs=5,
            weight_decay=0.01,
            eval_strategy="epoch",
            save_strategy="epoch",
            load_best_model_at_end=True,
            metric_for_best_model="f1_macro",
            save_total_limit=1,
            report_to="none",
            remove_unused_columns=False
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_ds,
            eval_dataset=val_ds,
            tokenizer=tokenizer,
            data_collator=DataCollatorWithPadding(tokenizer),
            compute_metrics=compute_metrics_multilabel if is_multilabel else compute_metrics_binary
        )

        trainer.train()

        # load_best_model_at_end=True: the model held by the trainer is now the
        # best checkpoint of this fold according to f1_macro.
        preds = trainer.predict(test_dataset).predictions
        test_preds_folds.append(preds)

        # Release the fold's model before the next one is loaded.
        del model, trainer, train_ds, val_ds
        torch.cuda.empty_cache()

    # Ensemble by averaging the raw logits across folds.
    avg_preds = np.mean(test_preds_folds, axis=0)

    if not is_multilabel:
        return np.argmax(avg_preds, axis=1)
    else:
        return ((1 / (1 + np.exp(-avg_preds))) > multilabel_threshold).astype(int)


In [5]:
!unzip dev_phase.zip

Archive:  dev_phase.zip
   creating: subtask1/
   creating: subtask1/dev/
  inflating: subtask1/dev/nep.csv    
  inflating: subtask1/dev/ita.csv    
  inflating: subtask1/dev/pol.csv    
  inflating: subtask1/dev/rus.csv    
  inflating: subtask1/dev/tel.csv    
  inflating: subtask1/dev/hin.csv    
  inflating: subtask1/dev/hau.csv    
  inflating: subtask1/dev/pan.csv    
  inflating: subtask1/dev/ori.csv    
  inflating: subtask1/dev/spa.csv    
  inflating: subtask1/dev/deu.csv    
  inflating: subtask1/dev/fas.csv    
  inflating: subtask1/dev/arb.csv    
  inflating: subtask1/dev/ben.csv    
  inflating: subtask1/dev/amh.csv    
  inflating: subtask1/dev/khm.csv    
  inflating: subtask1/dev/tur.csv    
  inflating: subtask1/dev/zho.csv    
  inflating: subtask1/dev/eng.csv    
  inflating: subtask1/dev/swa.csv    
  inflating: subtask1/dev/urd.csv    
  inflating: subtask1/dev/mya.csv    
   creating: subtask1/train/
  inflating: subtask1/train/nep.csv  
  inflating: subtask1/t

In [6]:
import wandb

# Disable wandb logging for this script
# run_training already passes report_to="none" to its TrainingArguments; this
# call presumably targets Trainer instances created with default arguments
# (e.g. in the prediction cells) - TODO confirm it is still needed.
wandb.init(mode="disabled")

  | |_| | '_ \/ _` / _` |  _/ -_)


In [7]:
# ------------------------------------------------------------------
# EXECUTION BLOCKS
# ------------------------------------------------------------------

# Load the English subtask-1 splits: train is used for fitting, dev for prediction.
df_en_train = pd.read_csv('subtask1/train/eng.csv')
df_en_test = pd.read_csv('subtask1/dev/eng.csv')

# 1. ENGLISH TASK 1 (Binary) -> DeBERTa
# stderr is muted for the duration of the run; the per-epoch metric lines shown
# below still appear.
with silence_stderr():
    pred_en_t1 = run_training(
        train_df=df_en_train,
        test_df=df_en_test,
        model_name="microsoft/deberta-v3-large",
        task_type="task1",
        label_cols=["label"],
        output_dir="output_en_t1",
    )


TRAINING: task1 | Model: microsoft/deberta-v3-large


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/580 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]


--- FOLD 1/5 ---


pytorch_model.bin:   0%|          | 0.00/874M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/874M [00:00<?, ?B/s]

{'eval_loss': 0.6478551626205444, 'eval_f1_macro': 0.3886255924170616, 'eval_runtime': 7.7504, 'eval_samples_per_second': 83.222, 'eval_steps_per_second': 5.29, 'epoch': 1.0}
{'eval_loss': 0.5019956231117249, 'eval_f1_macro': 0.7176656502481693, 'eval_runtime': 6.978, 'eval_samples_per_second': 92.433, 'eval_steps_per_second': 5.876, 'epoch': 2.0}
{'eval_loss': 0.44519275426864624, 'eval_f1_macro': 0.7871560603656094, 'eval_runtime': 7.1628, 'eval_samples_per_second': 90.049, 'eval_steps_per_second': 5.724, 'epoch': 3.0}
{'loss': 0.555, 'grad_norm': 3.144296884536743, 'learning_rate': 1.9197530864197534e-05, 'epoch': 3.0864197530864197}
{'eval_loss': 0.43047159910202026, 'eval_f1_macro': 0.8025621760281718, 'eval_runtime': 7.0993, 'eval_samples_per_second': 90.854, 'eval_steps_per_second': 5.775, 'epoch': 4.0}
{'eval_loss': 0.4242706894874573, 'eval_f1_macro': 0.8031279561077127, 'eval_runtime': 7.1792, 'eval_samples_per_second': 89.843, 'eval_steps_per_second': 5.711, 'epoch': 5.0}
{'

In [15]:
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import os
import zipfile
from transformers import AutoModelForSequenceClassification, Trainer, AutoTokenizer, AutoConfig
from peft import PeftModel # Import PeftModel
from google.colab import files as colab_files

# --- 1. SETTINGS ---
OUTPUT_DIR = "output_en_t1"         # <--- Your local folder
TEST_FILE_PATH = "subtask1/dev/eng.csv"
NUM_FOLDS = 5
SUBMISSION_DIR = "subtask_1"
CSV_NAME = "pred_eng.csv"

# This cell is specifically for 'output_en_t1', which used 'microsoft/deberta-v3-large'
# and is a binary classification task (2 labels).
BASE_MODEL_NAME_FOR_PREDICTION = "microsoft/deberta-v3-large"
NUM_LABELS_FOR_TASK = 2

print(f">>> Using base model for prediction: {BASE_MODEL_NAME_FOR_PREDICTION}")


def _select_checkpoint(fold_dir):
    """Return the checkpoint directory to load for one fold, or None if there is none.

    Training may leave both the best and the most recent checkpoint on disk, so
    taking the first ``checkpoint-*`` entry from ``os.listdir`` (arbitrary order)
    can pick the wrong one. The newest checkpoint's ``trainer_state.json``
    records ``best_model_checkpoint``; that directory is preferred when it exists
    under ``fold_dir``, otherwise the highest-step checkpoint is returned.
    """
    import json  # local import keeps this cell self-contained

    def _step(name):
        suffix = name.rsplit("-", 1)[-1]
        return int(suffix) if suffix.isdigit() else -1

    names = sorted(
        (
            entry for entry in os.listdir(fold_dir)
            if entry.startswith("checkpoint-") and os.path.isdir(os.path.join(fold_dir, entry))
        ),
        key=_step,
    )
    if not names:
        return None

    latest = os.path.join(fold_dir, names[-1])
    state_file = os.path.join(latest, "trainer_state.json")
    if os.path.isfile(state_file):
        try:
            with open(state_file) as fh:
                recorded_best = json.load(fh).get("best_model_checkpoint")
        except (OSError, ValueError):
            recorded_best = None
        if recorded_best:
            # The recorded path is relative to the training-time working
            # directory; re-anchor it under fold_dir.
            candidate = os.path.join(fold_dir, os.path.basename(os.path.normpath(recorded_best)))
            if os.path.isdir(candidate):
                return candidate
    return latest


# --- 2. PREPARE DATA ---
print(f">>> Loading Test Data from {TEST_FILE_PATH}...")
test_df = pd.read_csv(TEST_FILE_PATH)

# Auto-detect text column
if "tweet" in test_df.columns:
    text_col = "tweet"
elif "text" in test_df.columns:
    text_col = "text"
else:
    text_col = test_df.columns[1]

print(f">>> Loading Tokenizer from {BASE_MODEL_NAME_FOR_PREDICTION}...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME_FOR_PREDICTION)

# Coerce to str exactly like the training dataset does (str(text)), so missing
# or non-string cells do not crash the tokenizer.
test_encodings = tokenizer(
    test_df[text_col].astype(str).tolist(),
    truncation=True,
    padding=True,
    max_length=128
)

class TestDataset(torch.utils.data.Dataset):
    """Wraps pre-computed tokenizer encodings; yields one dict of tensors per row."""
    def __init__(self, encodings):
        self.encodings = encodings
    def __getitem__(self, idx):
        return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
    def __len__(self):
        return len(self.encodings["input_ids"])

test_dataset = TestDataset(test_encodings)

# --- 3. ENSEMBLE PREDICTION LOOP ---
# Running sum of per-fold class probabilities; divided by the fold count below.
sum_probs = np.zeros((len(test_df), NUM_LABELS_FOR_TASK))
successful_folds = 0

print(">>> Starting 5-Fold Ensemble Prediction...")

# Loop 0 to 4
for fold in range(0, NUM_FOLDS):
    base_fold_path = f"{OUTPUT_DIR}/fold{fold}"
    print(f"   -> Processing Fold {fold} from: {base_fold_path}")

    if not os.path.exists(base_fold_path):
        print(f"      !! CRITICAL: Base folder not found: {base_fold_path}")
        continue

    # Find the actual checkpoint directory (best checkpoint when it can be identified)
    checkpoint_dir = _select_checkpoint(base_fold_path)

    if checkpoint_dir is None:
        print(f"      !! CRITICAL: No checkpoint found in {base_fold_path}")
        continue

    print(f"      -> Loading model from: {checkpoint_dir}")

    base_model = model = trainer = None
    try:
        # Load the base model first
        base_model = AutoModelForSequenceClassification.from_pretrained(
            BASE_MODEL_NAME_FOR_PREDICTION, num_labels=NUM_LABELS_FOR_TASK
        )

        # Load the PEFT adapter weights and attach to the base model
        model = PeftModel.from_pretrained(base_model, checkpoint_dir)

        trainer = Trainer(model=model)
        preds_output = trainer.predict(test_dataset)
        logits = torch.tensor(preds_output.predictions)
        probs = F.softmax(logits, dim=1).numpy()
        sum_probs += probs
        successful_folds += 1

    except Exception as e:
        # Best effort: a broken fold is reported and left out of the ensemble.
        print(f"      !! Error loading model: {e}")

    finally:
        # Free this fold's model before the next one is loaded; otherwise every
        # fold's weights stay resident on the accelerator.
        del base_model, model, trainer
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

if successful_folds == 0:
    raise ValueError("STOP: No models were loaded! Check your OUTPUT_DIR path.")

# --- 4. AVERAGE & SAVE ---
print(f">>> Averaging predictions from {successful_folds} folds...")
avg_probs = sum_probs / successful_folds
final_predictions = np.argmax(avg_probs, axis=1)

os.makedirs(SUBMISSION_DIR, exist_ok=True)
csv_path = os.path.join(SUBMISSION_DIR, CSV_NAME)

# Handle ID column
id_col = "ID" if "ID" in test_df.columns else "id"

submission_df = pd.DataFrame({
    'id': test_df[id_col],
    'polarization': final_predictions
})

submission_df.to_csv(csv_path, index=False)
print(f"\n\u2713 Saved CSV to: {csv_path}")

# --- 5. ZIP & DOWNLOAD ---
# Every file currently in SUBMISSION_DIR is archived, stored under its parent folder name.
zip_filename = f'{SUBMISSION_DIR}.zip'
with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for root, dirs, files_list in os.walk(SUBMISSION_DIR):
        for file in files_list:
            file_path = os.path.join(root, file)
            arcname = os.path.join(os.path.basename(root), file)
            zipf.write(file_path, arcname)

print(f"\u2713 Created Archive: {zip_filename}")
colab_files.download(zip_filename)


>>> Using base model for prediction: microsoft/deberta-v3-large
>>> Loading Test Data from subtask1/dev/eng.csv...
>>> Loading Tokenizer from microsoft/deberta-v3-large...




>>> Starting 5-Fold Ensemble Prediction...
   -> Processing Fold 0 from: output_en_t1/fold0
      -> Loading model from: output_en_t1/fold0/checkpoint-810
   -> Processing Fold 1 from: output_en_t1/fold1
      -> Loading model from: output_en_t1/fold1/checkpoint-810
   -> Processing Fold 2 from: output_en_t1/fold2
      -> Loading model from: output_en_t1/fold2/checkpoint-810
   -> Processing Fold 3 from: output_en_t1/fold3
      -> Loading model from: output_en_t1/fold3/checkpoint-810
   -> Processing Fold 4 from: output_en_t1/fold4
      -> Loading model from: output_en_t1/fold4/checkpoint-648
>>> Averaging predictions from 5 folds...

✓ Saved CSV to: subtask_1/pred_eng.csv
✓ Created Archive: subtask_1.zip


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [16]:
# HAUSA TASK 1 (Binary) -> AfroXLMR-Social
# Load the Hausa subtask-1 splits: train is used for fitting, dev for prediction.
df_ha_train = pd.read_csv('subtask1/train/hau.csv')
df_ha_test = pd.read_csv('subtask1/dev/hau.csv')

pred_ha_t1 = run_training(
    train_df=df_ha_train,
    test_df=df_ha_test,
    model_name="Tadesse/AfroXLMR-Social",
    task_type="task1",
    label_cols=["label"],
    output_dir="output_ha_t1",
)


TRAINING: task1 | Model: Tadesse/AfroXLMR-Social

--- FOLD 1/5 ---


model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

  trainer = Trainer(


{'eval_loss': 0.28749868273735046, 'eval_f1_macro': 0.6396622302235367, 'eval_runtime': 12.0369, 'eval_samples_per_second': 60.73, 'eval_steps_per_second': 3.822, 'epoch': 1.0}
{'eval_loss': 0.29347193241119385, 'eval_f1_macro': 0.7085326953748006, 'eval_runtime': 11.8313, 'eval_samples_per_second': 61.785, 'eval_steps_per_second': 3.888, 'epoch': 2.0}
{'loss': 0.2202, 'grad_norm': 5.65501070022583, 'learning_rate': 2.2732240437158473e-05, 'epoch': 2.73224043715847}
{'eval_loss': 0.27999675273895264, 'eval_f1_macro': 0.7650462032944958, 'eval_runtime': 11.808, 'eval_samples_per_second': 61.907, 'eval_steps_per_second': 3.896, 'epoch': 3.0}
{'eval_loss': 0.27647510170936584, 'eval_f1_macro': 0.7466335583663124, 'eval_runtime': 11.8264, 'eval_samples_per_second': 61.811, 'eval_steps_per_second': 3.89, 'epoch': 4.0}
{'eval_loss': 0.28420957922935486, 'eval_f1_macro': 0.7480873940312909, 'eval_runtime': 11.8147, 'eval_samples_per_second': 61.872, 'eval_steps_per_second': 3.893, 'epoch': 5.

  trainer = Trainer(


{'eval_loss': 0.20924672484397888, 'eval_f1_macro': 0.6823258801831859, 'eval_runtime': 12.4116, 'eval_samples_per_second': 58.816, 'eval_steps_per_second': 3.706, 'epoch': 1.0}
{'eval_loss': 0.16360174119472504, 'eval_f1_macro': 0.825573060197206, 'eval_runtime': 12.3445, 'eval_samples_per_second': 59.136, 'eval_steps_per_second': 3.726, 'epoch': 2.0}
{'loss': 0.2389, 'grad_norm': 1.2227486371994019, 'learning_rate': 2.2732240437158473e-05, 'epoch': 2.73224043715847}
{'eval_loss': 0.15539908409118652, 'eval_f1_macro': 0.8373667822041231, 'eval_runtime': 12.3638, 'eval_samples_per_second': 59.043, 'eval_steps_per_second': 3.721, 'epoch': 3.0}
{'eval_loss': 0.15089130401611328, 'eval_f1_macro': 0.8283784650238304, 'eval_runtime': 12.3166, 'eval_samples_per_second': 59.27, 'eval_steps_per_second': 3.735, 'epoch': 4.0}
{'eval_loss': 0.1503094881772995, 'eval_f1_macro': 0.8513994910941476, 'eval_runtime': 12.3501, 'eval_samples_per_second': 59.109, 'eval_steps_per_second': 3.725, 'epoch': 

  trainer = Trainer(


{'eval_loss': 0.20633770525455475, 'eval_f1_macro': 0.7918658280922433, 'eval_runtime': 12.091, 'eval_samples_per_second': 60.375, 'eval_steps_per_second': 3.804, 'epoch': 1.0}
{'eval_loss': 0.19807082414627075, 'eval_f1_macro': 0.832572298325723, 'eval_runtime': 12.0207, 'eval_samples_per_second': 60.729, 'eval_steps_per_second': 3.827, 'epoch': 2.0}
{'loss': 0.2356, 'grad_norm': 1.9658671617507935, 'learning_rate': 2.2732240437158473e-05, 'epoch': 2.73224043715847}
{'eval_loss': 0.20582222938537598, 'eval_f1_macro': 0.8311001126669886, 'eval_runtime': 12.0729, 'eval_samples_per_second': 60.466, 'eval_steps_per_second': 3.81, 'epoch': 3.0}
{'eval_loss': 0.20383873581886292, 'eval_f1_macro': 0.8277173200472008, 'eval_runtime': 12.0829, 'eval_samples_per_second': 60.416, 'eval_steps_per_second': 3.807, 'epoch': 4.0}
{'eval_loss': 0.1976899802684784, 'eval_f1_macro': 0.8393050030971798, 'eval_runtime': 12.0748, 'eval_samples_per_second': 60.457, 'eval_steps_per_second': 3.81, 'epoch': 5.

  trainer = Trainer(


{'eval_loss': 0.21068896353244781, 'eval_f1_macro': 0.797089288055592, 'eval_runtime': 11.6976, 'eval_samples_per_second': 62.406, 'eval_steps_per_second': 3.932, 'epoch': 1.0}
{'eval_loss': 0.21854862570762634, 'eval_f1_macro': 0.8159436410826845, 'eval_runtime': 11.7041, 'eval_samples_per_second': 62.371, 'eval_steps_per_second': 3.93, 'epoch': 2.0}
{'loss': 0.2256, 'grad_norm': 0.7787156701087952, 'learning_rate': 2.2732240437158473e-05, 'epoch': 2.73224043715847}
{'eval_loss': 0.20708543062210083, 'eval_f1_macro': 0.7943827458554643, 'eval_runtime': 11.744, 'eval_samples_per_second': 62.159, 'eval_steps_per_second': 3.917, 'epoch': 3.0}
{'eval_loss': 0.2161797136068344, 'eval_f1_macro': 0.79674230822776, 'eval_runtime': 11.7182, 'eval_samples_per_second': 62.296, 'eval_steps_per_second': 3.926, 'epoch': 4.0}
{'eval_loss': 0.21579548716545105, 'eval_f1_macro': 0.7943827458554643, 'eval_runtime': 11.7606, 'eval_samples_per_second': 62.071, 'eval_steps_per_second': 3.911, 'epoch': 5.0

  trainer = Trainer(


{'eval_loss': 0.2390730381011963, 'eval_f1_macro': 0.6860092281223117, 'eval_runtime': 11.3608, 'eval_samples_per_second': 64.256, 'eval_steps_per_second': 4.049, 'epoch': 1.0}
{'eval_loss': 0.231185182929039, 'eval_f1_macro': 0.7654382171779609, 'eval_runtime': 11.417, 'eval_samples_per_second': 63.94, 'eval_steps_per_second': 4.029, 'epoch': 2.0}
{'loss': 0.2359, 'grad_norm': 0.8986953496932983, 'learning_rate': 2.2732240437158473e-05, 'epoch': 2.73224043715847}
{'eval_loss': 0.2332063466310501, 'eval_f1_macro': 0.7969882992748847, 'eval_runtime': 11.3897, 'eval_samples_per_second': 64.093, 'eval_steps_per_second': 4.039, 'epoch': 3.0}
{'eval_loss': 0.24807001650333405, 'eval_f1_macro': 0.7369617995049944, 'eval_runtime': 11.3403, 'eval_samples_per_second': 64.372, 'eval_steps_per_second': 4.056, 'epoch': 4.0}
{'eval_loss': 0.2167191207408905, 'eval_f1_macro': 0.794563826454487, 'eval_runtime': 11.3583, 'eval_samples_per_second': 64.27, 'eval_steps_per_second': 4.05, 'epoch': 5.0}
{'

In [17]:
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import os
import zipfile
from transformers import AutoModelForSequenceClassification, Trainer, AutoTokenizer, AutoConfig
from google.colab import files as colab_files

# PeftModel is needed to attach the LoRA adapters saved during training.
from peft import PeftModel

# --- 1. SETTINGS ---
OUTPUT_DIR = "output_ha_t1"         # <--- Your Hausa output folder
TEST_FILE_PATH = "subtask1/dev/hau.csv"
NUM_FOLDS = 5
SUBMISSION_DIR = "subtask_1"
CSV_NAME = "pred_hau.csv"

# Base model the Hausa task-1 adapters were trained on (must match the training call).
MODEL_NAME = "Tadesse/AfroXLMR-Social"
NUM_LABELS = 2  # binary polarization task


def _select_checkpoint(fold_dir):
    """Return the checkpoint directory to load for one fold, or None if there is none.

    Training may leave both the best and the most recent checkpoint on disk.
    The newest checkpoint's ``trainer_state.json`` records
    ``best_model_checkpoint``; that directory is preferred when it exists under
    ``fold_dir``, otherwise the highest-step checkpoint is returned.
    """
    import json  # local import keeps this cell self-contained

    def _step(name):
        suffix = name.rsplit("-", 1)[-1]
        return int(suffix) if suffix.isdigit() else -1

    names = sorted(
        (
            entry for entry in os.listdir(fold_dir)
            if entry.startswith("checkpoint-") and os.path.isdir(os.path.join(fold_dir, entry))
        ),
        key=_step,
    )
    if not names:
        return None

    latest = os.path.join(fold_dir, names[-1])
    state_file = os.path.join(latest, "trainer_state.json")
    if os.path.isfile(state_file):
        try:
            with open(state_file) as fh:
                recorded_best = json.load(fh).get("best_model_checkpoint")
        except (OSError, ValueError):
            recorded_best = None
        if recorded_best:
            # The recorded path is relative to the training-time working
            # directory; re-anchor it under fold_dir.
            candidate = os.path.join(fold_dir, os.path.basename(os.path.normpath(recorded_best)))
            if os.path.isdir(candidate):
                return candidate
    return latest


# --- 2. PREPARE DATA ---
print(f">>> Loading Test Data from {TEST_FILE_PATH}...")
test_df = pd.read_csv(TEST_FILE_PATH)

# Auto-detect text column
if "tweet" in test_df.columns:
    text_col = "tweet"
elif "text" in test_df.columns:
    text_col = "text"
else:
    text_col = test_df.columns[1]

print(f">>> Loading Tokenizer from {MODEL_NAME}...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Coerce to str exactly like the training dataset does (str(text)), so missing
# or non-string cells do not crash the tokenizer.
test_encodings = tokenizer(
    test_df[text_col].astype(str).tolist(),
    truncation=True,
    padding=True,
    max_length=128
)

class TestDataset(torch.utils.data.Dataset):
    """Wraps pre-computed tokenizer encodings; yields one dict of tensors per row."""
    def __init__(self, encodings):
        self.encodings = encodings
    def __getitem__(self, idx):
        return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
    def __len__(self):
        return len(self.encodings["input_ids"])

test_dataset = TestDataset(test_encodings)

# --- 3. ENSEMBLE PREDICTION LOOP ---
# Running sum of per-fold class probabilities; divided by the fold count below.
sum_probs = np.zeros((len(test_df), NUM_LABELS))
successful_folds = 0

print(">>> Starting 5-Fold Ensemble Prediction...")

for fold in range(0, NUM_FOLDS):
    fold_path = f"{OUTPUT_DIR}/fold{fold}"
    print(f"   -> Processing Fold {fold} from: {fold_path}")

    if not os.path.exists(fold_path):
        print(f"      !! CRITICAL: Folder not found: {fold_path}")
        continue

    # Training writes LoRA adapter checkpoints into fold{k}/checkpoint-N; the
    # fold folder itself holds no model files, so loading it directly fails.
    checkpoint_dir = _select_checkpoint(fold_path)
    if checkpoint_dir is None:
        print(f"      !! CRITICAL: No checkpoint found in {fold_path}")
        continue

    print(f"      -> Loading model from: {checkpoint_dir}")

    base_model = model = trainer = None
    try:
        # 1. Load the base model with the right classification head size
        base_model = AutoModelForSequenceClassification.from_pretrained(
            MODEL_NAME, num_labels=NUM_LABELS
        )

        # 2. Attach the fold's LoRA adapter weights to the base model
        model = PeftModel.from_pretrained(base_model, checkpoint_dir)

        # 3. Predict
        trainer = Trainer(model=model)
        preds_output = trainer.predict(test_dataset)
        logits = torch.tensor(preds_output.predictions)
        probs = F.softmax(logits, dim=1).numpy()
        sum_probs += probs
        successful_folds += 1

    except Exception as e:
        # Best effort: a broken fold is reported and left out of the ensemble.
        print(f"      !! Error loading model: {e}")

    finally:
        # Free this fold's model before the next one is loaded.
        del base_model, model, trainer
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

if successful_folds == 0:
    raise ValueError("STOP: No models were loaded! Check your OUTPUT_DIR path.")

# --- 4. AVERAGE & SAVE ---
print(f">>> Averaging predictions from {successful_folds} folds...")
avg_probs = sum_probs / successful_folds
final_predictions = np.argmax(avg_probs, axis=1)

os.makedirs(SUBMISSION_DIR, exist_ok=True)
csv_path = os.path.join(SUBMISSION_DIR, CSV_NAME)

# Handle ID column
id_col = "ID" if "ID" in test_df.columns else "id"

submission_df = pd.DataFrame({
    'id': test_df[id_col],
    'polarization': final_predictions
})

submission_df.to_csv(csv_path, index=False)
print(f"\n✓ Saved CSV to: {csv_path}")

# --- 5. ZIP & DOWNLOAD ---
# Every file currently in SUBMISSION_DIR is archived, stored under its parent folder name.
zip_filename = f'{SUBMISSION_DIR}.zip'
with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for root, dirs, files_list in os.walk(SUBMISSION_DIR):
        for file in files_list:
            file_path = os.path.join(root, file)
            arcname = os.path.join(os.path.basename(root), file)
            zipf.write(file_path, arcname)

print(f"✓ Created Archive: {zip_filename}")
colab_files.download(zip_filename)

>>> Using base model for prediction: Tadesse/AfroXLMR-Social
>>> Loading Test Data from subtask1/dev/hau.csv...
>>> Loading Tokenizer from Tadesse/AfroXLMR-Social...
>>> Starting 5-Fold Ensemble Prediction...
   -> Processing Fold 0 from: output_ha_t1/fold0
      -> Loading model from: output_ha_t1/fold0/checkpoint-549
   -> Processing Fold 1 from: output_ha_t1/fold1
      -> Loading model from: output_ha_t1/fold1/checkpoint-915
   -> Processing Fold 2 from: output_ha_t1/fold2
      -> Loading model from: output_ha_t1/fold2/checkpoint-915
   -> Processing Fold 3 from: output_ha_t1/fold3
      -> Loading model from: output_ha_t1/fold3/checkpoint-366
   -> Processing Fold 4 from: output_ha_t1/fold4
      -> Loading model from: output_ha_t1/fold4/checkpoint-549
>>> Averaging predictions from 5 folds...

✓ Saved CSV to: subtask_1/pred_hau.csv
✓ Created Archive: subtask_1.zip


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# 2. ENGLISH TASK 2 (Types) -> AfroXLMR-Social
# Multi-label run: one output per polarization-type column listed in cols_t2.
df_en_train_t2 = pd.read_csv('subtask2/train/eng.csv')
df_en_test_t2 = pd.read_csv('subtask2/dev/eng.csv')
cols_t2 = [
    'gender/sexual',
    'political',
    'religious',
    'racial/ethnic',
    'other',
]
with silence_stderr():
    pred_en_t2 = run_training(
        train_df=df_en_train_t2,
        test_df=df_en_test_t2,
        model_name="Tadesse/AfroXLMR-Social",
        task_type="task2",
        label_cols=cols_t2,
        output_dir="output_en_t2",
    )


TRAINING: task2 | Model: Tadesse/AfroXLMR-Social

--- FOLD 1/5 ---
{'eval_loss': 0.19475294649600983, 'eval_f1_macro': 0.27619012903696144, 'eval_runtime': 5.4253, 'eval_samples_per_second': 118.888, 'eval_steps_per_second': 14.93, 'epoch': 1.0}
{'loss': 0.2165, 'grad_norm': 1.5700268745422363, 'learning_rate': 0.00025365325077399375, 'epoch': 1.5479876160990713}
{'eval_loss': 0.1976374238729477, 'eval_f1_macro': 0.413612125047929, 'eval_runtime': 5.3488, 'eval_samples_per_second': 120.589, 'eval_steps_per_second': 15.144, 'epoch': 2.0}
{'eval_loss': 0.19770392775535583, 'eval_f1_macro': 0.36722240416509216, 'eval_runtime': 5.3456, 'eval_samples_per_second': 120.66, 'eval_steps_per_second': 15.153, 'epoch': 3.0}
{'loss': 0.167, 'grad_norm': 2.214354991912842, 'learning_rate': 0.00020721362229102165, 'epoch': 3.0959752321981426}
{'eval_loss': 0.22061091661453247, 'eval_f1_macro': 0.3737643188116368, 'eval_runtime': 5.3855, 'eval_samples_per_second': 119.765, 'eval_steps_per_second': 15

In [None]:
# 2. HAUSA TASK 2 (Types) -> AfroXLMR-Social
# Multi-label run: one output per polarization-type column listed in cols_t2.
df_ha_train_t2 = pd.read_csv('subtask2/train/hau.csv')
df_ha_test_t2 = pd.read_csv('subtask2/dev/hau.csv')
cols_t2 = [
    'gender/sexual',
    'political',
    'religious',
    'racial/ethnic',
    'other',
]
with silence_stderr():
    pred_ha_t2 = run_training(
        train_df=df_ha_train_t2,
        test_df=df_ha_test_t2,
        model_name="Tadesse/AfroXLMR-Social",
        task_type="task2",
        label_cols=cols_t2,
        output_dir="output_ha_t2",
    )



TRAINING: task2 | Model: Tadesse/AfroXLMR-Social

--- FOLD 1/5 ---
{'eval_loss': 0.08129671961069107, 'eval_f1_macro': 0.21895970695970696, 'eval_runtime': 10.3901, 'eval_samples_per_second': 70.355, 'eval_steps_per_second': 8.855, 'epoch': 1.0}
{'loss': 0.0771, 'grad_norm': 0.09743395447731018, 'learning_rate': 0.000258986301369863, 'epoch': 1.36986301369863}
{'eval_loss': 0.06980200111865997, 'eval_f1_macro': 0.25456429147001264, 'eval_runtime': 10.4334, 'eval_samples_per_second': 70.064, 'eval_steps_per_second': 8.818, 'epoch': 2.0}


In [None]:
# 3. ENGLISH TASK 3 (Manifestations) -> AfroXLMR-Social
df_en_train_t3 = pd.read_csv('subtask3/train/eng.csv')
df_en_test_t3 = pd.read_csv('subtask3/dev/eng.csv')
cols_t3 = ['stereotype', 'vilification','dehumanization','extreme_language','lack_of_empathy','invalidation']
with silence_stderr():
    pred_en_t3 = run_training(
        df_en_train_t3, df_en_test_t3,
        "Tadesse/AfroXLMR-Social",
        "task3",
        cols_t3,
        "output_en_t3"
    )

In [None]:
# 3. ENGLISH TASK 3 (Manifestations) -> AfroXLMR-Social
# HAUSA TASK 3 (Manifestations) -> AfroXLMR-Social
# Multi-label run: one output per manifestation column listed in cols_t3.
df_ha_train_t3 = pd.read_csv('subtask3/train/hau.csv')
df_ha_test_t3 = pd.read_csv('subtask3/dev/hau.csv')
cols_t3 = [
    'stereotype',
    'vilification',
    'dehumanization',
    'extreme_language',
    'lack_of_empathy',
    'invalidation',
]
with silence_stderr():
    pred_ha_t3 = run_training(
        train_df=df_ha_train_t3,
        test_df=df_ha_test_t3,
        model_name="Tadesse/AfroXLMR-Social",
        task_type="task3",
        label_cols=cols_t3,
        output_dir="output_ha_t3",
    )
