# mberta for polarozation classification

---



## Introduction

In this part of the starter notebook, we will take you through the process of all three Subtasks.

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Subtask 1 - Polarization detection

This is a binary classification to determine whether a post contains polarized content (Polarized or Not Polarized).

In [2]:
!unzip /content/drive/MyDrive/dev_phase.zip

Archive:  /content/drive/MyDrive/dev_phase.zip
   creating: subtask1/
   creating: subtask1/dev/
  inflating: subtask1/dev/nep.csv    
  inflating: subtask1/dev/ita.csv    
  inflating: subtask1/dev/hin.csv    
  inflating: subtask1/dev/hau.csv    
  inflating: subtask1/dev/spa.csv    
  inflating: subtask1/dev/deu.csv    
  inflating: subtask1/dev/fas.csv    
  inflating: subtask1/dev/arb.csv    
  inflating: subtask1/dev/amh.csv    
  inflating: subtask1/dev/tur.csv    
  inflating: subtask1/dev/zho.csv    
  inflating: subtask1/dev/eng.csv    
  inflating: subtask1/dev/urd.csv    
   creating: subtask1/train/
  inflating: subtask1/train/nep.csv  
  inflating: subtask1/train/ita.csv  
  inflating: subtask1/train/hin.csv  
  inflating: subtask1/train/fas.csv  
  inflating: subtask1/train/deu.csv  
  inflating: subtask1/train/hau.csv  
  inflating: subtask1/train/spa.csv  
  inflating: subtask1/train/arb.csv  
  inflating: subtask1/train/tur.csv  
  inflating: subtask1/train/zho.csv  


## Imports

In [3]:
import pandas as pd

from sklearn.metrics import recall_score, precision_score, f1_score
import numpy as np

import torch

from sklearn.metrics import f1_score

from transformers import (
    AutoTokenizer,
    AutoConfig,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding
)
from torch.utils.data import Dataset

In [4]:
!pip install wandb



In [5]:
import wandb

# Disable wandb logging for this script
wandb.init(mode="disabled")

  | |_| | '_ \/ _` / _` |  _/ -_)


Using mberta model for classification


In [7]:

drive.mount('/content/drive')
import torch
torch.cuda.empty_cache()
import pandas as pd
import numpy as np
import torch
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding
)
from torch.utils.data import Dataset


# Dataset class

class PolarizationDataset(Dataset):
    def __init__(self, df, tokenizer, require_labels=True):
        self.texts = df["text"].fillna("").tolist()
        if require_labels:
            self.labels = df["polarization"].astype(int).tolist()
        else:
            self.labels = [0] * len(self.texts)  # dummy labels
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        enc = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=128,
            return_tensors="pt"
        )
        item = {k: v.squeeze(0) for k,v in enc.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item


# Load data

languages = ["eng","hin","spa","urd","zho","arb"]
data = {}

for lang in languages:
    train_df = pd.read_csv(f"subtask1/train/{lang}.csv")   # labeled
    dev_df   = pd.read_csv(f"subtask1/dev/{lang}.csv")     # unlabeled
    data[lang] = {"train": train_df, "dev": dev_df}

tokenizer = AutoTokenizer.from_pretrained("microsoft/mdeberta-v3-base")


# Metric

def compute_metrics(p):
    preds = np.argmax(p.predictions, axis=1)
    return {"f1_macro": f1_score(p.label_ids, preds, average="macro")}


# MAIN LOOP: TRAIN/VAL SPLIT + DEV PREDICTION

f1_results = []
predicted_outputs = {}

for lang, dfs in data.items():
    print("\n====================================")
    print(f"LANGUAGE: {lang}")

    train_df = dfs["train"]
    dev_df   = dfs["dev"]

    # Filter ONLY labeled training rows
    train_labeled = train_df.dropna(subset=["polarization"]).reset_index(drop=True)

    # Split train into train/validation
    train_split, val_split = train_test_split(
        train_labeled,
        test_size=0.20,
        stratify=train_labeled["polarization"],
        random_state=42,
        shuffle=True,
    )

    print(f"Train size: {len(train_split)},  Validation size: {len(val_split)}")

    train_dataset = PolarizationDataset(train_split, tokenizer, require_labels=True)
    val_dataset   = PolarizationDataset(val_split,   tokenizer, require_labels=True)

    # Train model
    model = AutoModelForSequenceClassification.from_pretrained(
        "microsoft/mdeberta-v3-base", num_labels=2
    )

    training_args = TrainingArguments(
        output_dir=f"./model_{lang}",
        learning_rate=2e-5,
        num_train_epochs=30,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        eval_strategy="epoch",
        save_strategy="no",
        logging_steps=20
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        data_collator=DataCollatorWithPadding(tokenizer)
    )

    trainer.train()

    # Compute F1 on validation
    metrics = trainer.evaluate()
    f1 = metrics["eval_f1_macro"]
    print(f"{lang} Validation F1 = {f1:.4f}")

    f1_results.append({"language": lang, "f1_macro": f1})

    # Predict on dev (UNLABELED)
    print(f"ðŸ”® Predicting for dev set ({len(dev_df)} rows)...")
    dev_dataset = PolarizationDataset(dev_df, tokenizer, require_labels=False)
    preds = trainer.predict(dev_dataset)
    pred_labels = np.argmax(preds.predictions, axis=1)

    dev_df["predicted_polarization"] = pred_labels
    predicted_outputs[lang] = dev_df


# SAVE PREDICTIONS

for lang, df_pred in predicted_outputs.items():
    df_pred.to_csv(f"{lang}_dev_predicted.csv", index=False)
    print(f"Saved: {lang}_dev_predicted.csv")


# FINAL F1 SCORES

f1_df = pd.DataFrame(f1_results)
print("\nFINAL F1 SCORES:")
print(f1_df)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


spm.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]




LANGUAGE: eng
Train size: 2140,  Validation size: 536


pytorch_model.bin:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Epoch,Training Loss,Validation Loss,F1 Macro
1,0.4757,0.446907,0.779958
2,0.3304,0.507774,0.794502
3,0.3715,0.58741,0.801772
4,0.1537,0.805751,0.778993
5,0.1635,1.089038,0.791086
6,0.0322,1.146043,0.793215
7,0.0525,1.298952,0.78642
8,0.0455,1.439683,0.768838
9,0.03,1.525748,0.786855
10,0.0158,1.504409,0.805677


eng Validation F1 = 0.7877
ðŸ”® Predicting for dev set (133 rows)...

LANGUAGE: hin
Train size: 2195,  Validation size: 549


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.3098,0.308541,0.688905
2,0.2806,0.299368,0.765885
3,0.2609,0.368244,0.752384
4,0.2229,0.411892,0.738239
5,0.1832,0.621836,0.719577
6,0.0966,0.502394,0.754032
7,0.0804,0.53038,0.748227
8,0.0544,0.575794,0.777785
9,0.0038,0.57403,0.784198
10,0.012,0.624306,0.783049


hin Validation F1 = 0.8030
ðŸ”® Predicting for dev set (137 rows)...

LANGUAGE: spa
Train size: 2644,  Validation size: 661


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.5885,0.611461,0.688607
2,0.488,0.552798,0.736105
3,0.3606,0.583206,0.752961
4,0.266,0.731136,0.732071
5,0.2001,0.910493,0.752961
6,0.1945,1.167297,0.7281
7,0.1504,1.159004,0.74886
8,0.1552,1.612338,0.754507
9,0.0862,1.6454,0.745583
10,0.0362,1.76659,0.757262


spa Validation F1 = 0.7471
ðŸ”® Predicting for dev set (165 rows)...

LANGUAGE: urd
Train size: 2279,  Validation size: 570


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.5351,0.500042,0.719803
2,0.4655,0.509672,0.731624
3,0.3668,0.589544,0.716917
4,0.2104,0.778126,0.712906
5,0.1355,1.099859,0.70395
6,0.1229,1.181069,0.726398
7,0.0989,1.506522,0.72193
8,0.05,1.214639,0.754292
9,0.0943,1.59997,0.734499
10,0.0317,1.647341,0.732218


urd Validation F1 = 0.7338
ðŸ”® Predicting for dev set (142 rows)...

LANGUAGE: zho
Train size: 3424,  Validation size: 856


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.4384,0.380314,0.843833
2,0.3011,0.440496,0.857464
3,0.18,0.582985,0.845348
4,0.2005,0.572474,0.852513
5,0.0539,0.691404,0.859592
6,0.0594,0.900352,0.851976
7,0.0083,0.931483,0.860272
8,0.0454,0.893461,0.857251
9,0.0153,0.951463,0.867794
10,0.0002,0.992589,0.874986


zho Validation F1 = 0.8808
ðŸ”® Predicting for dev set (214 rows)...

LANGUAGE: arb
Train size: 2704,  Validation size: 676


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.4637,0.443797,0.77306
2,0.376,0.475424,0.792899
3,0.2617,0.546623,0.788216
4,0.1708,0.826366,0.788383
5,0.1336,1.120317,0.766447
6,0.0928,1.086063,0.791143
7,0.0695,1.301,0.790573
8,0.0364,1.471808,0.789203
9,0.0598,1.564668,0.781683
10,0.0144,1.709036,0.76038


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.4637,0.443797,0.77306
2,0.376,0.475424,0.792899
3,0.2617,0.546623,0.788216
4,0.1708,0.826366,0.788383
5,0.1336,1.120317,0.766447
6,0.0928,1.086063,0.791143
7,0.0695,1.301,0.790573
8,0.0364,1.471808,0.789203
9,0.0598,1.564668,0.781683
10,0.0144,1.709036,0.76038


arb Validation F1 = 0.7696
ðŸ”® Predicting for dev set (169 rows)...
Saved: eng_dev_predicted.csv
Saved: hin_dev_predicted.csv
Saved: spa_dev_predicted.csv
Saved: urd_dev_predicted.csv
Saved: zho_dev_predicted.csv
Saved: arb_dev_predicted.csv

FINAL F1 SCORES:
  language  f1_macro
0      eng  0.787699
1      hin  0.803049
2      spa  0.747121
3      urd  0.733837
4      zho  0.880776
5      arb  0.769575
