In [1]:
from google.colab import drive
drive.mount('/content/drive')
!unzip /content/drive/MyDrive/dev_phase.zip

Mounted at /content/drive
Archive:  /content/drive/MyDrive/dev_phase.zip
   creating: subtask1/
   creating: subtask1/dev/
  inflating: subtask1/dev/nep.csv    
  inflating: subtask1/dev/ita.csv    
  inflating: subtask1/dev/hin.csv    
  inflating: subtask1/dev/hau.csv    
  inflating: subtask1/dev/spa.csv    
  inflating: subtask1/dev/deu.csv    
  inflating: subtask1/dev/fas.csv    
  inflating: subtask1/dev/arb.csv    
  inflating: subtask1/dev/amh.csv    
  inflating: subtask1/dev/tur.csv    
  inflating: subtask1/dev/zho.csv    
  inflating: subtask1/dev/eng.csv    
  inflating: subtask1/dev/urd.csv    
   creating: subtask1/train/
  inflating: subtask1/train/nep.csv  
  inflating: subtask1/train/ita.csv  
  inflating: subtask1/train/hin.csv  
  inflating: subtask1/train/fas.csv  
  inflating: subtask1/train/deu.csv  
  inflating: subtask1/train/hau.csv  
  inflating: subtask1/train/spa.csv  
  inflating: subtask1/train/arb.csv  
  inflating: subtask1/train/tur.csv  
  inflating:

In [2]:
import wandb

# Start Weights & Biases in "disabled" mode so this notebook does not log runs to W&B.
# NOTE(review): presumably this also keeps the Hugging Face Trainer used in later
# cells from prompting for a W&B login — confirm.
wandb.init(mode="disabled")

  | |_| | '_ \/ _` / _` |  _/ -_)


Learning rate: 2e-5, epochs: 5

In [3]:

drive.mount('/content/drive')
import torch
torch.cuda.empty_cache()
import pandas as pd
import numpy as np
import torch
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding
)
from torch.utils.data import Dataset

# ---------------------------------------
# Dataset class
# ---------------------------------------
class PolarizationDataset(Dataset):
    """Torch dataset that tokenizes one text per item for polarization classification.

    Args:
        df: DataFrame with a "text" column and, when ``require_labels`` is True,
            a "polarization" column whose values can be cast to int.
        tokenizer: Callable tokenizer. It is invoked once per item with
            truncation, fixed-length padding and ``return_tensors="pt"``.
        require_labels: If False, the "polarization" column is not read and
            every item carries a dummy label of 0.
        max_length: Length every item is truncated/padded to (default 128,
            the value that used to be hard-coded).

    Each item is a dict holding the tokenizer's output tensors plus "labels".
    """

    def __init__(self, df, tokenizer, require_labels=True, max_length=128):
        # Missing text becomes ""; everything else is coerced to str so that a
        # column pandas parsed as numeric cannot reach the tokenizer as a non-string.
        self.texts = df["text"].fillna("").astype(str).tolist()
        if require_labels:
            self.labels = df["polarization"].astype(int).tolist()
        else:
            self.labels = [0] * len(self.texts)  # dummy labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        enc = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt"
        )
        # squeeze(0) removes the leading size-1 axis of each returned tensor
        # (presumably the batch axis added by return_tensors="pt").
        item = {k: v.squeeze(0) for k, v in enc.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item

# ---------------------------------------
# Load data
# ---------------------------------------
languages = ["eng","hin","spa","urd","zho","arb"]

# language code -> {"train": frame read from the train CSV, "dev": frame read from the dev CSV}
data = {}
for lang in languages:
    train_df, dev_df = (
        pd.read_csv(f"subtask1/train/{lang}.csv"),
        pd.read_csv(f"subtask1/dev/{lang}.csv"),
    )
    data[lang] = dict(train=train_df, dev=dev_df)

# One tokenizer instance is loaded here and reused for every language.
tokenizer = AutoTokenizer.from_pretrained("microsoft/mdeberta-v3-base")

# ---------------------------------------
# Metric
# ---------------------------------------
def compute_metrics(p):
    """Return the macro-averaged F1 of an evaluation pass.

    ``p`` must expose ``predictions`` (scores whose argmax along axis 1 is the
    predicted class) and ``label_ids`` (the reference labels).
    The result is a dict with the single key "f1_macro".
    """
    scores, gold = p.predictions, p.label_ids
    hard_preds = np.argmax(scores, axis=1)
    macro_f1 = f1_score(gold, hard_preds, average="macro")
    return {"f1_macro": macro_f1}

# ---------------------------------------
# MAIN LOOP: TRAIN/VAL SPLIT + DEV PREDICTION
# ---------------------------------------
# One seed for the data split, the classifier-head initialisation and the Trainer.
SEED = 42

f1_results = []
predicted_outputs = {}

for lang, dfs in data.items():
    print("\n====================================")
    print(f" LANGUAGE: {lang}")

    train_df = dfs["train"]
    dev_df   = dfs["dev"]

    # 1️⃣ Keep only training rows that actually carry a label
    train_labeled = train_df.dropna(subset=["polarization"]).reset_index(drop=True)

    # 2️⃣ Stratified 80/20 split of the labeled rows into train/validation
    train_split, val_split = train_test_split(
        train_labeled,
        test_size=0.20,
        stratify=train_labeled["polarization"],
        random_state=SEED,
        shuffle=True,
    )

    print(f"Train size: {len(train_split)},  Validation size: {len(val_split)}")

    train_dataset = PolarizationDataset(train_split, tokenizer, require_labels=True)
    val_dataset   = PolarizationDataset(val_split,   tokenizer, require_labels=True)

    # 3️⃣ Train model
    # TrainingArguments.seed is applied by the Trainer, which is only built after
    # the model exists, so it cannot cover the randomly initialised classifier
    # head. Seed torch here so every language starts from a reproducible init
    # that does not depend on kernel state or on the order of the loop.
    torch.manual_seed(SEED)
    model = AutoModelForSequenceClassification.from_pretrained(
        "microsoft/mdeberta-v3-base", num_labels=2
    )

    training_args = TrainingArguments(
        output_dir=f"./model_{lang}",
        learning_rate=2e-5,
        num_train_epochs=5,
        per_device_train_batch_size=64,
        per_device_eval_batch_size=16,
        eval_strategy="epoch",
        save_strategy="no",
        logging_steps=20,
        seed=SEED,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        data_collator=DataCollatorWithPadding(tokenizer)
    )

    trainer.train()

    # 4️⃣ Compute F1 on validation
    # save_strategy="no" keeps no checkpoints, so this scores the final-epoch weights.
    metrics = trainer.evaluate()
    f1 = metrics["eval_f1_macro"]
    print(f" {lang} Validation F1 = {f1:.4f}")

    f1_results.append({"language": lang, "f1_macro": f1})

    # 5️⃣ Predict on dev (UNLABELED)
    print(f" Predicting for dev set ({len(dev_df)} rows)...")
    dev_dataset = PolarizationDataset(dev_df, tokenizer, require_labels=False)
    preds = trainer.predict(dev_dataset)
    pred_labels = np.argmax(preds.predictions, axis=1)

    # assign() returns a new frame, so the raw dev frame kept in `data` is not mutated.
    predicted_outputs[lang] = dev_df.assign(predicted_polarization=pred_labels)

# ---------------------------------------
# SAVE PREDICTIONS
# ---------------------------------------
for lang, df_pred in predicted_outputs.items():
    df_pred.to_csv(f"{lang}_dev_predicted.csv", index=False)
    print(f" Saved: {lang}_dev_predicted.csv")

# ---------------------------------------
# FINAL F1 SCORES
# ---------------------------------------
f1_df = pd.DataFrame(f1_results)
print("\n FINAL F1 SCORES:")
print(f1_df)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]




 LANGUAGE: eng
Train size: 2140,  Validation size: 536


pytorch_model.bin:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.6311,0.525668,0.702075
2,0.4854,0.484988,0.772796
3,0.4104,0.466146,0.787869
4,0.3568,0.465547,0.785753
5,0.3239,0.460577,0.788822


 eng Validation F1 = 0.7888
 Predicting for dev set (133 rows)...

 LANGUAGE: hin
Train size: 2195,  Validation size: 549


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.578,0.373643,0.460707
2,0.342,0.326982,0.70172
3,0.2463,0.316738,0.736722
4,0.2156,0.312341,0.76544
5,0.2027,0.314117,0.756033


 hin Validation F1 = 0.7560
 Predicting for dev set (137 rows)...

 LANGUAGE: spa
Train size: 2644,  Validation size: 661


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.6774,0.62418,0.685928
2,0.5684,0.570306,0.716923
3,0.4875,0.557844,0.722878
4,0.4403,0.57175,0.730556
5,0.4128,0.582611,0.72692


 spa Validation F1 = 0.7269
 Predicting for dev set (165 rows)...

 LANGUAGE: urd
Train size: 2279,  Validation size: 570


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.6622,0.540939,0.694901
2,0.4887,0.48882,0.730283
3,0.4395,0.510893,0.733649
4,0.3815,0.589515,0.696984
5,0.3598,0.572799,0.704494


 urd Validation F1 = 0.7045
 Predicting for dev set (142 rows)...

 LANGUAGE: zho
Train size: 3424,  Validation size: 856


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.5994,0.484117,0.803477
2,0.3745,0.37196,0.848128
3,0.2937,0.368779,0.856238
4,0.2541,0.381828,0.858622
5,0.1794,0.385785,0.858504


 zho Validation F1 = 0.8585
 Predicting for dev set (214 rows)...

 LANGUAGE: arb
Train size: 2704,  Validation size: 676


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.5121,0.514282,0.737506
2,0.418,0.47971,0.773128
3,0.3378,0.507187,0.767197
4,0.2864,0.518066,0.768954
5,0.2523,0.542999,0.768262


 arb Validation F1 = 0.7683
 Predicting for dev set (169 rows)...
 Saved: eng_dev_predicted.csv
 Saved: hin_dev_predicted.csv
 Saved: spa_dev_predicted.csv
 Saved: urd_dev_predicted.csv
 Saved: zho_dev_predicted.csv
 Saved: arb_dev_predicted.csv

 FINAL F1 SCORES:
  language  f1_macro
0      eng  0.788822
1      hin  0.756033
2      spa  0.726920
3      urd  0.704494
4      zho  0.858504
5      arb  0.768262


Learning rate: 1e-5, epochs: 5

In [4]:

drive.mount('/content/drive')
import torch
torch.cuda.empty_cache()
import pandas as pd
import numpy as np
import torch
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding
)
from torch.utils.data import Dataset

# ---------------------------------------
# Dataset class
# ---------------------------------------
class PolarizationDataset(Dataset):
    """Torch dataset that tokenizes one text per item for polarization classification.

    Args:
        df: DataFrame with a "text" column and, when ``require_labels`` is True,
            a "polarization" column whose values can be cast to int.
        tokenizer: Callable tokenizer. It is invoked once per item with
            truncation, fixed-length padding and ``return_tensors="pt"``.
        require_labels: If False, the "polarization" column is not read and
            every item carries a dummy label of 0.
        max_length: Length every item is truncated/padded to (default 128,
            the value that used to be hard-coded).

    Each item is a dict holding the tokenizer's output tensors plus "labels".
    """

    def __init__(self, df, tokenizer, require_labels=True, max_length=128):
        # Missing text becomes ""; everything else is coerced to str so that a
        # column pandas parsed as numeric cannot reach the tokenizer as a non-string.
        self.texts = df["text"].fillna("").astype(str).tolist()
        if require_labels:
            self.labels = df["polarization"].astype(int).tolist()
        else:
            self.labels = [0] * len(self.texts)  # dummy labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        enc = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt"
        )
        # squeeze(0) removes the leading size-1 axis of each returned tensor
        # (presumably the batch axis added by return_tensors="pt").
        item = {k: v.squeeze(0) for k, v in enc.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item

# ---------------------------------------
# Load data
# ---------------------------------------
languages = ["eng","hin","spa","urd","zho","arb"]

# language code -> {"train": frame read from the train CSV, "dev": frame read from the dev CSV}
data = {}
for lang in languages:
    train_df, dev_df = (
        pd.read_csv(f"subtask1/train/{lang}.csv"),
        pd.read_csv(f"subtask1/dev/{lang}.csv"),
    )
    data[lang] = dict(train=train_df, dev=dev_df)

# One tokenizer instance is loaded here and reused for every language.
tokenizer = AutoTokenizer.from_pretrained("microsoft/mdeberta-v3-base")

# ---------------------------------------
# Metric
# ---------------------------------------
def compute_metrics(p):
    """Return the macro-averaged F1 of an evaluation pass.

    ``p`` must expose ``predictions`` (scores whose argmax along axis 1 is the
    predicted class) and ``label_ids`` (the reference labels).
    The result is a dict with the single key "f1_macro".
    """
    scores, gold = p.predictions, p.label_ids
    hard_preds = np.argmax(scores, axis=1)
    macro_f1 = f1_score(gold, hard_preds, average="macro")
    return {"f1_macro": macro_f1}

# ---------------------------------------
# MAIN LOOP: TRAIN/VAL SPLIT + DEV PREDICTION
# ---------------------------------------
# One seed for the data split, the classifier-head initialisation and the Trainer.
SEED = 42

f1_results = []
predicted_outputs = {}

for lang, dfs in data.items():
    print("\n====================================")
    print(f" LANGUAGE: {lang}")

    train_df = dfs["train"]
    dev_df   = dfs["dev"]

    # 1️⃣ Keep only training rows that actually carry a label
    train_labeled = train_df.dropna(subset=["polarization"]).reset_index(drop=True)

    # 2️⃣ Stratified 80/20 split of the labeled rows into train/validation
    train_split, val_split = train_test_split(
        train_labeled,
        test_size=0.20,
        stratify=train_labeled["polarization"],
        random_state=SEED,
        shuffle=True,
    )

    print(f"Train size: {len(train_split)},  Validation size: {len(val_split)}")

    train_dataset = PolarizationDataset(train_split, tokenizer, require_labels=True)
    val_dataset   = PolarizationDataset(val_split,   tokenizer, require_labels=True)

    # 3️⃣ Train model
    # TrainingArguments.seed is applied by the Trainer, which is only built after
    # the model exists, so it cannot cover the randomly initialised classifier
    # head. Seed torch here so every language starts from a reproducible init
    # that does not depend on kernel state or on the order of the loop.
    torch.manual_seed(SEED)
    model = AutoModelForSequenceClassification.from_pretrained(
        "microsoft/mdeberta-v3-base", num_labels=2
    )

    training_args = TrainingArguments(
        output_dir=f"./model_{lang}",
        learning_rate=1e-5,
        num_train_epochs=5,
        per_device_train_batch_size=64,
        per_device_eval_batch_size=16,
        eval_strategy="epoch",
        save_strategy="no",
        logging_steps=20,
        seed=SEED,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        data_collator=DataCollatorWithPadding(tokenizer)
    )

    trainer.train()

    # 4️⃣ Compute F1 on validation
    # save_strategy="no" keeps no checkpoints, so this scores the final-epoch weights.
    metrics = trainer.evaluate()
    f1 = metrics["eval_f1_macro"]
    print(f" {lang} Validation F1 = {f1:.4f}")

    f1_results.append({"language": lang, "f1_macro": f1})

    # 5️⃣ Predict on dev (UNLABELED)
    print(f" Predicting for dev set ({len(dev_df)} rows)...")
    dev_dataset = PolarizationDataset(dev_df, tokenizer, require_labels=False)
    preds = trainer.predict(dev_dataset)
    pred_labels = np.argmax(preds.predictions, axis=1)

    # assign() returns a new frame, so the raw dev frame kept in `data` is not mutated.
    predicted_outputs[lang] = dev_df.assign(predicted_polarization=pred_labels)

# ---------------------------------------
# SAVE PREDICTIONS
# ---------------------------------------
for lang, df_pred in predicted_outputs.items():
    df_pred.to_csv(f"{lang}_dev_predicted.csv", index=False)
    print(f" Saved: {lang}_dev_predicted.csv")

# ---------------------------------------
# FINAL F1 SCORES
# ---------------------------------------
f1_df = pd.DataFrame(f1_results)
print("\n FINAL F1 SCORES:")
print(f1_df)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).





 LANGUAGE: eng
Train size: 2140,  Validation size: 536


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.6495,0.55094,0.384615
2,0.5207,0.524046,0.495097
3,0.4797,0.510029,0.764222
4,0.4504,0.490972,0.790876
5,0.4314,0.491663,0.79183


 eng Validation F1 = 0.7918
 Predicting for dev set (133 rows)...

 LANGUAGE: hin
Train size: 2195,  Validation size: 549


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.698,0.403355,0.460707
2,0.3898,0.360659,0.460707
3,0.317,0.320147,0.61493
4,0.2892,0.299303,0.720788
5,0.2851,0.302403,0.724999


 hin Validation F1 = 0.7250
 Predicting for dev set (137 rows)...

 LANGUAGE: spa
Train size: 2644,  Validation size: 661


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.6908,0.679334,0.519762
2,0.6212,0.615305,0.692662
3,0.5556,0.58492,0.70001
4,0.5395,0.579606,0.713172
5,0.5245,0.576889,0.70623


 spa Validation F1 = 0.7062
 Predicting for dev set (165 rows)...

 LANGUAGE: urd
Train size: 2279,  Validation size: 570


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.7111,0.595619,0.409326
2,0.5631,0.516469,0.723111
3,0.4998,0.492263,0.730867
4,0.4544,0.499344,0.7182
5,0.4349,0.49898,0.721408


 urd Validation F1 = 0.7214
 Predicting for dev set (142 rows)...

 LANGUAGE: zho
Train size: 3424,  Validation size: 856


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.6713,0.549092,0.748882
2,0.452,0.438433,0.825881
3,0.3583,0.406347,0.829625
4,0.3228,0.402044,0.832995
5,0.2944,0.401512,0.835746


 zho Validation F1 = 0.8357
 Predicting for dev set (214 rows)...

 LANGUAGE: arb
Train size: 2704,  Validation size: 676


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.5829,0.538088,0.72456
2,0.4594,0.474885,0.770669
3,0.4035,0.486779,0.759612
4,0.3671,0.472108,0.779956
5,0.3493,0.483933,0.768296


 arb Validation F1 = 0.7683
 Predicting for dev set (169 rows)...
 Saved: eng_dev_predicted.csv
 Saved: hin_dev_predicted.csv
 Saved: spa_dev_predicted.csv
 Saved: urd_dev_predicted.csv
 Saved: zho_dev_predicted.csv
 Saved: arb_dev_predicted.csv

 FINAL F1 SCORES:
  language  f1_macro
0      eng  0.791830
1      hin  0.724999
2      spa  0.706230
3      urd  0.721408
4      zho  0.835746
5      arb  0.768296


In [5]:

drive.mount('/content/drive')
import torch
torch.cuda.empty_cache()
import pandas as pd
import numpy as np
import torch
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding
)
from torch.utils.data import Dataset

# ---------------------------------------
# Dataset class
# ---------------------------------------
class PolarizationDataset(Dataset):
    """Torch dataset that tokenizes one text per item for polarization classification.

    Args:
        df: DataFrame with a "text" column and, when ``require_labels`` is True,
            a "polarization" column whose values can be cast to int.
        tokenizer: Callable tokenizer. It is invoked once per item with
            truncation, fixed-length padding and ``return_tensors="pt"``.
        require_labels: If False, the "polarization" column is not read and
            every item carries a dummy label of 0.
        max_length: Length every item is truncated/padded to (default 128,
            the value that used to be hard-coded).

    Each item is a dict holding the tokenizer's output tensors plus "labels".
    """

    def __init__(self, df, tokenizer, require_labels=True, max_length=128):
        # Missing text becomes ""; everything else is coerced to str so that a
        # column pandas parsed as numeric cannot reach the tokenizer as a non-string.
        self.texts = df["text"].fillna("").astype(str).tolist()
        if require_labels:
            self.labels = df["polarization"].astype(int).tolist()
        else:
            self.labels = [0] * len(self.texts)  # dummy labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        enc = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt"
        )
        # squeeze(0) removes the leading size-1 axis of each returned tensor
        # (presumably the batch axis added by return_tensors="pt").
        item = {k: v.squeeze(0) for k, v in enc.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item

# ---------------------------------------
# Load data
# ---------------------------------------
languages = ["eng","hin","spa","urd","zho","arb"]

# language code -> {"train": frame read from the train CSV, "dev": frame read from the dev CSV}
data = {}
for lang in languages:
    train_df, dev_df = (
        pd.read_csv(f"subtask1/train/{lang}.csv"),
        pd.read_csv(f"subtask1/dev/{lang}.csv"),
    )
    data[lang] = dict(train=train_df, dev=dev_df)

# One tokenizer instance is loaded here and reused for every language.
tokenizer = AutoTokenizer.from_pretrained("microsoft/mdeberta-v3-base")

# ---------------------------------------
# Metric
# ---------------------------------------
def compute_metrics(p):
    """Return the macro-averaged F1 of an evaluation pass.

    ``p`` must expose ``predictions`` (scores whose argmax along axis 1 is the
    predicted class) and ``label_ids`` (the reference labels).
    The result is a dict with the single key "f1_macro".
    """
    scores, gold = p.predictions, p.label_ids
    hard_preds = np.argmax(scores, axis=1)
    macro_f1 = f1_score(gold, hard_preds, average="macro")
    return {"f1_macro": macro_f1}

# ---------------------------------------
# MAIN LOOP: TRAIN/VAL SPLIT + DEV PREDICTION
# ---------------------------------------
# One seed for the data split, the classifier-head initialisation and the Trainer.
SEED = 42

f1_results = []
predicted_outputs = {}

for lang, dfs in data.items():
    print("\n====================================")
    print(f" LANGUAGE: {lang}")

    train_df = dfs["train"]
    dev_df   = dfs["dev"]

    # 1️⃣ Keep only training rows that actually carry a label
    train_labeled = train_df.dropna(subset=["polarization"]).reset_index(drop=True)

    # 2️⃣ Stratified 80/20 split of the labeled rows into train/validation
    train_split, val_split = train_test_split(
        train_labeled,
        test_size=0.20,
        stratify=train_labeled["polarization"],
        random_state=SEED,
        shuffle=True,
    )

    print(f"Train size: {len(train_split)},  Validation size: {len(val_split)}")

    train_dataset = PolarizationDataset(train_split, tokenizer, require_labels=True)
    val_dataset   = PolarizationDataset(val_split,   tokenizer, require_labels=True)

    # 3️⃣ Train model
    # TrainingArguments.seed is applied by the Trainer, which is only built after
    # the model exists, so it cannot cover the randomly initialised classifier
    # head. Seed torch here so every language starts from a reproducible init
    # that does not depend on kernel state or on the order of the loop.
    torch.manual_seed(SEED)
    model = AutoModelForSequenceClassification.from_pretrained(
        "microsoft/mdeberta-v3-base", num_labels=2
    )

    training_args = TrainingArguments(
        output_dir=f"./model_{lang}",
        learning_rate=1e-5,
        num_train_epochs=10,
        per_device_train_batch_size=64,
        per_device_eval_batch_size=16,
        eval_strategy="epoch",
        save_strategy="no",
        logging_steps=20,
        seed=SEED,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        data_collator=DataCollatorWithPadding(tokenizer)
    )

    trainer.train()

    # 4️⃣ Compute F1 on validation
    # save_strategy="no" keeps no checkpoints, so this scores the final-epoch weights.
    metrics = trainer.evaluate()
    f1 = metrics["eval_f1_macro"]
    print(f" {lang} Validation F1 = {f1:.4f}")

    f1_results.append({"language": lang, "f1_macro": f1})

    # 5️⃣ Predict on dev (UNLABELED)
    print(f" Predicting for dev set ({len(dev_df)} rows)...")
    dev_dataset = PolarizationDataset(dev_df, tokenizer, require_labels=False)
    preds = trainer.predict(dev_dataset)
    pred_labels = np.argmax(preds.predictions, axis=1)

    # assign() returns a new frame, so the raw dev frame kept in `data` is not mutated.
    predicted_outputs[lang] = dev_df.assign(predicted_polarization=pred_labels)

# ---------------------------------------
# SAVE PREDICTIONS
# ---------------------------------------
for lang, df_pred in predicted_outputs.items():
    df_pred.to_csv(f"{lang}_dev_predicted.csv", index=False)
    print(f" Saved: {lang}_dev_predicted.csv")

# ---------------------------------------
# FINAL F1 SCORES
# ---------------------------------------
f1_df = pd.DataFrame(f1_results)
print("\n FINAL F1 SCORES:")
print(f1_df)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).





 LANGUAGE: eng
Train size: 2140,  Validation size: 536


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.6493,0.548584,0.384615
2,0.5185,0.519991,0.752472
3,0.4653,0.512483,0.765174
4,0.4279,0.476034,0.771526
5,0.3951,0.507187,0.777858
6,0.3502,0.496946,0.787183
7,0.3108,0.493687,0.788201
8,0.2973,0.501605,0.788201
9,0.2854,0.507672,0.792088
10,0.2638,0.513283,0.783977


 eng Validation F1 = 0.7840
 Predicting for dev set (133 rows)...

 LANGUAGE: hin
Train size: 2195,  Validation size: 549


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.5316,0.404365,0.460707
2,0.3898,0.386658,0.460707
3,0.3314,0.343277,0.646393
4,0.2946,0.331015,0.642757
5,0.263,0.307892,0.710624
6,0.2075,0.308145,0.75319
7,0.1945,0.308011,0.737468
8,0.1719,0.313227,0.743592
9,0.1868,0.314294,0.741247
10,0.1528,0.316457,0.737468


 hin Validation F1 = 0.7375
 Predicting for dev set (137 rows)...

 LANGUAGE: spa
Train size: 2644,  Validation size: 661


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.6915,0.687047,0.644224
2,0.5994,0.564916,0.715596
3,0.5063,0.550599,0.72723
4,0.4634,0.556177,0.750321
5,0.4166,0.560209,0.754912
6,0.3414,0.637651,0.737698
7,0.3461,0.618278,0.742681
8,0.3161,0.654075,0.739057
9,0.2925,0.644911,0.745629
10,0.2702,0.660517,0.740693


 spa Validation F1 = 0.7407
 Predicting for dev set (165 rows)...

 LANGUAGE: urd
Train size: 2279,  Validation size: 570


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.6354,0.58591,0.409326
2,0.5426,0.510875,0.698058
3,0.4666,0.533393,0.711149
4,0.4041,0.528062,0.7098
5,0.3565,0.596112,0.692792
6,0.3256,0.588718,0.700725
7,0.2786,0.628294,0.700226
8,0.2747,0.598247,0.712454
9,0.2462,0.639238,0.710293
10,0.2365,0.655661,0.707146


 urd Validation F1 = 0.7071
 Predicting for dev set (142 rows)...

 LANGUAGE: zho
Train size: 3424,  Validation size: 856


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.6403,0.523766,0.772496
2,0.459,0.428475,0.830579
3,0.3412,0.367117,0.85046
4,0.3129,0.367456,0.850721
5,0.2605,0.375407,0.859787
6,0.2077,0.382524,0.859847
7,0.1977,0.373274,0.868711
8,0.185,0.381128,0.86629
9,0.1548,0.392941,0.869101
10,0.1657,0.392194,0.867817


 zho Validation F1 = 0.8678
 Predicting for dev set (214 rows)...

 LANGUAGE: arb
Train size: 2704,  Validation size: 676


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.599,0.573268,0.690748
2,0.4471,0.472267,0.770227
3,0.3839,0.478639,0.776603
4,0.3177,0.489504,0.789676
5,0.294,0.513855,0.771557
6,0.2832,0.526995,0.771389
7,0.261,0.582942,0.761664
8,0.2194,0.570255,0.776886
9,0.1856,0.585845,0.778605
10,0.196,0.596242,0.776681


 arb Validation F1 = 0.7767
 Predicting for dev set (169 rows)...
 Saved: eng_dev_predicted.csv
 Saved: hin_dev_predicted.csv
 Saved: spa_dev_predicted.csv
 Saved: urd_dev_predicted.csv
 Saved: zho_dev_predicted.csv
 Saved: arb_dev_predicted.csv

 FINAL F1 SCORES:
  language  f1_macro
0      eng  0.783977
1      hin  0.737468
2      spa  0.740693
3      urd  0.707146
4      zho  0.867817
5      arb  0.776681


In [6]:

drive.mount('/content/drive')
import torch
torch.cuda.empty_cache()
import pandas as pd
import numpy as np
import torch
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding
)
from torch.utils.data import Dataset

# ---------------------------------------
# Dataset class
# ---------------------------------------
class PolarizationDataset(Dataset):
    """Torch dataset that tokenizes one text per item for polarization classification.

    Args:
        df: DataFrame with a "text" column and, when ``require_labels`` is True,
            a "polarization" column whose values can be cast to int.
        tokenizer: Callable tokenizer. It is invoked once per item with
            truncation, fixed-length padding and ``return_tensors="pt"``.
        require_labels: If False, the "polarization" column is not read and
            every item carries a dummy label of 0.
        max_length: Length every item is truncated/padded to (default 128,
            the value that used to be hard-coded).

    Each item is a dict holding the tokenizer's output tensors plus "labels".
    """

    def __init__(self, df, tokenizer, require_labels=True, max_length=128):
        # Missing text becomes ""; everything else is coerced to str so that a
        # column pandas parsed as numeric cannot reach the tokenizer as a non-string.
        self.texts = df["text"].fillna("").astype(str).tolist()
        if require_labels:
            self.labels = df["polarization"].astype(int).tolist()
        else:
            self.labels = [0] * len(self.texts)  # dummy labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        enc = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt"
        )
        # squeeze(0) removes the leading size-1 axis of each returned tensor
        # (presumably the batch axis added by return_tensors="pt").
        item = {k: v.squeeze(0) for k, v in enc.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item

# ---------------------------------------
# Load data
# ---------------------------------------
languages = ["eng","hin","spa","urd","zho","arb"]

# language code -> {"train": frame read from the train CSV, "dev": frame read from the dev CSV}
data = {}
for lang in languages:
    train_df, dev_df = (
        pd.read_csv(f"subtask1/train/{lang}.csv"),
        pd.read_csv(f"subtask1/dev/{lang}.csv"),
    )
    data[lang] = dict(train=train_df, dev=dev_df)

# One tokenizer instance is loaded here and reused for every language.
tokenizer = AutoTokenizer.from_pretrained("microsoft/mdeberta-v3-base")

# ---------------------------------------
# Metric
# ---------------------------------------
def compute_metrics(p):
    """Return the macro-averaged F1 of one evaluation pass.

    ``p`` must expose ``predictions`` (per-class scores, one row per example)
    and ``label_ids`` (the reference labels). The result is a one-entry dict
    keyed "f1_macro".
    """
    predicted_classes = np.argmax(p.predictions, axis=1)
    macro_f1 = f1_score(p.label_ids, predicted_classes, average="macro")
    return {"f1_macro": macro_f1}

# ---------------------------------------
# MAIN LOOP: TRAIN/VAL SPLIT + DEV PREDICTION
# ---------------------------------------
import gc  # stdlib; used below to reclaim the previous language's model

# One seed for the train/validation split, the classifier-head initialisation
# and the Trainer.
SEED = 42

f1_results = []         # one {"language": ..., "f1_macro": ...} record per language
predicted_outputs = {}  # language code -> dev DataFrame with a prediction column

for lang, dfs in data.items():
    print("\n====================================")
    print(f" LANGUAGE: {lang}")

    # Drop the references to the model/trainer of the previous language (or of
    # an earlier run of this cell) *before* loading the next model. While those
    # references exist, torch.cuda.empty_cache() cannot give their memory back,
    # so the old and the new model would be alive at the same time.
    model = trainer = None
    gc.collect()
    torch.cuda.empty_cache()

    train_df = dfs["train"]
    dev_df   = dfs["dev"]

    # 1) Keep only the training rows that carry a label
    train_labeled = train_df.dropna(subset=["polarization"]).reset_index(drop=True)

    # 2) Stratified 80/20 split of the labeled rows into train/validation
    train_split, val_split = train_test_split(
        train_labeled,
        test_size=0.20,
        stratify=train_labeled["polarization"],
        random_state=SEED,
        shuffle=True,
    )

    print(f"Train size: {len(train_split)},  Validation size: {len(val_split)}")

    train_dataset = PolarizationDataset(train_split, tokenizer, require_labels=True)
    val_dataset   = PolarizationDataset(val_split,   tokenizer, require_labels=True)

    # 3) Fine-tune a fresh model for this language.
    # The classification head is newly initialised (see the "newly initialized"
    # notice in the log), so seed torch first to make that init repeatable.
    torch.manual_seed(SEED)
    model = AutoModelForSequenceClassification.from_pretrained(
        "microsoft/mdeberta-v3-base", num_labels=2
    )

    training_args = TrainingArguments(
        output_dir=f"./model_{lang}",
        learning_rate=1e-5,
        num_train_epochs=5,
        per_device_train_batch_size=64,
        per_device_eval_batch_size=16,
        eval_strategy="epoch",
        save_strategy="no",  # no checkpoints: the final-epoch weights are used
        logging_steps=20,
        weight_decay=0.01,
        warmup_ratio=0.1,
        seed=SEED,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        data_collator=DataCollatorWithPadding(tokenizer),
    )

    trainer.train()

    # 4) Macro-F1 on the held-out validation split (final-epoch weights)
    metrics = trainer.evaluate()
    f1 = metrics["eval_f1_macro"]
    print(f" {lang} Validation F1 = {f1:.4f}")

    f1_results.append({"language": lang, "f1_macro": f1})

    # 5) Predict on the unlabeled dev set; the dataset supplies dummy labels
    print(f" Predicting for dev set ({len(dev_df)} rows)...")
    dev_dataset = PolarizationDataset(dev_df, tokenizer, require_labels=False)
    preds = trainer.predict(dev_dataset)
    pred_labels = np.argmax(preds.predictions, axis=1)

    # Adds the column in place, i.e. on the same frame that is stored in `data`.
    dev_df["predicted_polarization"] = pred_labels
    predicted_outputs[lang] = dev_df

# ---------------------------------------
# SAVE PREDICTIONS
# ---------------------------------------
# Write one CSV per language into the current working directory.
for lang, df_pred in predicted_outputs.items():
    out_name = f"{lang}_dev_predicted.csv"
    df_pred.to_csv(out_name, index=False)
    print(f" Saved: {out_name}")

# ---------------------------------------
# FINAL F1 SCORES
# ---------------------------------------
# Collect the per-language validation scores into one table and print it
# under a heading.
f1_df = pd.DataFrame(data=f1_results)
print("\n FINAL F1 SCORES:", f1_df, sep="\n")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).





 LANGUAGE: eng
Train size: 2140,  Validation size: 536


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.6743,0.575518,0.384615
2,0.5345,0.52042,0.682765
3,0.4752,0.497338,0.768257
4,0.4402,0.474629,0.778252
5,0.4149,0.46908,0.776085


 eng Validation F1 = 0.7761
 Predicting for dev set (133 rows)...

 LANGUAGE: hin
Train size: 2195,  Validation size: 549


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.7989,0.410252,0.460707
2,0.3908,0.35802,0.460707
3,0.3128,0.317473,0.674844
4,0.2822,0.301805,0.735543
5,0.279,0.300744,0.739867


 hin Validation F1 = 0.7399
 Predicting for dev set (137 rows)...

 LANGUAGE: spa
Train size: 2644,  Validation size: 661


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.6934,0.689585,0.557413
2,0.6376,0.618649,0.689135
3,0.5657,0.57697,0.688319
4,0.5401,0.572387,0.723698
5,0.5179,0.559405,0.711873


 spa Validation F1 = 0.7119
 Predicting for dev set (165 rows)...

 LANGUAGE: urd
Train size: 2279,  Validation size: 570


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.7582,0.581413,0.409326
2,0.558,0.538239,0.409326
3,0.5155,0.526869,0.409326
4,0.4768,0.504108,0.646668
5,0.465,0.506018,0.710498


 urd Validation F1 = 0.7105
 Predicting for dev set (142 rows)...

 LANGUAGE: zho
Train size: 3424,  Validation size: 856


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.6934,0.60514,0.755833
2,0.4802,0.464657,0.806048
3,0.3701,0.414098,0.829743
4,0.3152,0.408223,0.835283
5,0.2736,0.404882,0.838041


 zho Validation F1 = 0.8380
 Predicting for dev set (214 rows)...

 LANGUAGE: arb
Train size: 2704,  Validation size: 676


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.6462,0.60924,0.656416
2,0.4739,0.47726,0.772171
3,0.4023,0.485606,0.757449
4,0.3665,0.482987,0.766117
5,0.3396,0.494079,0.765612


 arb Validation F1 = 0.7656
 Predicting for dev set (169 rows)...
 Saved: eng_dev_predicted.csv
 Saved: hin_dev_predicted.csv
 Saved: spa_dev_predicted.csv
 Saved: urd_dev_predicted.csv
 Saved: zho_dev_predicted.csv
 Saved: arb_dev_predicted.csv

 FINAL F1 SCORES:
  language  f1_macro
0      eng  0.776085
1      hin  0.739867
2      spa  0.711873
3      urd  0.710498
4      zho  0.838041
5      arb  0.765612


In [8]:

# `drive` is the google.colab helper imported in the notebook's first cell, so
# that cell must have run before this one. When the drive is already mounted
# Colab only prints a "Drive already mounted" notice.
drive.mount('/content/drive')
import torch
# Returns cached-but-unused GPU memory to the driver. Tensors that are still
# referenced (e.g. a model or trainer left over from an earlier cell) are not
# freed by this call.
torch.cuda.empty_cache()
import pandas as pd
import numpy as np
import torch
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding
)
from torch.utils.data import Dataset

# ---------------------------------------
# Dataset class
# ---------------------------------------
class PolarizationDataset(Dataset):
    """Map-style dataset that tokenizes one text per item for classification.

    Args:
        df: DataFrame with a "text" column and, when ``require_labels`` is
            true, a "polarization" column whose values convert to ``int``
            (so it must not contain NaN).
        tokenizer: Callable that accepts a string plus the ``truncation``,
            ``padding``, ``max_length`` and ``return_tensors`` keyword
            arguments and returns a mapping of tensors.
        require_labels: When false, every item gets the dummy label 0 so the
            dataset can be used for prediction on unlabeled data.
        max_length: Length every example is truncated/padded to. Defaults to
            256, the value that was previously hard-coded.
    """

    def __init__(self, df, tokenizer, require_labels=True, max_length=256):
        # Missing texts become empty strings; astype(str) additionally guards
        # against non-string cells (e.g. numbers) that a tokenizer would reject.
        self.texts = df["text"].fillna("").astype(str).tolist()
        if require_labels:
            self.labels = df["polarization"].astype(int).tolist()
        else:
            self.labels = [0] * len(self.texts)  # dummy labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of examples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize the text at ``idx`` and return its tensors plus "labels"."""
        enc = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt",
        )
        # Drop the leading dimension of each returned tensor so that an item
        # describes a single example rather than a batch of one.
        item = {k: v.squeeze(0) for k, v in enc.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item

# ---------------------------------------
# Load data
# ---------------------------------------
# Language codes to process; each one names a CSV in subtask1/train and subtask1/dev.
languages = ["eng", "hin", "spa", "urd", "zho", "arb"]

# data[lang] -> {"train": DataFrame, "dev": DataFrame}
data = {}
for lang in languages:
    train_df = pd.read_csv(f"subtask1/train/{lang}.csv")  # labeled split
    dev_df = pd.read_csv(f"subtask1/dev/{lang}.csv")  # unlabeled split
    data[lang] = dict(train=train_df, dev=dev_df)

# A single tokenizer instance is reused for every language.
tokenizer = AutoTokenizer.from_pretrained("microsoft/mdeberta-v3-base")

# ---------------------------------------
# Metric
# ---------------------------------------
def compute_metrics(p):
    """Return the macro-averaged F1 of one evaluation pass.

    ``p`` must expose ``predictions`` (per-class scores, one row per example)
    and ``label_ids`` (the reference labels). The result is a one-entry dict
    keyed "f1_macro".
    """
    predicted_classes = np.argmax(p.predictions, axis=1)
    macro_f1 = f1_score(p.label_ids, predicted_classes, average="macro")
    return {"f1_macro": macro_f1}

# ---------------------------------------
# MAIN LOOP: TRAIN/VAL SPLIT + DEV PREDICTION
# ---------------------------------------
import gc  # stdlib; used below to reclaim the previous language's model

# One seed for the train/validation split, the classifier-head initialisation
# and the Trainer.
SEED = 42

f1_results = []         # one {"language": ..., "f1_macro": ...} record per language
predicted_outputs = {}  # language code -> dev DataFrame with a prediction column

for lang, dfs in data.items():
    print("\n====================================")
    print(f" LANGUAGE: {lang}")

    # Drop the references to the model/trainer of the previous language (or of
    # an earlier run of this cell) *before* loading the next model. While those
    # references exist, torch.cuda.empty_cache() cannot give their memory back,
    # so the old and the new model would be alive at the same time.
    model = trainer = None
    gc.collect()
    torch.cuda.empty_cache()

    train_df = dfs["train"]
    dev_df   = dfs["dev"]

    # 1) Keep only the training rows that carry a label
    train_labeled = train_df.dropna(subset=["polarization"]).reset_index(drop=True)

    # 2) Stratified 80/20 split of the labeled rows into train/validation
    train_split, val_split = train_test_split(
        train_labeled,
        test_size=0.20,
        stratify=train_labeled["polarization"],
        random_state=SEED,
        shuffle=True,
    )

    print(f"Train size: {len(train_split)},  Validation size: {len(val_split)}")

    train_dataset = PolarizationDataset(train_split, tokenizer, require_labels=True)
    val_dataset   = PolarizationDataset(val_split,   tokenizer, require_labels=True)

    # 3) Fine-tune a fresh model for this language.
    # The classification head is newly initialised (see the "newly initialized"
    # notice in the log), so seed torch first to make that init repeatable.
    torch.manual_seed(SEED)
    model = AutoModelForSequenceClassification.from_pretrained(
        "microsoft/mdeberta-v3-base", num_labels=2
    )

    training_args = TrainingArguments(
        output_dir=f"./model_{lang}",
        learning_rate=1e-5,
        num_train_epochs=5,
        per_device_train_batch_size=32,
        per_device_eval_batch_size=16,

        warmup_ratio=0.1,
        lr_scheduler_type="cosine",
        weight_decay=0.01,

        fp16=True,
        max_grad_norm=1.0,

        eval_strategy="epoch",
        save_strategy="epoch",
        # A checkpoint is written every epoch for every language; cap how many
        # are kept so the disk does not fill up. The best checkpoint is still
        # retained because load_best_model_at_end is set.
        save_total_limit=1,
        load_best_model_at_end=True,
        metric_for_best_model="f1_macro",
        greater_is_better=True,
        logging_steps=20,
        seed=SEED,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        data_collator=DataCollatorWithPadding(tokenizer),
    )

    trainer.train()

    # 4) Macro-F1 on the held-out validation split, using the best checkpoint
    #    that load_best_model_at_end restored after training
    metrics = trainer.evaluate()
    f1 = metrics["eval_f1_macro"]
    print(f" {lang} Validation F1 = {f1:.4f}")

    f1_results.append({"language": lang, "f1_macro": f1})

    # 5) Predict on the unlabeled dev set; the dataset supplies dummy labels
    print(f" Predicting for dev set ({len(dev_df)} rows)...")
    dev_dataset = PolarizationDataset(dev_df, tokenizer, require_labels=False)
    preds = trainer.predict(dev_dataset)
    pred_labels = np.argmax(preds.predictions, axis=1)

    # Adds the column in place, i.e. on the same frame that is stored in `data`.
    dev_df["predicted_polarization"] = pred_labels
    predicted_outputs[lang] = dev_df

# ---------------------------------------
# SAVE PREDICTIONS
# ---------------------------------------
# Write one CSV per language into the current working directory.
for lang, df_pred in predicted_outputs.items():
    out_name = f"{lang}_dev_predicted.csv"
    df_pred.to_csv(out_name, index=False)
    print(f" Saved: {out_name}")

# ---------------------------------------
# FINAL F1 SCORES
# ---------------------------------------
# Collect the per-language validation scores into one table and print it
# under a heading.
f1_df = pd.DataFrame(data=f1_results)
print("\n FINAL F1 SCORES:", f1_df, sep="\n")





Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).





 LANGUAGE: eng
Train size: 2140,  Validation size: 536


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.5833,0.534133,0.384615
2,0.4811,0.475503,0.774326
3,0.3833,0.519145,0.762127
4,0.3451,0.464389,0.780857
5,0.3205,0.474658,0.783193


 eng Validation F1 = 0.7832
 Predicting for dev set (133 rows)...

 LANGUAGE: hin
Train size: 2195,  Validation size: 549


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.4134,0.389759,0.460707
2,0.3461,0.305972,0.731442
3,0.2668,0.302578,0.743138
4,0.2263,0.31657,0.762816
5,0.1644,0.315392,0.765854


 hin Validation F1 = 0.7659
 Predicting for dev set (137 rows)...

 LANGUAGE: spa
Train size: 2644,  Validation size: 661


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.6795,0.643613,0.664312
2,0.5819,0.553948,0.71289
3,0.464,0.54994,0.735644
4,0.4214,0.559146,0.739057
5,0.4545,0.562146,0.739415


 spa Validation F1 = 0.7394
 Predicting for dev set (165 rows)...

 LANGUAGE: urd
Train size: 2279,  Validation size: 570


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.6089,0.547224,0.409326
2,0.5363,0.49545,0.528194
3,0.457,0.471344,0.745109
4,0.4295,0.473527,0.737246
5,0.4002,0.473931,0.741766


 urd Validation F1 = 0.7451
 Predicting for dev set (142 rows)...

 LANGUAGE: zho
Train size: 3424,  Validation size: 856


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.5051,0.458759,0.79795
2,0.3929,0.39189,0.83622
3,0.2979,0.378026,0.84919
4,0.2033,0.388466,0.861932
5,0.208,0.392325,0.862113


 zho Validation F1 = 0.8621
 Predicting for dev set (214 rows)...

 LANGUAGE: arb
Train size: 2704,  Validation size: 676


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.5192,0.543641,0.701777
2,0.4148,0.466359,0.782544
3,0.3356,0.479337,0.773594
4,0.3014,0.521434,0.774456
5,0.2803,0.515198,0.765161


 arb Validation F1 = 0.7825
 Predicting for dev set (169 rows)...
 Saved: eng_dev_predicted.csv
 Saved: hin_dev_predicted.csv
 Saved: spa_dev_predicted.csv
 Saved: urd_dev_predicted.csv
 Saved: zho_dev_predicted.csv
 Saved: arb_dev_predicted.csv

 FINAL F1 SCORES:
  language  f1_macro
0      eng  0.783193
1      hin  0.765854
2      spa  0.739415
3      urd  0.745109
4      zho  0.862113
5      arb  0.782544
