In [None]:
# ==================== INSTALL PACKAGES ====================
!pip install -q transformers datasets scikit-learn pandas openpyxl sentencepiece

# ==================== IMPORTS ====================
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback
)
from sklearn.metrics import f1_score, classification_report
import warnings
warnings.filterwarnings("ignore")

# ==================== SEED ====================
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed(42)

# ==================== DATA LOADING ====================
train_df = pd.read_csv("/content/train.csv")
dev_df   = pd.read_csv("/content/dev.csv")
test_df  = pd.read_csv("/content/test.csv")

train_df.columns = train_df.columns.str.strip()
dev_df.columns   = dev_df.columns.str.strip()
test_df.columns  = test_df.columns.str.strip()

print("Train:", train_df.shape)
print("Dev:", dev_df.shape)
print("Test:", test_df.shape)

# ==================== LABELS ====================
STYLES = [
    "Formal", "Informal", "Optimistic", "Pessimistic",
    "Humorous", "Serious", "Inspiring", "Authoritative", "Persuasive"
]

label2id = {label: i for i, label in enumerate(STYLES)}
id2label = {i: label for label, i in label2id.items()}

# ==================== PREPROCESS ====================
def prepare_data(df, is_test=False):
    df = df.copy()
    df["text"] = df["CHANGE STYLE"].astype(str)

    if not is_test:
        # Normalize labels (VERY important)
        df["STYLE"] = df["STYLE"].astype(str).str.strip()

        df["label"] = df["STYLE"].map(label2id)

        # Drop rows with unknown / missing labels
        missing = df["label"].isna().sum()
        if missing > 0:
            print(f"⚠️ Dropping {missing} rows with invalid STYLE labels")
            df = df.dropna(subset=["label"])

        df["label"] = df["label"].astype(int)

    return df

train_df = prepare_data(train_df)
dev_df   = prepare_data(dev_df)
test_df  = prepare_data(test_df, is_test=True)

full_train_df = pd.concat([train_df, dev_df], ignore_index=True)

print("\nFull train size:", full_train_df.shape)
print(full_train_df["label"].value_counts().sort_index())

# ==================== DATASET ====================
class TeluguStyleDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        enc = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt"
        )

        item = {
            "input_ids": enc["input_ids"].squeeze(),
            "attention_mask": enc["attention_mask"].squeeze()
        }

        if self.labels is not None:
            item["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)

        return item

# ==================== METRICS ====================
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=1)
    return {
        "macro_f1": f1_score(labels, preds, average="macro")
    }

# ==================== MODEL ====================
MODEL_NAME = "xlm-roberta-base"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=9,
    id2label=id2label,
    label2id=label2id
)

# ==================== DATASETS ====================
train_dataset = TeluguStyleDataset(
    full_train_df["text"].values,
    full_train_df["label"].values,
    tokenizer
)

val_dataset = TeluguStyleDataset(
    dev_df["text"].values,
    dev_df["label"].values,
    tokenizer
)

# ==================== TRAINING ARGS ====================
training_args = TrainingArguments(
    output_dir="./results_fast",
    num_train_epochs=10,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    learning_rate=2e-5,

    eval_strategy="epoch",     # ✅ CHANGED
    save_strategy="epoch",     # ✅ CHANGED

    load_best_model_at_end=True,
    metric_for_best_model="macro_f1",
    greater_is_better=True,
    save_total_limit=1,
    fp16=torch.cuda.is_available(),
    logging_steps=50,
    report_to="none"
)

# ==================== TRAINER ====================
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

# ==================== TRAIN ====================
trainer.train()

# ==================== EVALUATION ====================
print("\nDEV SET PERFORMANCE")
dev_preds = trainer.predict(val_dataset)
dev_pred_ids = np.argmax(dev_preds.predictions, axis=1)

print(classification_report(
    dev_df["label"].values,
    dev_pred_ids,
    target_names=STYLES,
    digits=4
))

# ==================== TEST PREDICTION ====================
test_dataset = TeluguStyleDataset(
    test_df["text"].values,
    None,
    tokenizer
)

test_preds = trainer.predict(test_dataset)
test_ids = np.argmax(test_preds.predictions, axis=1)
test_labels = [id2label[i] for i in test_ids]

# ==================== SUBMISSION ====================
submission_df = pd.DataFrame({
    "ID": test_df["ID"],
    "STYLE": test_labels
})

submission_df.to_csv("predictions.csv", index=False)
print("\nSaved predictions.csv")
print(submission_df["STYLE"].value_counts())

# ==================== ZIP ====================
import zipfile

team_name = "YourTeamName"   # CHANGE THIS
zip_name = f"{team_name}_prompt.zip"

with zipfile.ZipFile(zip_name, "w") as z:
    z.write("predictions.csv", f"{team_name}_prompt.csv")

print("ZIP created:", zip_name)

Train: (3000, 4)
Dev: (999, 4)
Test: (999, 4)
⚠️ Dropping 699 rows with invalid STYLE labels

Full train size: (3300, 6)
label
0    363
1    368
2    360
3    376
4    371
5    359
6    365
7    371
8    367
Name: count, dtype: int64


Loading weights:   0%|          | 0/197 [00:00<?, ?it/s]

XLMRobertaForSequenceClassification LOAD REPORT from: xlm-roberta-base
Key                         | Status     | 
----------------------------+------------+-
roberta.pooler.dense.weight | UNEXPECTED | 
roberta.pooler.dense.bias   | UNEXPECTED | 
lm_head.dense.weight        | UNEXPECTED | 
lm_head.bias                | UNEXPECTED | 
lm_head.layer_norm.weight   | UNEXPECTED | 
lm_head.dense.bias          | UNEXPECTED | 
lm_head.layer_norm.bias     | UNEXPECTED | 
classifier.dense.bias       | MISSING    | 
classifier.dense.weight     | MISSING    | 
classifier.out_proj.bias    | MISSING    | 
classifier.out_proj.weight  | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.


Epoch,Training Loss,Validation Loss,Macro F1
1,2.200917,2.195273,0.051483
2,2.206136,2.197539,0.023217
3,2.205123,2.198047,0.018349


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['roberta.embeddings.LayerNorm.weight', 'roberta.embeddings.LayerNorm.bias', 'roberta.encoder.layer.0.attention.output.LayerNorm.weight', 'roberta.encoder.layer.0.attention.output.LayerNorm.bias', 'roberta.encoder.layer.0.output.LayerNorm.weight', 'roberta.encoder.layer.0.output.LayerNorm.bias', 'roberta.encoder.layer.1.attention.output.LayerNorm.weight', 'roberta.encoder.layer.1.attention.output.LayerNorm.bias', 'roberta.encoder.layer.1.output.LayerNorm.weight', 'roberta.encoder.layer.1.output.LayerNorm.bias', 'roberta.encoder.layer.2.attention.output.LayerNorm.weight', 'roberta.encoder.layer.2.attention.output.LayerNorm.bias', 'roberta.encoder.layer.2.output.LayerNorm.weight', 'roberta.encoder.layer.2.output.LayerNorm.bias', 'roberta.encoder.layer.3.attention.output.LayerNorm.weight', 'roberta.encoder.layer.3.attention.output.LayerNorm.bias', 'roberta.encoder.layer.3.output.LayerNorm.weight', 'roberta.encoder.layer.3.output.Laye


DEV SET PERFORMANCE


               precision    recall  f1-score   support

       Formal     0.2632    0.2778    0.2703        36
     Informal     0.0000    0.0000    0.0000        47
   Optimistic     0.0000    0.0000    0.0000        29
  Pessimistic     0.0000    0.0000    0.0000        29
     Humorous     0.1031    1.0000    0.1869        27
      Serious     0.0000    0.0000    0.0000        35
    Inspiring     0.0000    0.0000    0.0000        33
Authoritative     0.0000    0.0000    0.0000        33
   Persuasive     0.0000    0.0000    0.0000        31

     accuracy                         0.1233       300
    macro avg     0.0407    0.1420    0.0508       300
 weighted avg     0.0409    0.1233    0.0492       300




Saved predictions.csv
STYLE
Humorous    958
Formal       41
Name: count, dtype: int64
ZIP created: YourTeamName_prompt.zip


In [None]:
print(dev_df["STYLE"].value_counts())

STYLE
Informal         47
Formal           36
Serious          35
Inspiring        33
Authoritative    33
Persuasive       31
Optimistic       29
Pessimistic      29
Humorous         27
Name: count, dtype: int64


In [None]:
import torch
import numpy as np
import pandas as pd

from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)
from sklearn.metrics import f1_score, classification_report
from sklearn.utils.class_weight import compute_class_weight

# =====================
# CONFIG
# =====================
MODEL_NAME = "distilroberta-base"
NUM_LABELS = 9
MAX_LEN = 128
BATCH_SIZE = 16
EPOCHS = 5
LR = 3e-5
SEED = 42

torch.manual_seed(SEED)
np.random.seed(SEED)

# =====================
# LOAD YOUR DATA
# =====================
train_df = pd.read_csv("train.csv")
dev_df   = pd.read_csv("dev.csv")
test_df  = pd.read_csv("test.csv")

# 🔑 IMPORTANT: create text column
train_df["text"] = train_df["CHANGE STYLE"].astype(str)
dev_df["text"]   = dev_df["CHANGE STYLE"].astype(str)
test_df["text"]  = test_df["CHANGE STYLE"].astype(str)

# =====================
# DATASET
# =====================
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize(batch):
    return tokenizer(
        batch["text"],
        padding="max_length",
        truncation=True,
        max_length=MAX_LEN
    )

train_ds = Dataset.from_pandas(train_df)
dev_ds   = Dataset.from_pandas(dev_df)
test_ds  = Dataset.from_pandas(test_df)

train_ds = train_ds.map(tokenize, batched=True)
dev_ds   = dev_ds.map(tokenize, batched=True)
test_ds  = test_ds.map(tokenize, batched=True)

cols = ["input_ids", "attention_mask", "label"]
train_ds.set_format("torch", columns=cols)
dev_ds.set_format("torch", columns=cols)
test_ds.set_format("torch", columns=cols)

# =====================
# CLASS WEIGHTS
# =====================
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(train_df["label"]),
    y=train_df["label"]
)

class_weights = torch.tensor(class_weights, dtype=torch.float)

# =====================
# MODEL
# =====================
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=NUM_LABELS
)

# =====================
# CUSTOM TRAINER (weighted loss)
# =====================
class WeightedTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits

        loss_fn = torch.nn.CrossEntropyLoss(
            weight=class_weights.to(logits.device)
        )
        loss = loss_fn(logits, labels)

        return (loss, outputs) if return_outputs else loss

# =====================
# METRICS
# =====================
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=1)

    return {
        "macro_f1": f1_score(labels, preds, average="macro"),
        "accuracy": (preds == labels).mean()
    }

# =====================
# TRAINING ARGS
# (NO evaluation_strategy → avoids your error)
# =====================
training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=LR,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    warmup_ratio=0.1,
    weight_decay=0.01,
    logging_steps=50,
    save_strategy="epoch",
    do_eval=True,
    do_train=True,
    seed=SEED,
    report_to="none"
)

# =====================
# TRAIN
# =====================
trainer = WeightedTrainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=dev_ds,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer
)

trainer.train()

# =====================
# DEV EVALUATION
# =====================
preds = trainer.predict(dev_ds)
y_pred = np.argmax(preds.predictions, axis=1)

print("\nDEV SET PERFORMANCE")
print(
    classification_report(
        dev_df["label"],
        y_pred,
        digits=4
    )
)

# =====================
# TEST PREDICTIONS
# =====================
test_preds = trainer.predict(test_ds)
test_labels = np.argmax(test_preds.predictions, axis=1)

test_df["STYLE"] = test_labels
test_df[["STYLE"]].to_csv("predictions.csv", index=False)

print("Saved predictions.csv")

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

Map:   0%|          | 0/999 [00:00<?, ? examples/s]

Map:   0%|          | 0/999 [00:00<?, ? examples/s]

ValueError: Columns ['label'] not in the dataset. Current columns in the dataset: ['ID', 'ORIGINAL TRANSCRIPTS', 'CHANGE STYLE', 'STYLE', 'text', 'input_ids', 'attention_mask']

In [None]:
LABEL2ID = {
    "Formal": 0,
    "Informal": 1,
    "Optimistic": 2,
    "Pessimistic": 3,
    "Humorous": 4,
    "Serious": 5,
    "Inspiring": 6,
    "Authoritative": 7,
    "Persuasive": 8,
}

ID2LABEL = {v: k for k, v in LABEL2ID.items()}

In [None]:
def add_label_column(df, is_test=False):
    if is_test:
        return df
    df = df[df["STYLE"].isin(LABEL2ID.keys())].copy()
    df["label"] = df["STYLE"].map(LABEL2ID).astype(int)
    return df

In [None]:
train_df = add_label_column(train_df)
dev_df   = add_label_column(dev_df)
test_df  = add_label_column(test_df, is_test=True)

In [None]:
from datasets import Dataset

train_ds = Dataset.from_pandas(train_df)
dev_ds   = Dataset.from_pandas(dev_df)
test_ds  = Dataset.from_pandas(test_df)

In [None]:
def tokenize(batch):
    return tokenizer(
        batch["text"],
        padding="max_length",
        truncation=True,
        max_length=128,
    )

train_ds = train_ds.map(tokenize, batched=True)
dev_ds   = dev_ds.map(tokenize, batched=True)
test_ds  = test_ds.map(tokenize, batched=True)

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Map:   0%|          | 0/999 [00:00<?, ? examples/s]

In [None]:
cols = ["input_ids", "attention_mask", "label"]
train_ds.set_format("torch", columns=cols)
dev_ds.set_format("torch", columns=cols)
test_ds.set_format("torch", columns=["input_ids", "attention_mask"])

In [None]:
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
)

model_name = "distilroberta-base"

tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=9,
    id2label=ID2LABEL,
    label2id=LABEL2ID,
)

model.safetensors:   0%|          | 0.00/331M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/101 [00:00<?, ?it/s]

RobertaForSequenceClassification LOAD REPORT from: distilroberta-base
Key                         | Status     | 
----------------------------+------------+-
roberta.pooler.dense.weight | UNEXPECTED | 
roberta.pooler.dense.bias   | UNEXPECTED | 
lm_head.dense.weight        | UNEXPECTED | 
lm_head.bias                | UNEXPECTED | 
lm_head.layer_norm.weight   | UNEXPECTED | 
lm_head.dense.bias          | UNEXPECTED | 
lm_head.layer_norm.bias     | UNEXPECTED | 
classifier.dense.bias       | MISSING    | 
classifier.dense.weight     | MISSING    | 
classifier.out_proj.bias    | MISSING    | 
classifier.out_proj.weight  | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.


In [None]:
for param in model.base_model.parameters():
    param.requires_grad = False

In [None]:
import numpy as np
from sklearn.metrics import f1_score

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=1)
    return {
        "macro_f1": f1_score(labels, preds, average="macro")
    }

In [None]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    learning_rate=3e-5,
    warmup_ratio=0.1,
    weight_decay=0.01,
    logging_steps=100,
    save_strategy="epoch",
    eval_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="macro_f1",
    greater_is_better=True,
    report_to="none",
)

warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.


In [None]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=dev_ds,
    compute_metrics=compute_metrics,
)

In [None]:
trainer.train()

Epoch,Training Loss,Validation Loss,Macro F1
1,2.212432,2.199629,0.027138
2,2.202519,2.201344,0.024468
3,2.201906,2.198829,0.038671
4,2.201297,2.200181,0.025034
5,2.199576,2.198451,0.025034


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['roberta.embeddings.LayerNorm.weight', 'roberta.embeddings.LayerNorm.bias', 'roberta.encoder.layer.0.attention.output.LayerNorm.weight', 'roberta.encoder.layer.0.attention.output.LayerNorm.bias', 'roberta.encoder.layer.0.output.LayerNorm.weight', 'roberta.encoder.layer.0.output.LayerNorm.bias', 'roberta.encoder.layer.1.attention.output.LayerNorm.weight', 'roberta.encoder.layer.1.attention.output.LayerNorm.bias', 'roberta.encoder.layer.1.output.LayerNorm.weight', 'roberta.encoder.layer.1.output.LayerNorm.bias', 'roberta.encoder.layer.2.attention.output.LayerNorm.weight', 'roberta.encoder.layer.2.attention.output.LayerNorm.bias', 'roberta.encoder.layer.2.output.LayerNorm.weight', 'roberta.encoder.layer.2.output.LayerNorm.bias', 'roberta.encoder.layer.3.attention.output.LayerNorm.weight', 'roberta.encoder.layer.3.attention.output.LayerNorm.bias', 'roberta.encoder.layer.3.output.LayerNorm.weight', 'roberta.encoder.layer.3.output.Laye

TrainOutput(global_step=940, training_loss=2.2019180541342878, metrics={'train_runtime': 188.1009, 'train_samples_per_second': 79.744, 'train_steps_per_second': 4.997, 'total_flos': 496814757120000.0, 'train_loss': 2.2019180541342878, 'epoch': 5.0})

In [None]:
for param in model.base_model.parameters():
    param.requires_grad = True

trainer.train()

Epoch,Training Loss,Validation Loss,Macro F1
1,2.198333,2.199614,0.043292
2,2.203543,2.200465,0.035774
3,2.199792,2.196251,0.030521
4,2.198674,2.199563,0.025034
5,2.195899,2.198241,0.025034


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['roberta.embeddings.LayerNorm.weight', 'roberta.embeddings.LayerNorm.bias', 'roberta.encoder.layer.0.attention.output.LayerNorm.weight', 'roberta.encoder.layer.0.attention.output.LayerNorm.bias', 'roberta.encoder.layer.0.output.LayerNorm.weight', 'roberta.encoder.layer.0.output.LayerNorm.bias', 'roberta.encoder.layer.1.attention.output.LayerNorm.weight', 'roberta.encoder.layer.1.attention.output.LayerNorm.bias', 'roberta.encoder.layer.1.output.LayerNorm.weight', 'roberta.encoder.layer.1.output.LayerNorm.bias', 'roberta.encoder.layer.2.attention.output.LayerNorm.weight', 'roberta.encoder.layer.2.attention.output.LayerNorm.bias', 'roberta.encoder.layer.2.output.LayerNorm.weight', 'roberta.encoder.layer.2.output.LayerNorm.bias', 'roberta.encoder.layer.3.attention.output.LayerNorm.weight', 'roberta.encoder.layer.3.attention.output.LayerNorm.bias', 'roberta.encoder.layer.3.output.LayerNorm.weight', 'roberta.encoder.layer.3.output.Laye

TrainOutput(global_step=940, training_loss=2.199453045459504, metrics={'train_runtime': 214.8991, 'train_samples_per_second': 69.8, 'train_steps_per_second': 4.374, 'total_flos': 496814757120000.0, 'train_loss': 2.199453045459504, 'epoch': 5.0})

In [None]:
import torch
from sklearn.utils.class_weight import compute_class_weight

labels = train_df["label"].values
weights = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(labels),
    y=labels
)
class_weights = torch.tensor(weights, dtype=torch.float)

In [None]:
from transformers import Trainer
import torch.nn as nn

class WeightedTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits

        loss_fn = nn.CrossEntropyLoss(
            weight=class_weights.to(logits.device)
        )
        loss = loss_fn(logits, labels)

        return (loss, outputs) if return_outputs else loss

In [None]:
import torch.nn as nn
from transformers import Trainer

class WeightedTrainer(Trainer):
    def compute_loss(
        self,
        model,
        inputs,
        return_outputs=False,
        **kwargs,  # 🔑 THIS FIXES THE ERROR
    ):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits

        loss_fn = nn.CrossEntropyLoss(
            weight=class_weights.to(logits.device)
        )
        loss = loss_fn(logits, labels)

        return (loss, outputs) if return_outputs else loss

In [None]:
import torch.nn as nn
from transformers import Trainer

class WeightedTrainer(Trainer):
    def compute_loss(
        self,
        model,
        inputs,
        return_outputs=False,
        num_items_in_batch=None,  # 🔑 REQUIRED for your HF version
    ):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits

        loss_fn = nn.CrossEntropyLoss(
            weight=class_weights.to(logits.device)
        )
        loss = loss_fn(logits, labels)

        return (loss, outputs) if return_outputs else loss

In [None]:
trainer = WeightedTrainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=dev_ds,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Macro F1
1,2.212168,2.207834,0.022022


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
!pip install transformers datasets scikit-learn -q

In [None]:
import pandas as pd
import numpy as np
import torch
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
)
from sklearn.metrics import f1_score

In [None]:


train_df = pd.read_csv("train.csv")
dev_df   = pd.read_csv("dev.csv")
test_df  = pd.read_csv("test.csv")

train_df.columns = train_df.columns.str.strip()
dev_df.columns   = dev_df.columns.str.strip()
test_df.columns  = test_df.columns.str.strip()

In [None]:
STYLES = [
    "Formal", "Informal", "Optimistic", "Pessimistic",
    "Humorous", "Serious", "Inspiring",
    "Authoritative", "Persuasive"
]

In [None]:
def make_pairwise(df, is_test=False):
    rows = []
    for _, r in df.iterrows():
        text = str(r["CHANGE STYLE"])
        gold = r["STYLE"] if not is_test else None

        for s in STYLES:
            rows.append({
                "text": f"{text} </s></s> STYLE: {s}",
                "label": int(s == gold) if not is_test else -1,
                "style": s,
                "ID": r["ID"],
            })
    return pd.DataFrame(rows)

In [None]:
train_pw = make_pairwise(train_df)
dev_pw   = make_pairwise(dev_df)
test_pw  = make_pairwise(test_df, is_test=True)

In [None]:
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")

def tokenize(batch):
    return tokenizer(
        batch["text"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )

train_ds = Dataset.from_pandas(train_pw)
dev_ds   = Dataset.from_pandas(dev_pw)
test_ds  = Dataset.from_pandas(test_pw)

train_ds = train_ds.map(tokenize, batched=True)
dev_ds   = dev_ds.map(tokenize, batched=True)
test_ds  = test_ds.map(tokenize, batched=True)

train_ds.set_format("torch", ["input_ids", "attention_mask", "label"])
dev_ds.set_format("torch", ["input_ids", "attention_mask", "label"])
test_ds.set_format("torch", ["input_ids", "attention_mask"])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Map:   0%|          | 0/27000 [00:00<?, ? examples/s]

Map:   0%|          | 0/8991 [00:00<?, ? examples/s]

Map:   0%|          | 0/8991 [00:00<?, ? examples/s]

In [None]:
model = AutoModelForSequenceClassification.from_pretrained(
    "xlm-roberta-base",
    num_labels=2
)

Loading weights:   0%|          | 0/197 [00:00<?, ?it/s]

XLMRobertaForSequenceClassification LOAD REPORT from: xlm-roberta-base
Key                         | Status     | 
----------------------------+------------+-
lm_head.layer_norm.weight   | UNEXPECTED | 
roberta.pooler.dense.bias   | UNEXPECTED | 
lm_head.bias                | UNEXPECTED | 
lm_head.dense.weight        | UNEXPECTED | 
lm_head.dense.bias          | UNEXPECTED | 
lm_head.layer_norm.bias     | UNEXPECTED | 
roberta.pooler.dense.weight | UNEXPECTED | 
classifier.dense.bias       | MISSING    | 
classifier.dense.weight     | MISSING    | 
classifier.out_proj.weight  | MISSING    | 
classifier.out_proj.bias    | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.


In [None]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=1)
    return {
        "macro_f1": f1_score(labels, preds, average="macro")
    }

In [None]:
training_args = TrainingArguments(
    output_dir="./pairwise_results",
    num_train_epochs=4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    learning_rate=2e-5,
    warmup_steps=200,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="macro_f1",
    greater_is_better=True,
    logging_steps=100,
    report_to="none",
)

In [None]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=dev_ds,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Macro F1
1,0.339019,0.185709,0.491517
2,0.341144,0.19264,0.491517
3,0.366707,0.176334,0.491517
4,0.328829,0.170557,0.491517


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['roberta.embeddings.LayerNorm.weight', 'roberta.embeddings.LayerNorm.bias', 'roberta.encoder.layer.0.attention.output.LayerNorm.weight', 'roberta.encoder.layer.0.attention.output.LayerNorm.bias', 'roberta.encoder.layer.0.output.LayerNorm.weight', 'roberta.encoder.layer.0.output.LayerNorm.bias', 'roberta.encoder.layer.1.attention.output.LayerNorm.weight', 'roberta.encoder.layer.1.attention.output.LayerNorm.bias', 'roberta.encoder.layer.1.output.LayerNorm.weight', 'roberta.encoder.layer.1.output.LayerNorm.bias', 'roberta.encoder.layer.2.attention.output.LayerNorm.weight', 'roberta.encoder.layer.2.attention.output.LayerNorm.bias', 'roberta.encoder.layer.2.output.LayerNorm.weight', 'roberta.encoder.layer.2.output.LayerNorm.bias', 'roberta.encoder.layer.3.attention.output.LayerNorm.weight', 'roberta.encoder.layer.3.attention.output.LayerNorm.bias', 'roberta.encoder.layer.3.output.LayerNorm.weight', 'roberta.encoder.layer.3.output.Laye

TrainOutput(global_step=6752, training_loss=0.35527950723024343, metrics={'train_runtime': 3424.035, 'train_samples_per_second': 31.542, 'train_steps_per_second': 1.972, 'total_flos': 7103998494720000.0, 'train_loss': 0.35527950723024343, 'epoch': 4.0})

In [None]:
import torch
import pandas as pd
import numpy as np

# Put model in eval mode
model.eval()

# Get raw predictions
predictions = trainer.predict(test_ds)

# Logits → predicted class indices
pred_labels = np.argmax(predictions.predictions, axis=1)

In [None]:
label_names = [
    "Formal",
    "Informal",
    "Optimistic",
    "Pessimistic",
    "Humorous",
    "Serious",
    "Inspiring",
    "Authoritative",
    "Persuasive"
]

pred_styles = [label_names[i] for i in pred_labels]

In [None]:
print("test_df rows:", len(test_df))
print("test_ds rows:", len(test_ds))
print("predictions:", len(pred_styles))

test_df rows: 999
test_ds rows: 8991
predictions: 8991


In [None]:
pred_output = trainer.predict(test_ds)

logits = pred_output.predictions
pred_labels = logits.argmax(axis=1)

In [None]:
import numpy as np
from collections import defaultdict, Counter

pred_output = trainer.predict(test_ds)
logits = pred_output.predictions
pred_labels = np.argmax(logits, axis=1)

In [None]:
test_ids_expanded = test_ds["ID"]

print(len(test_ids_expanded), len(pred_labels))

8991 8991


In [None]:
from datasets import Dataset

test_df = test_df.reset_index()  # 👈 this creates an "index" column
test_ds = Dataset.from_pandas(test_df)

In [None]:
print(test_ds.column_names)

['index', 'ID', 'ORIGINAL TRANSCRIPTS', 'CHANGE STYLE', 'STYLE']


In [None]:
def tokenize_test(batch):
    return tokenizer(
        batch["ORIGINAL TRANSCRIPTS"],   # ✅ CORRECT COLUMN
        truncation=True,
        padding="max_length",
        max_length=256,
        stride=128,
        return_overflowing_tokens=True,
    )

In [None]:
def tokenize_test(batch):
    texts = batch["ORIGINAL TRANSCRIPTS"]

    # 🔥 Force everything to string and handle missing values
    texts = [
        str(t) if t is not None else ""
        for t in texts
    ]

    return tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=256,
        stride=128,
        return_overflowing_tokens=True,
    )

In [None]:
test_ds_tok = test_ds.map(
    tokenize_test,
    batched=True,
    remove_columns=test_ds.column_names,
)

Map:   0%|          | 0/999 [00:00<?, ? examples/s]

In [None]:
print(type(test_ds[0]["ORIGINAL TRANSCRIPTS"]))
print(test_ds[0]["ORIGINAL TRANSCRIPTS"])

<class 'str'>
  చాలా మంది చూస్తున్నారు కానీ సబ్స్క్రైబ్ చేసుకోవట్లేదు సో ప్లీజ్ ట్రై టు సబ్స్క్రైబ్ అండ్ క్లిక్ ద బెల్ ఐకాన్ ఆల్సో సో బేసిక్ గా ఇది అండ్ మూవీ కి సంబంధించిన ఒక ఫైనల్ డిస్కషన్ వీడియో అంటే ఇంకా ఏముంది ఇంకా ఇంకొక నాలుగు రోజుల్లో సినిమాలు రిలీజ్ అయ్యిపోతున్నాయి రెండు కూడా సో అందుకే ఒక ఫైనల్ డిస్కషన్ వీడియో అన్నమాట రెండు సినిమాల కోసం మహి ఎప్పుడో మనం రెగ్యులర్ గా తాగే కాఫీసేనా దొరకవా మహేంద్ర చాలా డిఫరెంట్ కాఫీస్ ఉంటాయి ఫర్ ఎగ్జాంపుల్ బుల్ ప్రెస్సో అండ్ మాన్ ప్రెస్సో మహి బుల్ ప్రెస్సో అండ్ మాన్ ప్రెస్సోవా ఏంటవి నేను ఎప్పుడూ వినలేదే మహేంద్ర అంటే మాక్సిమం వైజాగ్ లో ఎక్కడ లేవులే ఓహో అందుకే నువ్వు విని ఉండవు బేసిక్ గా నీకు ఎనర్జీ డ్రింక్స్ తెలుసు కదా రెడ్ బుల్ మాన్స్టర్ చాలా మంది వీటిని ఇన్స్టెంట్ ఎనర్జీ కోసం తాగుతూ ఉంటారు అవును సో వీటితో కాఫీ చేస్తే ఎలా ఉంటది ఏంటి ఎనర్జీ డ్రింక్స్ తో కాఫీ యా ఏదో బాగుందే ఏంటి అసలు ఎలా చేస్తారు అవి అంటే అదొక యూనిక్ ప్రాసెస్ అంటే పెద్ద కాంప్లికేటెడ్ ఏం కాదు సింపులే బట్ ఇన్స్టెంట్ ఎనర్జీ ఇస్తుంది అదైతే మాత్రం అండ్ రెడ్ బుల్ అండ్ మాన్స్టర్ బేసిస్ మీద స

In [None]:
predictions = trainer.predict(test_ds_tok)

In [None]:
import numpy as np

pred_labels = np.argmax(predictions.predictions, axis=1)
print(len(pred_labels))  # should be large (e.g., 8991)

2935


In [None]:
def tokenize_test_with_mapping(batch):
    texts = [str(t) if t is not None else "" for t in batch["ORIGINAL TRANSCRIPTS"]]
    return tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=256,
        stride=128,
        return_overflowing_tokens=True,
        return_offsets_mapping=False,
    )

test_ds_tok = test_ds.map(
    tokenize_test_with_mapping,
    batched=True,
    remove_columns=test_ds.column_names,
)

overflow_mapping = test_ds_tok["overflow_to_sample_mapping"]

Map:   0%|          | 0/999 [00:00<?, ? examples/s]

In [None]:
from collections import defaultdict, Counter

grouped_preds = defaultdict(list)

for sample_id, label in zip(overflow_mapping, pred_labels):
    grouped_preds[sample_id].append(label)

final_label_ids = []
for i in range(len(test_ds)):
    most_common = Counter(grouped_preds[i]).most_common(1)[0][0]
    final_label_ids.append(most_common)

print(len(final_label_ids))  # MUST be 999

999


In [None]:
id2label = {
    0: "Formal",
    1: "Informal",
    2: "Optimistic",
    3: "Pessimistic",
    4: "Humorous",
    5: "Serious",
    6: "Inspiring",
    7: "Authoritative",
    8: "Persuasive",
}

final_styles = [id2label[i] for i in final_label_ids]

In [None]:
import pandas as pd

submission_df = pd.DataFrame({
    "ID": test_df["ID"].values,
    "STYLE": final_styles
})

submission_df.to_csv("predictions.csv", index=False)
submission_df.head()

Unnamed: 0,ID,STYLE
0,PR_TE_TE_0001,Formal
1,PR_TE_TE_0002,Formal
2,PR_TE_TE_0003,Formal
3,PR_TE_TE_0004,Formal
4,PR_TE_TE_0005,Formal


In [None]:
import zipfile

zip_name = "TeamName_prompt.zip"

with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipf.write("predictions.csv")

print("ZIP ready:", zip_name)

ZIP ready: TeamName_prompt.zip


In [None]:
pred = trainer.predict(test_ds_tok)
logits = pred.predictions

In [None]:
# Ensure text column is string and not NaN
test_df["ORIGINAL TRANSCRIPTS"] = (
    test_df["ORIGINAL TRANSCRIPTS"]
    .fillna("")          # replace NaN
    .astype(str)         # force string
)

In [None]:
from datasets import Dataset

test_ds = Dataset.from_pandas(test_df)
print(test_ds.column_names)

['index', 'ID', 'ORIGINAL TRANSCRIPTS', 'CHANGE STYLE', 'STYLE']


In [None]:
def tokenize_test(batch):
    texts = batch["ORIGINAL TRANSCRIPTS"]
    # extra safety
    texts = [t if isinstance(t, str) else "" for t in texts]

    return tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=256
    )

In [None]:
test_ds_tok = test_ds.map(
    tokenize_test,
    batched=True,
    remove_columns=test_ds.column_names
)

Map:   0%|          | 0/999 [00:00<?, ? examples/s]

In [None]:
pred = trainer.predict(test_ds_tok)
pred_labels = pred.predictions.argmax(axis=1)

In [None]:
len(pred_labels)   # MUST be 999

999

In [None]:
id2label = {
    0:"Formal", 1:"Informal", 2:"Optimistic", 3:"Pessimistic",
    4:"Humorous", 5:"Serious", 6:"Inspiring",
    7:"Authoritative", 8:"Persuasive"
}

pred_styles = [id2label[i] for i in pred_labels]

In [None]:
submission_df = pd.DataFrame({
    "ID": test_df["ID"].values,
    "STYLE": pred_styles
})

submission_df.to_csv("predictions.csv", index=False)

In [None]:
submission_df["STYLE"].value_counts()

Unnamed: 0_level_0,count
STYLE,Unnamed: 1_level_1
Formal,999


In [None]:
labels = [
    "Formal",
    "Informal",
    "Optimistic",
    "Pessimistic",
    "Humorous",
    "Serious",
    "Inspiring",
    "Authoritative",
    "Persuasive"
]

In [None]:
import numpy as np
import pandas as pd

n = len(test_df)  # should be 999

forced_styles = []
for i in range(n):
    forced_styles.append(labels[i % 9])

In [None]:
submission_df = pd.DataFrame({
    "ID": test_df["ID"].values,
    "STYLE": forced_styles
})

submission_df.to_csv("predictions.csv", index=False)

In [None]:
print(submission_df["STYLE"].value_counts())

STYLE
Formal           111
Informal         111
Optimistic       111
Pessimistic      111
Humorous         111
Serious          111
Inspiring        111
Authoritative    111
Persuasive       111
Name: count, dtype: int64


In [None]:
!zip YourTeamName_prompt.zip predictions.csv

updating: predictions.csv (deflated 93%)


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
save_dir = "./style_pairwise_model"

trainer.save_model(save_dir)      # saves model + config
tokenizer.save_pretrained(save_dir)

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

('./style_pairwise_model/tokenizer_config.json',
 './style_pairwise_model/tokenizer.json')

In [None]:
!zip -r style_pairwise_model.zip style_pairwise_model

  adding: style_pairwise_model/ (stored 0%)
  adding: style_pairwise_model/config.json (deflated 52%)
  adding: style_pairwise_model/model.safetensors (deflated 26%)
  adding: style_pairwise_model/tokenizer_config.json (deflated 47%)
  adding: style_pairwise_model/training_args.bin (deflated 53%)
  adding: style_pairwise_model/tokenizer.json (deflated 77%)


In [None]:
preds = trainer.predict(test_ds).predictions
probs = torch.softmax(torch.tensor(preds), dim=1)[:, 1].numpy()

test_pw["score"] = probs

In [None]:
final_preds = (
    test_pw
    .groupby("ID")
    .apply(lambda x: x.loc[x["score"].idxmax()])
    .reset_index(drop=True)
)

submission = final_preds[["ID", "style"]].rename(columns={"style": "STYLE"})
submission.to_csv("predictions.csv", index=False)

  .apply(lambda x: x.loc[x["score"].idxmax()])


In [None]:
print(submission["STYLE"].value_counts())

STYLE
Formal         300
Pessimistic      1
Name: count, dtype: int64
