In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/anlp-datasets/preprocessed data/twitter/twitter_test.csv
/kaggle/input/anlp-datasets/preprocessed data/twitter/twitter_dev.csv
/kaggle/input/anlp-datasets/preprocessed data/twitter/twitter_train.csv
/kaggle/input/anlp-datasets/preprocessed data/news Headlines/news_headlines_test.csv
/kaggle/input/anlp-datasets/preprocessed data/news Headlines/news_headlines_train.csv
/kaggle/input/anlp-datasets/preprocessed data/news Headlines/news_headlines_dev.csv
/kaggle/input/anlp-datasets/preprocessed data/reddit/reddit_dev.csv
/kaggle/input/anlp-datasets/preprocessed data/reddit/reddit_test.csv
/kaggle/input/anlp-datasets/preprocessed data/reddit/reddit_train.csv


### Twitter Dataset

In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, f1_score
import itertools
import warnings

warnings.filterwarnings('ignore')

# Custom Dataset Class
class SarcasmDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {
            'input_ids': torch.tensor(self.encodings['input_ids'][idx], dtype=torch.long),
            'attention_mask': torch.tensor(self.encodings['attention_mask'][idx], dtype=torch.long),
            'labels': torch.tensor(self.labels[idx], dtype=torch.long)
        }
        return item

    def __len__(self):
        return len(self.labels)

# Metrics Computation
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {
        "accuracy": accuracy_score(y_true=labels, y_pred=predictions),
        "f1_score": f1_score(y_true=labels, y_pred=predictions)
    }

# Custom Trainer Class
class CustomTrainer(Trainer):
    def __init__(self, class_weights=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.class_weights = class_weights.float() if class_weights is not None else torch.tensor([1.0, 1.0], dtype=torch.float32)

    def compute_loss(self, model, inputs, return_outputs=False):
        inputs = {
            'input_ids': inputs['input_ids'].long(),
            'attention_mask': inputs['attention_mask'].long(),
            'labels': inputs['labels'].long()
        }
        outputs = model(**inputs)
        logits = outputs.get('logits')
        class_weights = self.class_weights.to(logits.device)
        loss_fct = nn.CrossEntropyLoss(weight=class_weights)
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), inputs['labels'].view(-1))
        return (loss, outputs) if return_outputs else loss

# Dataset Preparation
def prepare_datasets(tokenizer):
    train = pd.read_csv('/kaggle/input/anlp-datasets/preprocessed data/twitter/twitter_train.csv')
    val = pd.read_csv('/kaggle/input/anlp-datasets/preprocessed data/twitter/twitter_test.csv')

    train_tweets = train['preprocessed_text'].tolist()
    train_labels = train['Label'].apply(lambda x: 1 if x != 0 else 0).tolist()
    val_tweets = val['preprocessed_text'].tolist()
    val_labels = val['Label'].apply(lambda x: 1 if x != 0 else 0).tolist()

    train_encodings = tokenizer(train_tweets, truncation=True, padding=True, max_length=128, return_tensors='pt')
    val_encodings = tokenizer(val_tweets, truncation=True, padding=True, max_length=128, return_tensors='pt')

    train_dataset = SarcasmDataset(train_encodings, train_labels)
    val_dataset = SarcasmDataset(val_encodings, val_labels)

    label_counts = pd.Series(train_labels).value_counts()
    total_samples = len(train_labels)
    class_weights = torch.tensor([
        total_samples / (len(label_counts) * label_counts[0]),
        total_samples / (len(label_counts) * label_counts[1])
    ], dtype=torch.float32)

    return train_dataset, val_dataset, class_weights

# Hyperparameter Grid Search
def grid_search():
    param_grid = {
        "learning_rate": [1e-6, 5e-6, 1e-5],
        "batch_size": [16, 32],
        "weight_decay": [1e-4, 1e-2],
        "warmup_steps": [100, 500],
        "num_epochs": [2, 3]
    }

    param_combinations = list(itertools.product(*param_grid.values()))
    param_names = list(param_grid.keys())

    model_name = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2,
        ignore_mismatched_sizes=True
    )

    train_dataset, val_dataset, class_weights = prepare_datasets(tokenizer)

    best_f1 = 0
    best_params = None

    for combination in param_combinations:
        params = dict(zip(param_names, combination))

        training_args = TrainingArguments(
            output_dir='/tmp/temp_trainer',
            evaluation_strategy="steps",
            eval_steps=500,
            num_train_epochs=params["num_epochs"],
            per_device_train_batch_size=params["batch_size"],
            per_device_eval_batch_size=params["batch_size"] * 2,
            warmup_steps=params["warmup_steps"],
            weight_decay=params["weight_decay"],
            learning_rate=params["learning_rate"],
            max_grad_norm=1.0,
            save_strategy="no",
            report_to="none"
        )

        trainer = CustomTrainer(
            class_weights=class_weights,
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            compute_metrics=compute_metrics,
        )

        trainer.train()
        eval_results = trainer.evaluate()

        print(f"Params: {params}, F1 Score: {eval_results['eval_f1_score']}")

        if eval_results["eval_f1_score"] > best_f1:
            best_f1 = eval_results["eval_f1_score"]
            best_params = params

    print(f"Best F1 Score: {best_f1}")
    print(f"Best Params: {best_params}")

    return best_params

# Main Function
def main():
    best_params = grid_search()
    print("\nTraining final model with best parameters...")

    model_name = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2,
        ignore_mismatched_sizes=True
    )

    train_dataset, val_dataset, class_weights = prepare_datasets(tokenizer)

    training_args = TrainingArguments(
        output_dir='./final_model',
        evaluation_strategy="steps",
        eval_steps=500,
        num_train_epochs=best_params["num_epochs"],
        per_device_train_batch_size=best_params["batch_size"],
        per_device_eval_batch_size=best_params["batch_size"] * 2,
        warmup_steps=best_params["warmup_steps"],
        weight_decay=best_params["weight_decay"],
        learning_rate=best_params["learning_rate"],
        max_grad_norm=1.0,
        save_strategy="epoch",
        save_total_limit=1
    )

    trainer = CustomTrainer(
        class_weights=class_weights,
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
    )

    trainer.train()
    final_results = trainer.evaluate()
    print(f"\nFinal Model Results: {final_results}")

    trainer.save_model('./sarcasm_detector_model_tuned')
    tokenizer.save_pretrained('./sarcasm_detector_model_tuned')

if __name__ == '__main__':
    main()


config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-06, 'batch_size': 16, 'weight_decay': 0.0001, 'warmup_steps': 100, 'num_epochs': 2}, F1 Score: 0.6142131979695432


Step,Training Loss,Validation Loss,Accuracy,F1 Score
500,0.6346,0.631335,0.623037,0.623037


Params: {'learning_rate': 1e-06, 'batch_size': 16, 'weight_decay': 0.0001, 'warmup_steps': 100, 'num_epochs': 3}, F1 Score: 0.6230366492146597


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-06, 'batch_size': 16, 'weight_decay': 0.0001, 'warmup_steps': 500, 'num_epochs': 2}, F1 Score: 0.6321243523316064


Step,Training Loss,Validation Loss,Accuracy,F1 Score
500,0.5827,0.597549,0.646597,0.677804


Params: {'learning_rate': 1e-06, 'batch_size': 16, 'weight_decay': 0.0001, 'warmup_steps': 500, 'num_epochs': 3}, F1 Score: 0.6747572815533981


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-06, 'batch_size': 16, 'weight_decay': 0.01, 'warmup_steps': 100, 'num_epochs': 2}, F1 Score: 0.6923076923076923


Step,Training Loss,Validation Loss,Accuracy,F1 Score
500,0.5163,0.57845,0.664921,0.690821


Params: {'learning_rate': 1e-06, 'batch_size': 16, 'weight_decay': 0.01, 'warmup_steps': 100, 'num_epochs': 3}, F1 Score: 0.6939759036144578


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-06, 'batch_size': 16, 'weight_decay': 0.01, 'warmup_steps': 500, 'num_epochs': 2}, F1 Score: 0.7121951219512195


Step,Training Loss,Validation Loss,Accuracy,F1 Score
500,0.4665,0.573676,0.685864,0.710145


Params: {'learning_rate': 1e-06, 'batch_size': 16, 'weight_decay': 0.01, 'warmup_steps': 500, 'num_epochs': 3}, F1 Score: 0.714975845410628


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-06, 'batch_size': 32, 'weight_decay': 0.0001, 'warmup_steps': 100, 'num_epochs': 2}, F1 Score: 0.7104622871046228


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-06, 'batch_size': 32, 'weight_decay': 0.0001, 'warmup_steps': 100, 'num_epochs': 3}, F1 Score: 0.7101449275362319


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-06, 'batch_size': 32, 'weight_decay': 0.0001, 'warmup_steps': 500, 'num_epochs': 2}, F1 Score: 0.714975845410628


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-06, 'batch_size': 32, 'weight_decay': 0.0001, 'warmup_steps': 500, 'num_epochs': 3}, F1 Score: 0.7255369928400954


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-06, 'batch_size': 32, 'weight_decay': 0.01, 'warmup_steps': 100, 'num_epochs': 2}, F1 Score: 0.727710843373494


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-06, 'batch_size': 32, 'weight_decay': 0.01, 'warmup_steps': 100, 'num_epochs': 3}, F1 Score: 0.7299270072992701


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-06, 'batch_size': 32, 'weight_decay': 0.01, 'warmup_steps': 500, 'num_epochs': 2}, F1 Score: 0.7317073170731707


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-06, 'batch_size': 32, 'weight_decay': 0.01, 'warmup_steps': 500, 'num_epochs': 3}, F1 Score: 0.7228915662650603


Step,Training Loss,Validation Loss


Params: {'learning_rate': 5e-06, 'batch_size': 16, 'weight_decay': 0.0001, 'warmup_steps': 100, 'num_epochs': 2}, F1 Score: 0.7070707070707071


Step,Training Loss,Validation Loss,Accuracy,F1 Score
500,0.2014,0.931062,0.704188,0.718204


Params: {'learning_rate': 5e-06, 'batch_size': 16, 'weight_decay': 0.0001, 'warmup_steps': 100, 'num_epochs': 3}, F1 Score: 0.7192118226600985


Step,Training Loss,Validation Loss


Params: {'learning_rate': 5e-06, 'batch_size': 16, 'weight_decay': 0.0001, 'warmup_steps': 500, 'num_epochs': 2}, F1 Score: 0.7207637231503579


Step,Training Loss,Validation Loss,Accuracy,F1 Score
500,0.0749,1.629211,0.696335,0.699482


Params: {'learning_rate': 5e-06, 'batch_size': 16, 'weight_decay': 0.0001, 'warmup_steps': 500, 'num_epochs': 3}, F1 Score: 0.6969696969696969


Step,Training Loss,Validation Loss


Params: {'learning_rate': 5e-06, 'batch_size': 16, 'weight_decay': 0.01, 'warmup_steps': 100, 'num_epochs': 2}, F1 Score: 0.7076923076923076


Step,Training Loss,Validation Loss,Accuracy,F1 Score
500,0.0168,2.279792,0.691099,0.691099


Params: {'learning_rate': 5e-06, 'batch_size': 16, 'weight_decay': 0.01, 'warmup_steps': 100, 'num_epochs': 3}, F1 Score: 0.7178217821782177


Step,Training Loss,Validation Loss


Params: {'learning_rate': 5e-06, 'batch_size': 16, 'weight_decay': 0.01, 'warmup_steps': 500, 'num_epochs': 2}, F1 Score: 0.727710843373494


Step,Training Loss,Validation Loss,Accuracy,F1 Score
500,0.0106,2.886394,0.685864,0.695431


Params: {'learning_rate': 5e-06, 'batch_size': 16, 'weight_decay': 0.01, 'warmup_steps': 500, 'num_epochs': 3}, F1 Score: 0.6870229007633588


Step,Training Loss,Validation Loss


Params: {'learning_rate': 5e-06, 'batch_size': 32, 'weight_decay': 0.0001, 'warmup_steps': 100, 'num_epochs': 2}, F1 Score: 0.6984924623115579


Step,Training Loss,Validation Loss


Params: {'learning_rate': 5e-06, 'batch_size': 32, 'weight_decay': 0.0001, 'warmup_steps': 100, 'num_epochs': 3}, F1 Score: 0.7044334975369458


Step,Training Loss,Validation Loss


Params: {'learning_rate': 5e-06, 'batch_size': 32, 'weight_decay': 0.0001, 'warmup_steps': 500, 'num_epochs': 2}, F1 Score: 0.7047619047619048


Step,Training Loss,Validation Loss


Params: {'learning_rate': 5e-06, 'batch_size': 32, 'weight_decay': 0.0001, 'warmup_steps': 500, 'num_epochs': 3}, F1 Score: 0.702439024390244


Step,Training Loss,Validation Loss


Params: {'learning_rate': 5e-06, 'batch_size': 32, 'weight_decay': 0.01, 'warmup_steps': 100, 'num_epochs': 2}, F1 Score: 0.7032418952618454


Step,Training Loss,Validation Loss


Params: {'learning_rate': 5e-06, 'batch_size': 32, 'weight_decay': 0.01, 'warmup_steps': 100, 'num_epochs': 3}, F1 Score: 0.7111111111111111


Step,Training Loss,Validation Loss


Params: {'learning_rate': 5e-06, 'batch_size': 32, 'weight_decay': 0.01, 'warmup_steps': 500, 'num_epochs': 2}, F1 Score: 0.6994818652849741


Step,Training Loss,Validation Loss


Params: {'learning_rate': 5e-06, 'batch_size': 32, 'weight_decay': 0.01, 'warmup_steps': 500, 'num_epochs': 3}, F1 Score: 0.7064676616915422


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-05, 'batch_size': 16, 'weight_decay': 0.0001, 'warmup_steps': 100, 'num_epochs': 2}, F1 Score: 0.7037974683544304


Step,Training Loss,Validation Loss,Accuracy,F1 Score
500,0.0212,3.062853,0.71466,0.744731


Params: {'learning_rate': 1e-05, 'batch_size': 16, 'weight_decay': 0.0001, 'warmup_steps': 100, 'num_epochs': 3}, F1 Score: 0.721153846153846


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-05, 'batch_size': 16, 'weight_decay': 0.0001, 'warmup_steps': 500, 'num_epochs': 2}, F1 Score: 0.730593607305936


Step,Training Loss,Validation Loss,Accuracy,F1 Score
500,0.0286,2.436673,0.735602,0.761229


Params: {'learning_rate': 1e-05, 'batch_size': 16, 'weight_decay': 0.0001, 'warmup_steps': 500, 'num_epochs': 3}, F1 Score: 0.7233009708737863


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-05, 'batch_size': 16, 'weight_decay': 0.01, 'warmup_steps': 100, 'num_epochs': 2}, F1 Score: 0.7277227722772278


Step,Training Loss,Validation Loss,Accuracy,F1 Score
500,0.0191,2.940755,0.709424,0.733813


Params: {'learning_rate': 1e-05, 'batch_size': 16, 'weight_decay': 0.01, 'warmup_steps': 100, 'num_epochs': 3}, F1 Score: 0.7272727272727273


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-05, 'batch_size': 16, 'weight_decay': 0.01, 'warmup_steps': 500, 'num_epochs': 2}, F1 Score: 0.7445887445887447


Step,Training Loss,Validation Loss,Accuracy,F1 Score
500,0.0258,3.307891,0.706806,0.726829


Params: {'learning_rate': 1e-05, 'batch_size': 16, 'weight_decay': 0.01, 'warmup_steps': 500, 'num_epochs': 3}, F1 Score: 0.6977886977886977


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-05, 'batch_size': 32, 'weight_decay': 0.0001, 'warmup_steps': 100, 'num_epochs': 2}, F1 Score: 0.7205882352941178


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-05, 'batch_size': 32, 'weight_decay': 0.0001, 'warmup_steps': 100, 'num_epochs': 3}, F1 Score: 0.7106598984771574


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-05, 'batch_size': 32, 'weight_decay': 0.0001, 'warmup_steps': 500, 'num_epochs': 2}, F1 Score: 0.7268292682926829


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-05, 'batch_size': 32, 'weight_decay': 0.0001, 'warmup_steps': 500, 'num_epochs': 3}, F1 Score: 0.6904109589041096


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-05, 'batch_size': 32, 'weight_decay': 0.01, 'warmup_steps': 100, 'num_epochs': 2}, F1 Score: 0.7167919799498746


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-05, 'batch_size': 32, 'weight_decay': 0.01, 'warmup_steps': 100, 'num_epochs': 3}, F1 Score: 0.7227722772277227


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-05, 'batch_size': 32, 'weight_decay': 0.01, 'warmup_steps': 500, 'num_epochs': 2}, F1 Score: 0.7209876543209877


Step,Training Loss,Validation Loss


Params: {'learning_rate': 1e-05, 'batch_size': 32, 'weight_decay': 0.01, 'warmup_steps': 500, 'num_epochs': 3}, F1 Score: 0.7517084282460138
Best F1 Score: 0.7517084282460138
Best Params: {'learning_rate': 1e-05, 'batch_size': 32, 'weight_decay': 0.01, 'warmup_steps': 500, 'num_epochs': 3}

Training final model with best parameters...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111294421111274, max=1.0)…

Step,Training Loss,Validation Loss



Final Model Results: {'eval_loss': 0.5825455784797668, 'eval_accuracy': 0.6780104712041884, 'eval_f1_score': 0.6870229007633588, 'eval_runtime': 0.5511, 'eval_samples_per_second': 693.12, 'eval_steps_per_second': 10.887, 'epoch': 3.0}


In [9]:
best_params = {'learning_rate': 1e-05, 'batch_size': 32, 'weight_decay': 0.01, 'warmup_steps': 500, 'num_epochs': 3}

### News HeadLines Dataset

In [13]:
def prepare_datasets(tokenizer):
    train = pd.read_csv('/kaggle/input/anlp-datasets/preprocessed data/news_headlines/news_headlines_train.csv')
    val = pd.read_csv('/kaggle/input/anlp-datasets/preprocessed data/news_headlines/news_headlines_dev.csv')

    train_tweets = train['preprocessed_text'].tolist()
    train_labels = train['label'].apply(lambda x: 1 if x != 0 else 0).tolist()
    val_tweets = val['preprocessed_text'].tolist()
    val_labels = val['label'].apply(lambda x: 1 if x != 0 else 0).tolist()

    train_encodings = tokenizer(train_tweets, truncation=True, padding=True, max_length=128, return_tensors='pt')
    val_encodings = tokenizer(val_tweets, truncation=True, padding=True, max_length=128, return_tensors='pt')

    train_dataset = SarcasmDataset(train_encodings, train_labels)
    val_dataset = SarcasmDataset(val_encodings, val_labels)

    label_counts = pd.Series(train_labels).value_counts()
    total_samples = len(train_labels)
    class_weights = torch.tensor([
        total_samples / (len(label_counts) * label_counts[0]),
        total_samples / (len(label_counts) * label_counts[1])
    ], dtype=torch.float32)

    return train_dataset, val_dataset, class_weights

In [15]:
def prepare_news_headlines_dataset(tokenizer):
    """Prepare the dataset for the news headlines."""
    news_data = pd.read_csv('/kaggle/input/anlp-datasets/preprocessed data/news_headlines/news_headlines_dev.csv')
    
    news_tweets = news_data['preprocessed_text'].tolist()
    news_labels = news_data['label'].apply(lambda x: 1 if x != 0 else 0).tolist()

    news_encodings = tokenizer(news_tweets, truncation=True, padding=True, max_length=128, return_tensors='pt')
    news_dataset = SarcasmDataset(news_encodings, news_labels)

    return news_dataset

def train_on_news_headlines(best_params):
    """Train and evaluate the model on the news headlines dataset using the best parameters."""
    print("\nTraining on News Headlines Dataset...")

    model_name = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2,
        ignore_mismatched_sizes=True
    )

    train_dataset, val_dataset, class_weights = prepare_datasets(tokenizer)
    news_dataset = prepare_news_headlines_dataset(tokenizer)

    training_args = TrainingArguments(
        output_dir='./final_model_news',
        evaluation_strategy="steps",
        eval_steps=500,
        num_train_epochs=best_params["num_epochs"],
        per_device_train_batch_size=best_params["batch_size"],
        per_device_eval_batch_size=best_params["batch_size"] * 2,
        warmup_steps=best_params["warmup_steps"],
        weight_decay=best_params["weight_decay"],
        learning_rate=best_params["learning_rate"],
        max_grad_norm=1.0,
        save_strategy="epoch",
        save_total_limit=1,
        logging_dir='./logs_news',
        logging_steps=500,
    )

    trainer = CustomTrainer(
        class_weights=class_weights,
        model=model,
        args=training_args,
        train_dataset=news_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
    )

    trainer.train()
    news_results = trainer.evaluate()
    print(f"\nResults on News Headlines Dataset: {news_results}")

    trainer.save_model('./sarcasm_detector_model_news')
    tokenizer.save_pretrained('./sarcasm_detector_model_news')

# Update the main function to include training on news headlines dataset
def main():
    # best_params = grid_search()
    # print("\nTraining final model with best parameters...")

    model_name = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2,
        ignore_mismatched_sizes=True
    )

    train_dataset, val_dataset, class_weights = prepare_datasets(tokenizer)

    training_args = TrainingArguments(
        output_dir='./final_model',
        evaluation_strategy="steps",
        eval_steps=500,
        num_train_epochs=best_params["num_epochs"],
        per_device_train_batch_size=best_params["batch_size"],
        per_device_eval_batch_size=best_params["batch_size"] * 2,
        warmup_steps=best_params["warmup_steps"],
        weight_decay=best_params["weight_decay"],
        learning_rate=best_params["learning_rate"],
        max_grad_norm=1.0,
        save_strategy="epoch",
        save_total_limit=1
    )

    trainer = CustomTrainer(
        class_weights=class_weights,
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
    )

    trainer.train()
    final_results = trainer.evaluate()
    print(f"\nFinal Model Results: {final_results}")

    trainer.save_model('./sarcasm_detector_model_tuned')
    tokenizer.save_pretrained('./sarcasm_detector_model_tuned')

    # Train and evaluate on news headlines dataset
    train_on_news_headlines(best_params)

if __name__ == '__main__':
    main()


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Accuracy,F1 Score
500,0.4873,0.243117,0.901535,0.895676
1000,0.218,0.213803,0.921378,0.907895
1500,0.1626,0.178787,0.940846,0.933669
2000,0.1216,0.199604,0.93523,0.925847



Final Model Results: {'eval_loss': 0.1996079981327057, 'eval_accuracy': 0.9352302508423811, 'eval_f1_score': 0.9258465495070723, 'eval_runtime': 2.8084, 'eval_samples_per_second': 951.077, 'eval_steps_per_second': 14.955, 'epoch': 3.0}

Training on News Headlines Dataset...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss



Results on News Headlines Dataset: {'eval_loss': 0.32278144359588623, 'eval_accuracy': 0.8723324597529015, 'eval_f1_score': 0.8597285067873303, 'eval_runtime': 2.7726, 'eval_samples_per_second': 963.346, 'eval_steps_per_second': 15.148, 'epoch': 3.0}
