In [4]:
import gc

import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import EarlyStoppingCallback
from transformers import TrainingArguments, Trainer

from compute_metrics import compute_metrics
from TweetDataset import TweetDataset

In [5]:
# --- Training hyper-parameters ---------------------------------------------
EPOCHS = 50           # handed to TrainingArguments as num_train_epochs
batch_size = 4        # used for both the per-device train and eval batch size
metric_name = "f1"    # handed to TrainingArguments as metric_for_best_model

# --- Checkpoints to fine-tune, processed in list order ----------------------
MODELS = [
    'roberta-large',
    'google/electra-large-discriminator',
    'tner/roberta-large-tweetner7-all',
    'tner/bertweet-large-tweetner7-all',
    'cardiffnlp/twitter-roberta-large-2022-154m',
]

# --- Train / validation splits ----------------------------------------------
# Paths are relative to the notebook's working directory.
train_df, validate_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/finetune_train_val_test/train.csv',
        '../data/finetune_train_val_test/validate.csv',
    )
)

In [6]:
# Fine-tune every checkpoint listed in MODELS, one after another, as a 5-label
# multi-label classifier, and save the best model of each run.
#
# Each run gets its own checkpoint, log and final-model directory so that:
#   * checkpoints of different models never collide or get rotated away by
#     another run's `save_total_limit`;
#   * the saved model does NOT live in a folder named exactly like the Hub id
#     (e.g. ./roberta-large). `from_pretrained(model_name)` prefers an existing
#     local directory, so such a folder would make a re-run of this cell start
#     from the already fine-tuned weights instead of the pretrained checkpoint.
for model_name in MODELS:
    # Hub ids such as 'google/electra-large-discriminator' contain '/';
    # flatten them into a single, unambiguous path component.
    run_name = model_name.replace('/', '__')
    checkpoint_dir = f'./results/{run_name}'
    log_dir = f'./logs/{run_name}'
    final_model_dir = f'./finetuned/{run_name}'

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # ignore_mismatched_sizes lets checkpoints that ship a differently sized
    # classification head be loaded with a fresh 5-label head.
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=5,
        problem_type="multi_label_classification",
        ignore_mismatched_sizes=True,
    )
    if tokenizer.pad_token is None:
        # Batching needs a pad token: add one, grow the embedding matrix to
        # match, and tell the model which id is padding.
        tokenizer.add_special_tokens({'pad_token': '[PAD]'})
        model.resize_token_embeddings(len(tokenizer))
        model.config.pad_token_id = tokenizer.pad_token_id

    # NOTE(review): 256 is presumably the max token length used by
    # TweetDataset -- confirm against its constructor.
    train_dataset = TweetDataset(tokenizer, 256, train_df)
    val_dataset = TweetDataset(tokenizer, 256, validate_df)

    training_arguments = TrainingArguments(
        output_dir=checkpoint_dir,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=EPOCHS,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model=metric_name,
        save_total_limit=3,
        warmup_steps=100,
        logging_dir=log_dir,
        logging_steps=10,
    )

    trainer = Trainer(
        model,
        training_arguments,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
        # Stop once the monitored metric has not improved for 3 evaluations.
        callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
    )

    trainer.train()

    # load_best_model_at_end=True means this persists the best checkpoint.
    trainer.save_model(final_model_dir)

    # The trainer also references the model, so deleting `model` alone frees
    # nothing. Drop both references and release cached GPU memory before the
    # next (large) model is loaded.
    del trainer, model
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

Some weights of the model checkpoint at roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classi

Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 