In [None]:
import numpy as np

from transformers import AlbertTokenizer
from transformers import DataCollatorWithPadding
from transformers import TrainingArguments, Trainer
from transformers import AlbertForSequenceClassification
from transformers import EarlyStoppingCallback

from datasets import load_dataset
from sklearn.metrics import accuracy_score, f1_score

In [None]:
# Load the train / validation / test sentiment splits from CSV files.
# Each key in `data_files` becomes a split name on the resulting object.
# NOTE: every entry needs its own trailing comma -- without one, adjacent
# string literals are concatenated and the dict literal fails to parse.
dataset = load_dataset(
    "csv",
    data_files={
        "train": "../Datasets/TrainSentiment.csv",
        "validation": "../Datasets/ValidationSentiment.csv",
        "test": "../Datasets/TestSentiment.csv",
    },
)
dataset

In [None]:
# Name of the pretrained checkpoint; the same value is reused when the
# classification model is loaded further down.
checkpoint = "albert-base-v2"

# Tokenizer matching the checkpoint above.
tokenizer = AlbertTokenizer.from_pretrained(
    checkpoint,
)

In [None]:
def tokenize_fn(batch):
    """Tokenize the ``'sentence'`` field of ``batch`` with truncation enabled.

    Uses the module-level ``tokenizer`` and returns whatever it produces.
    """
    sentences = batch['sentence']
    encoded = tokenizer(sentences, truncation=True)
    return encoded

In [None]:
# Apply the tokenizer to every split, processing examples in batches.
tokenized_dataset = dataset.map(
    tokenize_fn,
    batched=True,
)

In [None]:
# Collator built around the same tokenizer; tokenize_fn only truncates,
# so padding is left to this object.
data_collator = DataCollatorWithPadding(
    tokenizer=tokenizer,
)

In [None]:
def compute_metrics(logits_and_labels):
    """Compute accuracy and macro-averaged F1 for one evaluation pass.

    Args:
        logits_and_labels: A two-item iterable ``(logits, labels)``.

    Returns:
        dict with keys ``"accuracy"`` and ``"f1-score"``.
    """
    raw_scores, gold = logits_and_labels
    # Predicted class = index of the largest score along the last axis.
    predicted = np.argmax(raw_scores, axis=-1)
    return {
        "accuracy": accuracy_score(gold, predicted),
        "f1-score": f1_score(gold, predicted, average="macro"),
    }

In [None]:
# Load the pretrained checkpoint with a 3-way classification head.
model = AlbertForSequenceClassification.from_pretrained(checkpoint, num_labels=3)

In [None]:
training_args = TrainingArguments(
    # Where checkpoints are written.
    output_dir="../ClassifierModels/ALBERT",
    # Optimisation settings.
    num_train_epochs=10,
    learning_rate=2e-5,
    weight_decay=0.01,
    # Per-device batch sizes for training and evaluation.
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    # Evaluate and checkpoint on the same per-epoch cadence so the best
    # checkpoint (by eval_loss) can be reloaded when training finishes.
    # NOTE(review): newer transformers releases appear to rename
    # `evaluation_strategy` to `eval_strategy` -- confirm against the
    # installed version.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)

In [None]:
# Early-stopping callback configured with a patience of 1; it is passed to
# the Trainer through its `callbacks` list.
early_stopping = EarlyStoppingCallback(
    early_stopping_patience=1,
)

In [None]:
# Wire the model, arguments, tokenized splits, collator, metrics and
# early-stopping callback into a single Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['validation'],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping],
)

In [None]:
# Run fine-tuning. Left as a bare expression so the value returned by
# train() is displayed as this cell's output.
trainer.train()