In [14]:
import pandas as pd
from transformers import AutoTokenizer
from datasets import Dataset
import torch
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
#load dataset
train_df = pd.read_csv('/content/drive/MyDrive/combined_train.csv')
test_df = pd.read_csv('/content/drive/MyDrive/combined_test.csv')
val_df = pd.read_csv('/content/drive/MyDrive/combined_val.csv')

In [6]:
train_df.head()

Unnamed: 0,text_cleaned,language,directness_label,target_label,group_label,hatespeech,hatespeech.1,directness_label.1,target_label.1,group_label.1,sentiment_anger,sentiment_confusion,sentiment_disgust,sentiment_fear,sentiment_indifference,sentiment_normal,sentiment_sadness,sentiment_shock,language_encoded
0,يلعن شرفك يا حكم!!!,arabic,2,2,12,0,0,2,2,12,0,0,0,0,0,1,0,0,2
1,RT @J23app: Feb 2015 Jordan 4 Retro Remastered...,english,2,2,12,0,0,2,2,12,0,0,0,0,0,1,0,0,0
2,Blague : Mec : Je vais me laisser pousser la m...,french,2,2,12,0,0,2,2,12,0,0,0,0,0,1,0,0,1
3,I just put down a pan of brownies,english,2,2,12,0,0,2,2,12,0,0,0,0,0,1,0,0,0
4,في بني آدم عنده عقل بنزل مادة مقدمة في علم الم...,arabic,2,2,12,0,0,2,2,12,0,0,0,0,0,1,0,0,2


In [7]:
train_df['target_label'].unique()

array([2, 1, 4, 3, 0, 6, 5])

In [8]:
train_df = train_df.dropna(subset=["text_cleaned"])
test_df = test_df.dropna(subset=["text_cleaned"])
val_df = val_df.dropna(subset=["text_cleaned"])

In [9]:
train_df = train_df[['text_cleaned', 'target_label']]
test_df = test_df[['text_cleaned', 'target_label']]
val_df = val_df[['text_cleaned', 'target_label']]

In [10]:
#convert to HuggingFace dataset / format
train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)
val_dataset = Dataset.from_pandas(val_df)

In [11]:
train_dataset = train_dataset.rename_column("target_label", "labels")
val_dataset = val_dataset.rename_column("target_label", "labels")
test_dataset = test_dataset.rename_column("target_label", "labels")


In [12]:
#tokenize
MODEL_NAME = "xlm-roberta-base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize(batch):
  return tokenizer(batch["text_cleaned"],
                   truncation = True,
                   padding = "max_length",
                   max_length = 128)

train_dataset = train_dataset.map(tokenize, batched =True)
test_dataset = test_dataset.map(tokenize, batched =True)
val_dataset = val_dataset.map(tokenize, batched =True)

#format for pytorch
train_dataset.set_format(type ="torch", columns = ["input_ids", "attention_mask", "labels"] )
test_dataset.set_format(type ="torch", columns = ["input_ids", "attention_mask", "labels"] )
val_dataset.set_format(type ="torch", columns = ["input_ids", "attention_mask", "labels"] )

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

Map:   0%|          | 0/30695 [00:00<?, ? examples/s]

Map:   0%|          | 0/3901 [00:00<?, ? examples/s]

Map:   0%|          | 0/3900 [00:00<?, ? examples/s]

In [13]:
#load model
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels = 7 #since we have 7 target labels
)

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
#Set up the trainer

def compute_metrics(pred):
    logits = pred.predictions
    labels = pred.label_ids
    preds = logits.argmax(axis=-1)

    return {
        "accuracy": accuracy_score(labels, preds),
        "f1_macro": f1_score(labels, preds, average="macro"),
        "f1_micro": f1_score(labels, preds, average="micro"),
        "precision_macro": precision_score(labels, preds, average="macro", zero_division=0),
        "recall_macro": recall_score(labels, preds, average="macro", zero_division=0),
        "precision_micro": precision_score(labels, preds, average="micro", zero_division=0),
        "recall_micro": recall_score(labels, preds, average="micro", zero_division=0),
    }

training_args = TrainingArguments(
      output_dir="./results",
      eval_strategy = "epoch",
      save_strategy = "epoch",
      learning_rate= 2e-5,
      per_device_train_batch_size= 8,
      per_device_eval_batch_size= 8,
      num_train_epochs=4,
      weight_decay=0.01,
      logging_dir="./logs"
  )

trainer = Trainer(
      model=model,
      args = training_args,
      train_dataset = train_dataset,
      eval_dataset = test_dataset,
      tokenizer = tokenizer,
      compute_metrics = compute_metrics
  )

  trainer = Trainer(


In [1]:
pip install -U transformers




In [16]:
trainer.train()



<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mnadaa_moharram[0m ([33mnadaa_moharram-university-of-california-berkeley[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro,F1 Micro,Precision Macro,Recall Macro,Precision Micro,Recall Micro
1,0.6004,0.594339,0.804922,0.610852,0.804922,0.591153,0.636201,0.804922,0.804922
2,0.53,0.511301,0.808767,0.665141,0.808767,0.680482,0.661558,0.808767,0.808767
3,0.4808,0.523855,0.816457,0.690141,0.816457,0.678673,0.717597,0.816457,0.816457
4,0.4317,0.541942,0.818508,0.698147,0.818508,0.684585,0.717757,0.818508,0.818508


TrainOutput(global_step=15348, training_loss=0.5436702834649542, metrics={'train_runtime': 4462.0232, 'train_samples_per_second': 27.517, 'train_steps_per_second': 3.44, 'total_flos': 8076556408704000.0, 'train_loss': 0.5436702834649542, 'epoch': 4.0})

In [2]:
 !pip install --upgrade datasets

Collecting datasets
  Downloading datasets-4.0.0-py3-none-any.whl.metadata (19 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-4.0.0-py3-none-any.whl (494 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m494.8/494.8 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (193 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fsspec, datasets
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2025.3.2
    Uninstalling fsspec-2025.3.2:
      Successfully uninstalled fsspec-2025.3.2
  Attempting uninstall: datasets
    Found existing installation: datasets 2.14.4
    Uninstalling datasets-2.14.4:
      Successfully uninstalled datasets-2.14.4
[31mERROR: pip's dependency r

In [None]:
import datasets.formatting.formatting as ds_fmt
import numpy as np

ds_fmt.np_array = np.asarray  # forces compatible fallback

In [None]:
trainer.train()

Step,Training Loss
500,0.4769
1000,0.5015
1500,0.5062
2000,0.4961
2500,0.518
3000,0.4655
3500,0.4746
4000,0.4612
4500,0.446
5000,0.4652


TrainOutput(global_step=15348, training_loss=0.41024161739712245, metrics={'train_runtime': 4338.5138, 'train_samples_per_second': 28.3, 'train_steps_per_second': 3.538, 'total_flos': 8076556408704000.0, 'train_loss': 0.41024161739712245, 'epoch': 4.0})

In [18]:
# /content/drive/MyDrive/Colab Notebooks/NLP_Datasci266/final_project/combined_train.csv
model.save_pretrained("./saved_xlmroberta")
tokenizer.save_pretrained("./saved_xlmroberta")


('./saved_xlmroberta/tokenizer_config.json',
 './saved_xlmroberta/special_tokens_map.json',
 './saved_xlmroberta/sentencepiece.bpe.model',
 './saved_xlmroberta/added_tokens.json',
 './saved_xlmroberta/tokenizer.json')

In [None]:
model.save_pretrained("/content/drive/MyDrive/Colab Notebooks/NLP_Datasci266/final_project./saved_xlmroberta")
tokenizer.save_pretrained("./saved_xlmroberta")

('./saved_xlmroberta/tokenizer_config.json',
 './saved_xlmroberta/special_tokens_map.json',
 './saved_xlmroberta/sentencepiece.bpe.model',
 './saved_xlmroberta/added_tokens.json',
 './saved_xlmroberta/tokenizer.json')

In [None]:
# to load model
from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizer

model = XLMRobertaForSequenceClassification.from_pretrained("./saved_xlmroberta")
tokenizer = XLMRobertaTokenizer.from_pretrained("./saved_xlmroberta")


In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

def compute_metrics(pred):
    logits = pred.predictions
    labels = pred.label_ids
    preds = logits.argmax(axis=-1)

    return {
        "accuracy": accuracy_score(labels, preds),
        "f1_macro": f1_score(labels, preds, average="macro"),
        "f1_micro": f1_score(labels, preds, average="micro"),
        "precision_macro": precision_score(labels, preds, average="macro", zero_division=0),
        "recall_macro": recall_score(labels, preds, average="macro", zero_division=0),
        "precision_micro": precision_score(labels, preds, average="micro", zero_division=0),
        "recall_micro": recall_score(labels, preds, average="micro", zero_division=0),
    }


In [None]:
trainer = Trainer(
      model=model,
      args = training_args,
      train_dataset = train_dataset,
      eval_dataset = test_dataset,
      tokenizer = tokenizer,
      compute_metrics = compute_metrics
  )

  trainer = Trainer(


In [17]:
trainer.evaluate(test_dataset)

{'eval_loss': 0.5419424772262573,
 'eval_accuracy': 0.8185080748526019,
 'eval_f1_macro': 0.6981468481883304,
 'eval_f1_micro': 0.8185080748526019,
 'eval_precision_macro': 0.6845850066264273,
 'eval_recall_macro': 0.7177568368739354,
 'eval_precision_micro': 0.8185080748526019,
 'eval_recall_micro': 0.8185080748526019,
 'eval_runtime': 24.0411,
 'eval_samples_per_second': 162.264,
 'eval_steps_per_second': 20.299,
 'epoch': 4.0}

In [None]:
preds = trainer.predict(test_dataset)
predicted_classes = preds.predictions.argmax(-1)

# If you have a mapping:
label_map = {
    0: "disability",
    1: "gender",
    2: "normal",
    3: "origin",
    4: "other",
    5: "religion",
    6: "sexual_orientation"

}
human_readable = [label_map[p] for p in predicted_classes]


# Hypertuning with Optuna

In [None]:
pip install optuna

Collecting optuna
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.4-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.4.0-py3-none-any.whl (395 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m395.9/395.9 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.16.4-py3-none-any.whl (247 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m247.0/247.0 kB[0m [31m24.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.16.4 colorlog-6.9.0 optuna-4.4.0


In [None]:
print(set(train_dataset['labels']))

{0, 1, 2, 3, 4, 5, 6}


In [None]:
import optuna
from transformers import (
    XLMRobertaForSequenceClassification,
    Trainer,
    TrainingArguments,
    AutoTokenizer,
)
from datasets import load_dataset, DatasetDict
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import torch
import os

# For debugging CUDA errors:
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"


In [None]:
import torch
from datasets import Dataset

# Assume you have train_dataset, val_dataset, test_dataset loaded as HuggingFace Datasets

def convert_labels_to_int(dataset):
    dataset.set_format(None)  # avoid numpy formatting issues
    dataset = dataset.map(lambda x: {"labels": int(x["labels"])})
    return dataset

    # Remove old 'labels' column and add new one with ints
    dataset = dataset.remove_columns("labels")
    dataset = dataset.add_column("labels", labels_int)
    return dataset

train_dataset = convert_labels_to_int(train_dataset)
val_dataset = convert_labels_to_int(val_dataset)
test_dataset = convert_labels_to_int(test_dataset)

# Now check the labels
print(f"Sample labels: {train_dataset['labels'][:10]}")
print(f"Unique labels: {set(train_dataset['labels'])}")
print(f"Min label: {min(train_dataset['labels'])}")
print(f"Max label: {max(train_dataset['labels'])}")

num_labels = len(set(train_dataset['labels']))

# Then tokenize and set format as usual
from transformers import XLMRobertaTokenizerFast

tokenizer = XLMRobertaTokenizerFast.from_pretrained("xlm-roberta-base")

def tokenize_function(examples):
    return tokenizer(examples["text_cleaned"], padding="max_length", truncation=True, max_length=128)

train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)
test_dataset = test_dataset.map(tokenize_function, batched=True)

columns_to_return = ["input_ids", "attention_mask", "labels"]
# train_dataset.set_format(type="torch", columns=columns_to_return)
# val_dataset.set_format(type="torch", columns=columns_to_return)
# test_dataset.set_format(type="torch", columns=columns_to_return)
train_dataset.set_format(None)
val_dataset.set_format(None)
test_dataset.set_format(None)

# Load model with correct num_labels
from transformers import XLMRobertaForSequenceClassification

model = XLMRobertaForSequenceClassification.from_pretrained("xlm-roberta-base", num_labels=num_labels)

# Training arguments etc...
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
)


from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = logits.argmax(axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

# Train
trainer.train()

# Evaluate
trainer.evaluate(test_dataset)


Map:   0%|          | 0/30695 [00:00<?, ? examples/s]

Map:   0%|          | 0/3900 [00:00<?, ? examples/s]

Map:   0%|          | 0/3901 [00:00<?, ? examples/s]

Sample labels: [2, 2, 2, 2, 2, 1, 2, 2, 4, 2]
Unique labels: {0, 1, 2, 3, 4, 5, 6}
Min label: 0
Max label: 6


Map:   0%|          | 0/30695 [00:00<?, ? examples/s]

Map:   0%|          | 0/3900 [00:00<?, ? examples/s]

Map:   0%|          | 0/3901 [00:00<?, ? examples/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6005,0.564926,0.8,0.803149,0.8,0.799547
2,0.4955,0.523382,0.802564,0.807066,0.802564,0.802855
3,0.4615,0.519714,0.811282,0.815758,0.811282,0.811627


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 0.5059531927108765,
 'eval_accuracy': 0.8138938733658037,
 'eval_precision': 0.8202043903916153,
 'eval_recall': 0.8138938733658037,
 'eval_f1': 0.8160661985306652,
 'eval_runtime': 25.1318,
 'eval_samples_per_second': 155.222,
 'eval_steps_per_second': 9.709,
 'epoch': 3.0}

In [19]:
import torch
from datasets import Dataset

# Assume you have train_dataset, val_dataset, test_dataset loaded as HuggingFace Datasets

def convert_labels_to_int(dataset):
    dataset.set_format(None)  # avoid numpy formatting issues
    dataset = dataset.map(lambda x: {"labels": int(x["labels"])})
    return dataset

    # Remove old 'labels' column and add new one with ints
    dataset = dataset.remove_columns("labels")
    dataset = dataset.add_column("labels", labels_int)
    return dataset

train_dataset = convert_labels_to_int(train_dataset)
val_dataset = convert_labels_to_int(val_dataset)
test_dataset = convert_labels_to_int(test_dataset)

# Now check the labels
print(f"Sample labels: {train_dataset['labels'][:10]}")
print(f"Unique labels: {set(train_dataset['labels'])}")
print(f"Min label: {min(train_dataset['labels'])}")
print(f"Max label: {max(train_dataset['labels'])}")

num_labels = len(set(train_dataset['labels']))

# Then tokenize and set format as usual
from transformers import XLMRobertaTokenizerFast

tokenizer = XLMRobertaTokenizerFast.from_pretrained("xlm-roberta-base")

def tokenize_function(examples):
    return tokenizer(examples["text_cleaned"], padding="max_length", truncation=True, max_length=128)

train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)
test_dataset = test_dataset.map(tokenize_function, batched=True)

columns_to_return = ["input_ids", "attention_mask", "labels"]
# train_dataset.set_format(type="torch", columns=columns_to_return)
# val_dataset.set_format(type="torch", columns=columns_to_return)
# test_dataset.set_format(type="torch", columns=columns_to_return)
train_dataset.set_format(None)
val_dataset.set_format(None)
test_dataset.set_format(None)

# Load model with correct num_labels
from transformers import XLMRobertaForSequenceClassification

model = XLMRobertaForSequenceClassification.from_pretrained("xlm-roberta-base", num_labels=num_labels)

# Training arguments etc...
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="f1_micro",
)


from sklearn.metrics import accuracy_score, precision_recall_fscore_support


def compute_metrics(pred):
    logits = pred.predictions
    labels = pred.label_ids
    preds = logits.argmax(axis=-1)

    return {
        "accuracy": accuracy_score(labels, preds),
        "f1_macro": f1_score(labels, preds, average="macro"),
        "f1_micro": f1_score(labels, preds, average="micro"),
        "precision_macro": precision_score(labels, preds, average="macro", zero_division=0),
        "recall_macro": recall_score(labels, preds, average="macro", zero_division=0),
        "precision_micro": precision_score(labels, preds, average="micro", zero_division=0),
        "recall_micro": recall_score(labels, preds, average="micro", zero_division=0),
    }

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

# Train
trainer.train()

# Evaluate
trainer.evaluate(test_dataset)


Map:   0%|          | 0/30695 [00:00<?, ? examples/s]

Map:   0%|          | 0/3900 [00:00<?, ? examples/s]

Map:   0%|          | 0/3901 [00:00<?, ? examples/s]

Sample labels: [2, 2, 2, 2, 2, 1, 2, 2, 4, 2]
Unique labels: {0, 1, 2, 3, 4, 5, 6}
Min label: 0
Max label: 6


Map:   0%|          | 0/30695 [00:00<?, ? examples/s]

Map:   0%|          | 0/3900 [00:00<?, ? examples/s]

Map:   0%|          | 0/3901 [00:00<?, ? examples/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro,F1 Micro,Precision Macro,Recall Macro,Precision Micro,Recall Micro
1,0.6043,0.578947,0.791026,0.609784,0.791026,0.581632,0.64911,0.791026,0.791026
2,0.4918,0.516917,0.799744,0.645938,0.799744,0.691209,0.658275,0.799744,0.799744
3,0.466,0.513317,0.804872,0.670668,0.804872,0.670248,0.682833,0.804872,0.804872


{'eval_loss': 0.5027871131896973,
 'eval_accuracy': 0.8162009741092028,
 'eval_f1_macro': 0.679939922727474,
 'eval_f1_micro': 0.8162009741092028,
 'eval_precision_macro': 0.6856255679005739,
 'eval_recall_macro': 0.6872065218618523,
 'eval_precision_micro': 0.8162009741092028,
 'eval_recall_micro': 0.8162009741092028,
 'eval_runtime': 27.0423,
 'eval_samples_per_second': 144.255,
 'eval_steps_per_second': 9.023,
 'epoch': 3.0}

In [20]:
model.save_pretrained("./saved_xlmroberta_hyper")
tokenizer.save_pretrained("./saved_xlmroberta_hyper")


('./saved_xlmroberta_hyper/tokenizer_config.json',
 './saved_xlmroberta_hyper/special_tokens_map.json',
 './saved_xlmroberta_hyper/sentencepiece.bpe.model',
 './saved_xlmroberta_hyper/added_tokens.json',
 './saved_xlmroberta_hyper/tokenizer.json')