From https://pub.towardsai.net/text-classification-with-simple-transformers-a29d13358135

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import os
from sklearn.metrics import f1_score, accuracy_score, precision_score
from sklearn.model_selection import train_test_split
from simpletransformers.classification import ClassificationModel, ClassificationArgs

In [None]:
# Environment flag consulted by the Hugging Face tokenizers backend;
# 'True' opts in to parallel tokenization.
os.environ['TOKENIZERS_PARALLELISM'] = 'True'

# Load both CSV splits and report their dimensions before any column is removed.
train = pd.read_csv('./data/train.csv')
test = pd.read_csv('./data/test.csv')
print(f'Shape of train set {train.shape}')
print(f'Shape of test set {test.shape}')

# Columns that are discarded from both splits before modelling.
unused_columns = ['id', 'keyword', 'location']
train.drop(columns=unused_columns, inplace=True)
test.drop(columns=unused_columns, inplace=True)

# Rename the two remaining train columns to 'text' and 'labels'
# (presumably the names simpletransformers looks for -- confirm against its docs).
train.columns = ['text', 'labels']

# Hold out 20% of the training rows for validation, stratified on the label
# and seeded so the split is reproducible.
train_df, valid_df = train_test_split(
    train,
    test_size=0.2,
    random_state=42,
    stratify=train['labels'],
)

# Plot how many rows carry each label in the full training frame.
sns.countplot(x=train['labels'])

In [None]:
# Training configuration: one epoch, fixed seed, evaluation enabled during
# training, and './outputs_roberta/' given as the best-model directory
# (the later cell reloads the model from that same path).
model_args = ClassificationArgs(
    manual_seed=42,
    num_train_epochs=1,
    evaluate_during_training=True,
    overwrite_output_dir=True,
    best_model_dir='./outputs_roberta/',
)

# Two-label RoBERTa classifier built from the 'roberta-base' weights, CPU only.
model = ClassificationModel(
    'roberta',
    'roberta-base',
    num_labels=2,
    args=model_args,
    use_cuda=False,
)

# Fit on the training split; valid_df is supplied as the evaluation frame and
# accuracy_score is passed as an extra metric named 'acc'.
model.train_model(train_df, eval_df=valid_df, acc=accuracy_score)

In [None]:
# Extra metrics handed to eval_model as keyword arguments (name -> scorer).
extra_metrics = {
    'acc': accuracy_score,
    'f1': f1_score,
    'precision': precision_score,
}
result, model_outputs, wrong_preds = model.eval_model(valid_df, **extra_metrics)

# Pick the index of the largest value along the last axis of the model
# outputs as the predicted label.
predictions = np.argmax(model_outputs, axis=-1)
print(result)

# Recompute F1 and accuracy directly from the predictions.
valid_labels = valid_df['labels']
valid_f1 = f1_score(valid_labels, predictions)
valid_accuracy = accuracy_score(valid_labels, predictions)
print('f1 score:', valid_f1, 'accuracy', valid_accuracy)

In [None]:
# Rebuild the classifier from the directory passed earlier as best_model_dir
# and evaluate it on the same validation split, again on CPU.
loaded_model = ClassificationModel(
    model_type="roberta",
    model_name="./outputs_roberta/",
    use_cuda=False,
)
result, model_outputs, wrong_preds = loaded_model.eval_model(valid_df)

# Predicted label = index of the largest value along the last axis.
predictions = np.argmax(model_outputs, axis=-1)

reloaded_labels = valid_df['labels']
reloaded_f1 = f1_score(reloaded_labels, predictions)
reloaded_accuracy = accuracy_score(reloaded_labels, predictions)
print('f1 score:', reloaded_f1, 'accuracy', reloaded_accuracy)