In [None]:
from datasets import load_dataset
import pandas as pd
from sklearn.model_selection import train_test_split
import torch

In [None]:
# Load the two AG News splits. Validation data is split off from
# `train_and_valid` in a later cell.
case = 'ag_news'
train_and_valid = load_dataset(case, split='train')
test = load_dataset(case, split='test')

# Report both shapes with a single print call, one per line.
print(
    "Train + validation dataset shape: {}".format(train_and_valid.shape),
    "Test dataset shape: {}".format(test.shape),
    sep="\n",
)

Using custom data configuration default
Reusing dataset ag_news (/root/.cache/huggingface/datasets/ag_news/default/0.0.0/fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Using custom data configuration default
Reusing dataset ag_news (/root/.cache/huggingface/datasets/ag_news/default/0.0.0/fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)


Train + validation dataset shape: (120000, 2)
Test dataset shape: (7600, 2)


## Convert to pandas

In [None]:
# Keep only the text and target columns, renaming 'label' -> 'labels' to match
# the column names of the simpletransformers input format.
train_and_valid_df = (
    pd.DataFrame.from_dict(train_and_valid)[['text', 'label']]
    .rename(columns={'label': 'labels'})
)
test_df = (
    pd.DataFrame.from_dict(test)[['text', 'label']]
    .rename(columns={'label': 'labels'})
)

In [None]:
# Hold out 10% of the labelled data for validation. Stratifying on 'labels'
# preserves the class proportions in both parts, and the fixed random_state
# makes the split repeatable.
train_df, valid_df = train_test_split(
    train_and_valid_df,
    test_size=0.1,
    stratify=train_and_valid_df['labels'],
    random_state=345246,
)

# Report every frame's shape, one per line.
print(
    "Train + valid dataset shape: {}".format(train_and_valid_df.shape),
    "Train dataset shape: {}".format(train_df.shape),
    "Valid dataset shape: {}".format(valid_df.shape),
    "Test dataset shape: {}".format(test_df.shape),
    sep="\n",
)

Train + valid dataset shape: (120000, 2)
Train dataset shape: (108000, 2)
Valid dataset shape: (12000, 2)
Test dataset shape: (7600, 2)


In [None]:
from simpletransformers.classification import ClassificationModel

# Use the GPU when one is visible and fall back to CPU otherwise, instead of
# failing at model creation on a machine without CUDA.
use_cuda = torch.cuda.is_available()

args = {
    # Half-precision (mixed) training: trades some numeric precision for speed
    # and memory. PyTorch's default is fp32. Only enabled together with CUDA.
    "fp16": use_cuda,
    "fp16_opt_level": "O1",
    "max_seq_length": 512,  # push up to 512 (max allowed for BERT) if possible
    "train_batch_size": 8,  # try increasing, but not too much (memory grows with batch size and sequence length)
    "gradient_accumulation_steps": 1,  # values > 1 emulate a larger effective batch without the extra GPU memory
    "eval_batch_size": 8,
    "num_train_epochs": 1,
    "weight_decay": 0.0,  # L2-style regularization (disabled)
    "learning_rate": 3e-5,  # keep learning rate low, similar to base BERT best practices
    "adam_epsilon": 1e-8,
    "warmup_ratio": 0.1,
    "warmup_steps": 0,  # presumably 0 lets warmup_ratio decide the warmup length - confirm for the installed version
    "max_grad_norm": 1.0,
    "manual_seed": 345246,  # seed the training RNGs (same value as the train/validation split) for repeatable runs
    "do_lower_case": True,  # ALBERT pre-trained is case-insensitive
    "sliding_window": False,
    "evaluate_during_training": True,
    "overwrite_output_dir": True,
    "reprocess_input_data": True,
    "save_model_every_epoch": True,
    "save_optimizer_and_scheduler": True,
}

# Create a ClassificationModel with four target classes.
# To load a previously saved model instead of the default pre-trained one,
# change the model name to the path of a directory that contains a saved model.
model = ClassificationModel('albert', 'albert-base-v2', use_cuda=use_cuda, num_labels=4, args=args)

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertForSequenceClassification: ['predictions.bias', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias', 'predictions.dense.weight', 'predictions.dense.bias', 'predictions.decoder.weight', 'predictions.decoder.bias']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You sho

In [None]:
# Fine-tune on the training split. valid_df is scored during training because
# 'evaluate_during_training' is enabled in args. The call is left as the last
# expression so its return value is displayed as the cell output.
model.train_model(
    train_df=train_df,
    eval_df=valid_df,
    show_running_loss=True,
)

HBox(children=(FloatProgress(value=0.0, max=108000.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=1.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 1', max=13500.0, style=ProgressStyle(d…







(13500,
 {'eval_loss': [0.3882914542003224,
   0.3800998780045969,
   0.33208188170062686,
   0.32524872810599237,
   0.3097142320672671,
   0.3025210974177268,
   0.2728515487052888],
  'global_step': [2000, 4000, 6000, 8000, 10000, 12000, 13500],
  'mcc': [0.8584522706037873,
   0.8750779076998001,
   0.8881046667431082,
   0.8974429475486536,
   0.9044631876004755,
   0.9118918473842377,
   0.9123391189369487],
  'train_loss': [0.018739324063062668,
   0.6463265419006348,
   0.006130380090326071,
   0.4023725986480713,
   0.008016820065677166,
   0.5739859938621521,
   0.738547146320343]})

In [None]:
# Quick sanity check on a single hand-written headline.
sample_texts = ["Chemicals company showcases new scientific breakthroughs."]
predictions, raw_outputs = model.predict(sample_texts)
print(predictions, raw_outputs)

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


[3] [[-0.72705078 -3.1875      0.39282227  4.1171875 ]]


In [None]:
# Score the fine-tuned model on the held-out test split. eval_model returns
# three values; the third is not used in this notebook.
evaluation_outputs = model.eval_model(test_df)
result, model_outputs, _ = evaluation_outputs
print(result, model_outputs)

HBox(children=(FloatProgress(value=0.0, max=7600.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=950.0, style=ProgressStyle(descr…


{'mcc': 0.9137723647026577, 'eval_loss': 0.2754991495851917} [[ 0.12225342 -2.76171875  4.52734375 -1.13867188]
 [-1.2421875  -3.27929688 -1.59570312  6.171875  ]
 [-0.68701172 -3.04101562 -1.69628906  5.859375  ]
 ...
 [-1.53515625  6.26953125 -2.45507812 -2.88671875]
 [ 0.66601562 -2.01757812  2.59179688 -0.46704102]
 [-2.58398438 -3.50390625  2.15625     4.75390625]]


In [None]:
# Persist the fine-tuned model. The directory can later be passed as the model
# name to ClassificationModel to reload it.
path = "./albert_ag-news_120k_model"  # plain literal: there are no placeholders, so no f-prefix
model.save_model(output_dir=path, model=model.model)