# XLNet Classification example

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell executes, so edits to local .py files are picked up.
%load_ext autoreload
%autoreload 2

import warnings
# Suppresses every warning for the rest of the session (including deprecation
# warnings from the libraries used below).
warnings.filterwarnings("ignore")

import torch

## Define a dictionary containing all model and environment parameters

In [2]:
# Name of this run; it is embedded in the params-file name and the output directory below.
task_name = 'xlnet_128'

# Single configuration dict for the whole notebook. Each key appears exactly once:
# a repeated key in a dict literal is legal Python, but the later entry silently
# overrides the earlier one, which makes the effective value easy to misread.
params = {
    # --- Paths ---
    'data_dir': 'data/',                              # Directory of train.tsv, evaluation.tsv and test.tsv (optional)
    'params_dict_dir' : f'{task_name}_params.json',   # We save the dict here. It will be overwritten with more info.
    'output_dir': f'outputs/{task_name}/',            # The output will be saved in the outputs/xlnet_128/ directory
    'cache_dir': 'cache/',

    # --- Model selection ---
    'model_type':  'xlnet',                           # Defines the model type (key into MODEL_CLASSES)
    'model': 'xlnet-base-cased',                      # Defines the exact pretrained model
    'task_name': 'binary',
    'output_mode': 'classification',

    # --- Run switches ---
    'do_train': True,                                 # Need to specify True for training
    'do_eval': False,                                 # Need to specify True for evaluation
    'evaluate_during_training': False,
    'fp16': False,                                    # If True, decrease precision of all calculation (needs apex package)
    'fp16_opt_level': 'O1',                           # Precision level (only relevant if fp16 is defined True)

    # --- Data / batching ---
    'max_seq_len': 128,                               # Max text taken for one sentence
    'train_batch_size': 16,
    'eval_batch_size': 16,

    # --- Optimisation ---
    'gradient_accumulation_steps': 1,
    'num_train_epochs': 1,
    'weight_decay': 0,
    'learning_rate': 4e-5,
    'adam_epsilon': 1e-8,
    'warmup_ratio': 0.06,
    'warmup_steps': 0,
    'max_grad_norm': 1.0,

    # --- Logging / checkpointing ---
    'logging_steps': 50,
    'save_steps': 2000,
    'eval_all_checkpoints': True,

    # --- Misc ---
    'overwrite_output_dir': False,
    'reprocess_input_data': True,
    'notes': 'Using IMDB dataset',
    # Use the GPU when one is visible to torch, otherwise fall back to CPU.
    'device': torch.device("cuda" if torch.cuda.is_available() else "cpu")
}

### Add tokenizer to the model

In [3]:
from pytorch_transformers import (WEIGHTS_NAME, BertConfig, BertForSequenceClassification, BertTokenizer,
                                  XLMConfig, XLMForSequenceClassification, XLMTokenizer, 
                                  XLNetConfig, XLNetForSequenceClassification, XLNetTokenizer,
                                  RobertaConfig, RobertaForSequenceClassification, RobertaTokenizer)
# Lookup table: `model_type` string -> (config class, sequence-classification model class, tokenizer class).
MODEL_CLASSES = {
    'bert': (
        BertConfig, BertForSequenceClassification, BertTokenizer,
    ),
    'xlnet': (
        XLNetConfig, XLNetForSequenceClassification, XLNetTokenizer,
    ),
    'xlm': (
        XLMConfig, XLMForSequenceClassification, XLMTokenizer,
    ),
    'roberta': (
        RobertaConfig, RobertaForSequenceClassification, RobertaTokenizer,
    ),
}

# Resolve the three classes for the architecture selected in `params`.
selected_classes = MODEL_CLASSES[params['model_type']]
config_class, model_class, tokenizer_class = selected_classes

# Both the config and the tokenizer are loaded from the same pretrained checkpoint name.
pretrained_name = params['model']
config = config_class.from_pretrained(
    pretrained_name,
    num_labels=2,
    finetuning_task=params['task_name'],
)
tokenizer = tokenizer_class.from_pretrained(pretrained_name)

# Keep the tokenizer in `params` alongside the other settings.
params["tokenizer"] = tokenizer

## Creating output and reports dir 

In [4]:
# Import only the helpers this cell uses; a star import would dump every public
# name of `modules.outils` into the notebook namespace and hide where names come from.
from modules.outils import create_output_directory, create_reports_directory

# Both helpers receive the full `params` dict.
create_reports_directory(params)
create_output_directory(params)

Output directory (outputs/xlnet_128/) already exists and is not empty.


## Process data
### Create features from the data 

In [6]:
from modules.process import create_features

# Each call returns the features for one split together with `params`;
# `params` is rebound after every call, so the calls must run in sequence.
features, params = create_features(params, 'train')
eval_features, params = create_features(params, 'evaluation')
# NOTE(review): the validation features are built from the same 'evaluation'
# split as `eval_features`, so the two hold the same data in this run.
val_features, params = create_features(params, 'evaluation')

Loading features from cached file data/train_features_128_xlnet-base-cased..
Loading is finished from file data/train_features_128_xlnet-base-cased..
Adding train_example_length to the parameters dict and saving it in xlnet_128_params.json
Loading features from cached file data/evaluation_features_128_xlnet-base-cased..
Loading is finished from file data/evaluation_features_128_xlnet-base-cased..
Adding evaluation_example_length to the parameters dict and saving it in xlnet_128_params.json
Loading features from cached file data/evaluation_features_128_xlnet-base-cased..
Loading is finished from file data/evaluation_features_128_xlnet-base-cased..
Adding evaluation_example_length to the parameters dict and saving it in xlnet_128_params.json


### Create dataloaders

In [13]:
from modules.process import create_dataloader

# Every dataloader is also written to `data_dir`, tagged with the sequence
# length and the model name, via torch.save.

# Create dataloader for train data
dataloader = create_dataloader(features, params, evaluation=False)
dataloader_name = f'{params["data_dir"]}dataloader_{params["max_seq_len"]}_{params["model"]}'
torch.save(dataloader, dataloader_name)

# Create dataloader for eval data (with evaluation=True the call also returns the label ids)
eval_dataloader, all_label_ids = create_dataloader(eval_features, params, evaluation=True)
eval_dataloader_name = f'{params["data_dir"]}eval_dataloader_{params["max_seq_len"]}_{params["model"]}'
torch.save(eval_dataloader, eval_dataloader_name)
torch.save(all_label_ids, f'{params["data_dir"]}eval_label_ids')

# Create dataloader for validation data.
# The validation labels get their own variable and their own file so they can
# neither rebind `all_label_ids` nor overwrite the `eval_label_ids` file above.
val_dataloader, val_label_ids = create_dataloader(val_features, params, evaluation=True)
val_dataloader_name = f'{params["data_dir"]}val_dataloader_{params["max_seq_len"]}_{params["model"]}'
torch.save(val_dataloader, val_dataloader_name)
torch.save(val_label_ids, f'{params["data_dir"]}val_label_ids')

In [8]:
# Record the number of feature entries per split in the shared params dict.
params.update({
    'train_examples_length': len(features),
    'eval_examples_length': len(eval_features),
})

## Load model

In [9]:
# Load the pretrained weights using the `config` built earlier, so that its
# `num_labels` and `finetuning_task` settings are actually applied to the model
# instead of being silently replaced by the checkpoint's default config.
model = model_class.from_pretrained(params['model'], config=config)
# Move the model to the configured device; as the last expression this also displays the model.
model.to(params["device"])

XLNetForSequenceClassification(
  (transformer): XLNetModel(
    (word_embedding): Embedding(32000, 768)
    (layer): ModuleList(
      (0): XLNetLayer(
        (rel_attn): XLNetRelativeAttention(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (ff): XLNetFeedForward(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (layer_1): Linear(in_features=768, out_features=3072, bias=True)
          (layer_2): Linear(in_features=3072, out_features=768, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (1): XLNetLayer(
        (rel_attn): XLNetRelativeAttention(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (ff): XLNetFeedForward(
          (layer_norm): LayerNorm((768,), eps=1e

# Training
Here I run training without evaluation. 
If you wish to evaluate during training, define a third dataset, 'val_data.tsv', process it the same way as the evaluation data (create_dataloader), and pass it as the eval_dataloader argument to the 'train' function. 

I did not evaluate during training because training was long enough even without evaluation. 

In [17]:
from modules.model_functions import train

# Force in-training evaluation off for this run, whatever `params` held before.
params['evaluate_during_training'] = False

if params['do_train']:
    # Hand the validation loader to `train` only when in-training evaluation is enabled;
    # otherwise pass None in its place.
    if params['evaluate_during_training']:
        training_eval_loader = val_dataloader
    else:
        training_eval_loader = None
    model, train_loss_set, global_step, tr_loss = train(dataloader, model, params, training_eval_loader)

INFO:modules.model_functions:***** Running training *****
INFO:modules.model_functions:  Num examples = 25000
INFO:modules.model_functions:  Num Epochs = 1
INFO:modules.model_functions:  Total train batch size  = 16
INFO:modules.model_functions:  Gradient Accumulation steps = 1
INFO:modules.model_functions:  Total optimization steps = 25000




Epoch:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A

HBox(children=(IntProgress(value=0, description='Iteration', max=1563, style=ProgressStyle(description_width='…

0.517068





Epoch: 100%|██████████| 1/1 [29:38<00:00, 1778.18s/it][A[A[A[A







# Evaluation

In [19]:
from modules.model_functions import evaluate

# Evaluate the trained model on the evaluation dataloader.
# This runs unconditionally: params['do_eval'] is not consulted in this cell.
# NOTE(review): the recorded output below shows accuracy ~0.50 and mcc ~0 with
# almost every example predicted positive (fp = 12494, tn = 6), i.e. chance level
# on this binary task. The training log also reports 25000 total optimization
# steps for 1563 iterations per epoch; worth checking before trusting this run.
result = evaluate(model, eval_dataloader, params, prefix="")

HBox(children=(IntProgress(value=0, description='Evaluating', max=1563, style=ProgressStyle(description_width=…

INFO:modules.model_functions:***** Eval results  *****
INFO:modules.model_functions:  accuracy = 0.49984
INFO:modules.model_functions:  eval_loss = 0.7574610095220884
INFO:modules.model_functions:  f1-score = 0.6664176715398571
INFO:modules.model_functions:  fn = 10
INFO:modules.model_functions:  fp = 12494
INFO:modules.model_functions:  logloss = 15.83051898565548
INFO:modules.model_functions:  mcc = -0.006326580150009362
INFO:modules.model_functions:  model = xlnet-base-cased
INFO:modules.model_functions:  precision = 0.499919948767211
INFO:modules.model_functions:  recall = 0.9992
INFO:modules.model_functions:  task_name = binary
INFO:modules.model_functions:  tn = 6
INFO:modules.model_functions:  tp = 12490



