# Joint Modeling Notebook

Run the following cells in order, from top to bottom, to train a joint classifier.

## 0. Imports

In [1]:
%load_ext autoreload
%autoreload 2
    
%load_ext tensorboard

import sys
sys.path.append('../jointclassifier/')
from joint_args import ModelArguments, DataTrainingArguments, TrainingArguments
from joint_dataloader import load_dataset
from joint_trainer import JointTrainer
from single_trainer import SingleTrainer
from joint_model_v1 import JointSeqClassifier

from transformers import HfArgumentParser, AutoConfig, AutoTokenizer
import os

## 1. Initialize the Arguments

In [2]:
# Experiment settings. Every value is a string because it is handed to the
# argument parser below exactly as a command-line token would be.
task = "abstract+shakespeare"  # task names joined by '+'; split on '+' later on
data_dir = "../data/processed_filtered/"
model_name = "distilbert-base-uncased"
model_nick = "distilbert_uncased_2"
output_dir = "../models/"
freeze_encoder = "False"
skip_preclassifier = "False"
train_jointly = "True"
epochs = "3"
train_batch_size = "256"
eval_batch_size = "512"
log_save_steps = "200"  # used for both --logging_steps and --save_steps

# Parse the settings into the three argument dataclasses. Each line of the
# list below holds one flag followed by its value (a lone flag is a switch).
parser = HfArgumentParser(
    (ModelArguments, DataTrainingArguments, TrainingArguments)
)
model_args, data_args, training_args = parser.parse_args_into_dataclasses(
    [
        "--model_name_or_path", model_name,
        "--model_nick", model_nick,
        "--task", task,
        "--data_dir", data_dir,
        # Checkpoints go to <output_dir>/<model_nick>/<task>/joint
        "--output_dir", os.path.join(output_dir, model_nick, task, 'joint'),
        "--cache_dir", os.path.join(output_dir, "cache"),
        "--freeze_encoder", freeze_encoder,
        "--skip_preclassifier", skip_preclassifier,
        "--train_jointly", train_jointly,
        "--overwrite_cache",
        "--per_device_train_batch_size", train_batch_size,
        "--per_device_eval_batch_size", eval_batch_size,
        "--max_seq_len", "64",
        "--gradient_accumulation_steps", "1",
        "--num_train_epochs", epochs,
        "--logging_steps", log_save_steps,
        "--save_steps", log_save_steps,
    ]
)


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


## 2. Load the Tokenizer

In [3]:
# The config and the tokenizer come from the same pretrained checkpoint and
# share the same cache directory.
model_config = AutoConfig.from_pretrained(
    model_args.model_name_or_path,
    cache_dir=model_args.cache_dir,
)
# The tokenizer's maximum length is tied to the configured max sequence length.
tokenizer = AutoTokenizer.from_pretrained(
    model_args.model_name_or_path,
    cache_dir=model_args.cache_dir,
    model_max_length=data_args.max_seq_len,
)
    

loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at ../models/cache/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.d423bdf2f58dc8b77d5f5d18028d7ae4a72dcfd8f468e81fe979ada957a8c361
Model config DistilBertConfig {
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.4.0.dev0",
  "vocab_size": 30522
}

loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at ../models/cache/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.d423bdf2f58dc8b77d5f5d18028d7ae4a7

## 3. Load the datasets 
Note: a single dataset is used for joint training, and a dict of datasets for separate training.

In [4]:
# Individual task names, e.g. "a+b" -> ["a", "b"].
tasks = data_args.task.split('+')

# Keyword arguments that are identical for the train and dev splits.
# NOTE(review): the meaning of n_proc=6000 is not visible in this notebook —
# confirm against load_dataset before changing it.
_shared_load_kwargs = dict(
    model_name=model_args.model_name_or_path,
    tasks=tasks,
    n_proc=6000,
)

train_dataset, idx_to_classes = load_dataset(
    data_args.data_dir, tokenizer, mode="train", **_shared_load_kwargs
)
# The second return value is discarded for the dev split.
dev_dataset, _ = load_dataset(
    data_args.data_dir, tokenizer, mode="dev", **_shared_load_kwargs
)

100%|██████████| 14/14 [00:02<00:00,  6.23it/s]
 17%|█▋        | 1/6 [00:00<00:00,  7.59it/s]torch.Size([82119, 64]) torch.Size([82119, 64]) torch.Size([82119, 2]) torch.Size([82119])
100%|██████████| 6/6 [00:00<00:00,  6.94it/s]
  0%|          | 0/4 [00:00<?, ?it/s]torch.Size([116421, 64]) torch.Size([116421, 64]) torch.Size([116421, 2]) torch.Size([116421])
100%|██████████| 4/4 [00:00<00:00,  7.03it/s]
100%|██████████| 2/2 [00:00<00:00, 12.40it/s]torch.Size([20527, 64]) torch.Size([20527, 64]) torch.Size([20527, 2]) torch.Size([20527])
torch.Size([28286, 64]) torch.Size([28286, 64]) torch.Size([28286, 2]) torch.Size([28286])



## 4. Initialize the Trainer and the Model & Train!

In [5]:
# # Open TensorBoard
# %tensorboard --logdir runs

In [6]:
# Per-task label dimension handed to the model: 1 for a two-class task,
# otherwise the number of classes for that task.
# The comprehension variable is deliberately not called `task`, so it cannot
# be confused with the notebook-level `task` string defined in the config cell.
label_dims = {
    task_name: 1 if len(classes) == 2 else len(classes)
    for task_name, classes in idx_to_classes.items()
}
label_dims

{'abstract': 1, 'shakespeare': 1}

In [7]:
print(f"Processing Joint Task : {tasks}")

# Build the joint classifier from the pretrained checkpoint named in the
# model arguments.
model = JointSeqClassifier.from_pretrained(
    model_args.model_name_or_path,
    tasks=tasks,
    model_args=model_args,
    task_if_single=None,
    joint=training_args.train_jointly,
    label_dims=label_dims,
)

# The trainer receives the three argument dataclasses as one list, followed by
# the model, both dataset splits and the index-to-class mapping.
trainer = JointTrainer(
    [training_args, model_args, data_args],
    model,
    train_dataset,
    dev_dataset,
    idx_to_classes,
)

# Left as the last expression so the value returned by train() is displayed.
trainer.train()

Processing Joint Task : ['abstract', 'shakespeare']
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /home/vivek/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.d423bdf2f58dc8b77d5f5d18028d7ae4a72dcfd8f468e81fe979ada957a8c361
Model config DistilBertConfig {
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.4.0.dev0",
  "vocab_size": 30522
}

loading weights file https://huggingface.co/distilbert-base-uncased/resolve/main/pytorch_model.bin from cache at /home/vivek/.cache/huggingface/tran

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Iteration', max=235.0, style=ProgressStyle(description_wi…

***** Running Evaluation *****
Num examples = 29143
Total eval batch size = 1024


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=29.0, style=ProgressStyle(description_wid…

Configuration saved in ../models/distilbert_uncased_2/abstract+shakespeare/joint/config.json

Model weights saved in ../models/distilbert_uncased_2/abstract+shakespeare/joint/pytorch_model.bin
Saving model checkpoint to ../models/distilbert_uncased_2/abstract+shakespeare/joint
New best model saved at step 200, epoch 0: f1 = 0.8781784397783372



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=235.0, style=ProgressStyle(description_wi…

***** Running Evaluation *****
Num examples = 29143
Total eval batch size = 1024


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=29.0, style=ProgressStyle(description_wid…

Configuration saved in ../models/distilbert_uncased_2/abstract+shakespeare/joint/config.json

Model weights saved in ../models/distilbert_uncased_2/abstract+shakespeare/joint/pytorch_model.bin
Saving model checkpoint to ../models/distilbert_uncased_2/abstract+shakespeare/joint
New best model saved at step 400, epoch 1: f1 = 0.893147016528853



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=235.0, style=ProgressStyle(description_wi…

***** Running Evaluation *****
Num examples = 29143
Total eval batch size = 1024


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=29.0, style=ProgressStyle(description_wid…

Configuration saved in ../models/distilbert_uncased_2/abstract+shakespeare/joint/config.json

Model weights saved in ../models/distilbert_uncased_2/abstract+shakespeare/joint/pytorch_model.bin
Saving model checkpoint to ../models/distilbert_uncased_2/abstract+shakespeare/joint
New best model saved at step 600, epoch 2: f1 = 0.8947224948206303




(705, 0.29038770651140955)

## 5. Predict for a sentence

In [8]:
# Reload the classifier from the directory the training run saved into,
# instead of from the original pretrained checkpoint.
# NOTE(review): the saved output below shows a ".../shakespeare/joint" path,
# which differs from the output_dir built from the configured task — the
# output looks stale; re-run to refresh.
model = JointSeqClassifier.from_pretrained(
    training_args.output_dir,
    tasks=tasks,
    model_args=model_args,
    task_if_single=None,
    joint=training_args.train_jointly,
    label_dims=label_dims,
)

# Rebuild the trainer around the reloaded model.
trainer = JointTrainer(
    [training_args, model_args, data_args],
    model,
    train_dataset,
    dev_dataset,
    idx_to_classes,
)

loading configuration file ../models/distilbert_uncased_2/shakespeare/joint/config.json
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "JointSeqClassifier"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.4.0.dev0",
  "vocab_size": 30522
}

loading weights file ../models/distilbert_uncased_2/shakespeare/joint/pytorch_model.bin
All model checkpoint weights were used when initializing JointSeqClassifier.

All the weights of JointSeqClassifier were initialized from the model checkpoint at ../models/distilbert_uncased_2/shakespeare/joint.
If your task is similar to the task the model of the che

In [11]:
# Example input for the trainer's single-sentence prediction.
sentence = "Thou shalt open the door!"
# Last expression of the cell, so the returned prediction dict is displayed.
trainer.predict_for_sentence(sentence, tokenizer)

{'shakespeare': {'class': 'shakespeare', 'prob': '0.99290293'}}

In [10]:
# Same prediction call with salience=True; the saved output additionally
# contains a 'salience' list of scores.
# NOTE(review): this cell's execution count (10) precedes the cell above (11),
# so the saved output may not correspond to the current `sentence` — re-run.
trainer.predict_for_sentence(sentence, tokenizer, salience=True)

{'shakespeare': {'class': 'noshakespeare',
  'prob': '0.008918622',
  'salience': ['0.13388419',
   '0.29176205',
   '0.22202438',
   '0.20978905',
   '0.22265504',
   '0.24728881']}}

In [36]:
# Run the trainer's evaluation and display the returned metrics dict.
# NOTE(review): the saved output reports formality/jokes metrics, which are not
# the tasks configured in this notebook — the output is stale; re-run.
trainer.evaluate()

***** Running Evaluation *****
Num examples = 117806
Total eval batch size = 1024


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=116.0, style=ProgressStyle(description_wi…




{'formality_f1': 0.8738876529477196,
 'jokes_f1': 0.9763030939265619,
 'f1_mean': 0.9250953734371408,
 'dev_loss': 0.40544823325913526}

In [43]:
# Print the sentence one token per line by decoding each input id separately.
# NOTE(review): the saved output below does not match the `sentence` defined
# earlier in the notebook — re-run after Restart & Run All.
sentence_token_ids = tokenizer(sentence).input_ids
for token_id in sentence_token_ids:
    print(tokenizer.decode(token_id))

[CLS]
couldn
'
t
you
open
the
door
?
[SEP]


In [42]:
# Display the raw token ids the tokenizer produces for `sentence`.
tokenizer(sentence).input_ids

[101, 2481, 1005, 1056, 2017, 2330, 1996, 2341, 1029, 102]