In [1]:
from transformers import BertTokenizer
from pathlib import Path
import torch

from box import Box
import pandas as pd
import collections
import os
from tqdm import tqdm, trange
import sys
import random
import numpy as np
import apex
from sklearn.model_selection import train_test_split

import datetime

from fast_bert.modeling import BertForMultiLabelSequenceClassification
from fast_bert.data_cls import BertDataBunch, InputExample, InputFeatures, MultiLabelTextProcessor, convert_examples_to_features
from fast_bert.learner_cls import BertLearner
from fast_bert.metrics import accuracy_multilabel, accuracy_thresh, fbeta, roc_auc

In [2]:
# Clear PyTorch's CUDA cache before any model is built in this session.
torch.cuda.empty_cache()

In [3]:
# Never truncate long text columns when a DataFrame is displayed.
# `None` means "no limit"; the old sentinel -1 has been deprecated since
# pandas 1.0 and is rejected by current releases.
pd.set_option('display.max_colwidth', None)

# Timestamp identifying this run, formatted so it is safe inside a file name.
run_start_time = datetime.datetime.today().strftime('%Y-%m-%d_%H-%M-%S')

In [4]:
# Filesystem locations used throughout the notebook.

# Data directory and label directory handed to BertDataBunch (same folder).
DATA_PATH = Path('/scratch/spf248/twitter/data/classification/')
LABEL_PATH = Path('/scratch/spf248/twitter/data/classification/')

# Passed to the learner as its output directory.
OUTPUT_PATH = Path('/scratch/spf248/twitter/data/classification/output0/')

# Directory that receives this run's log file.
LOG_PATH = Path('/scratch/spf248/twitter/log/')

# Passed to the learner as `finetuned_wgts_path`; None here.
FINETUNED_PATH = None

In [5]:
# Run configuration, accessible both as args["key"] and args.key.
#
# Every key appears exactly once. The previous literal listed "task_name",
# "no_cuda" and "seed" twice; Python keeps only the last value of a repeated
# key, so "task_name" silently resolved to 'intent' rather than
# 'labor_market_classification'. The effective values and the key order are
# preserved here.
args = Box({
    # --- run identification -------------------------------------------------
    "run_text": "multilabel toxic comments with freezable layers",  # also part of the log file name
    "train_size": -1,
    "val_size": -1,

    # --- locations ----------------------------------------------------------
    "log_path": LOG_PATH,
    "full_data_dir": DATA_PATH,
    "data_dir": DATA_PATH,
    "task_name": 'intent',
    "no_cuda": False,
    "output_dir": OUTPUT_PATH,

    # --- data / tokenisation ------------------------------------------------
    "max_seq_length": 512,
    "do_train": True,
    "do_eval": True,
    "do_lower_case": True,
    "train_batch_size": 8,
    "eval_batch_size": 16,

    # --- optimisation -------------------------------------------------------
    "learning_rate": 5e-5,
    "num_train_epochs": 6,
    "warmup_proportion": 0.0,
    "local_rank": -1,
    "seed": 42,
    "gradient_accumulation_steps": 1,
    "optimize_on_cpu": False,
    "fp16": True,
    "fp16_opt_level": "O1",
    "weight_decay": 0.0,
    "adam_epsilon": 1e-8,
    "max_grad_norm": 1.0,
    "max_steps": -1,
    "warmup_steps": 500,

    # --- logging / checkpoints ----------------------------------------------
    "logging_steps": 50,
    "eval_all_checkpoints": True,
    "overwrite_output_dir": True,
    "overwrite_cache": False,
    "loss_scale": 128,

    # --- model --------------------------------------------------------------
    "model_name": 'bert-base-uncased',
    "model_type": 'bert'
})

In [6]:
import logging

# logging.FileHandler does not create missing parent directories, so make sure
# the log directory exists before the handler opens its file.
LOG_PATH.mkdir(parents=True, exist_ok=True)

# One log file per run, named after the start timestamp and the run description.
logfile = str(LOG_PATH/'log-{}-{}.txt'.format(run_start_time, args["run_text"]))

# Send every INFO-and-above record both to the log file and to the cell output.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
    datefmt='%m/%d/%Y %H:%M:%S',
    handlers=[
        logging.FileHandler(logfile),
        logging.StreamHandler(sys.stdout)
    ])

# Root logger; it is later handed to the learner.
logger = logging.getLogger()

In [7]:
# Record the full run configuration in the log so the run can be traced later.
logger.info(args)

10/24/2019 19:22:43 - INFO - root -   {'run_text': 'multilabel toxic comments with freezable layers', 'train_size': -1, 'val_size': -1, 'log_path': PosixPath('/scratch/spf248/twitter/log'), 'full_data_dir': PosixPath('/scratch/spf248/twitter/data/classification'), 'data_dir': PosixPath('/scratch/spf248/twitter/data/classification'), 'task_name': 'intent', 'no_cuda': False, 'output_dir': PosixPath('/scratch/spf248/twitter/data/classification/output0'), 'max_seq_length': 512, 'do_train': True, 'do_eval': True, 'do_lower_case': True, 'train_batch_size': 8, 'eval_batch_size': 16, 'learning_rate': 5e-05, 'num_train_epochs': 6, 'warmup_proportion': 0.0, 'local_rank': -1, 'seed': 42, 'gradient_accumulation_steps': 1, 'optimize_on_cpu': False, 'fp16': True, 'fp16_opt_level': 'O1', 'weight_decay': 0.0, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'max_steps': -1, 'warmup_steps': 500, 'logging_steps': 50, 'eval_all_checkpoints': True, 'overwrite_output_dir': True, 'overwrite_cache': False, 'loss

In [8]:
# Device handed to the learner.
device = torch.device('cuda')

# Multi-GPU mode is enabled exactly when more than one CUDA device is visible.
args.multi_gpu = torch.cuda.device_count() > 1

In [9]:
# Label column names passed to BertDataBunch as `label_col`.
label_cols = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate",
]

In [10]:
# Build the training/validation data bunch in multi-label mode.
# No test file is passed here.
databunch = BertDataBunch(
    args.data_dir,
    LABEL_PATH,
    args.model_name,
    train_file='train_sample.csv',
    val_file='val_sample.csv',
    text_col="comment_text",
    label_col=label_cols,
    batch_size_per_gpu=args.train_batch_size,
    max_seq_length=args.max_seq_length,
    multi_gpu=args.multi_gpu,
    multi_label=True,
    model_type=args.model_type,
)

10/24/2019 19:22:43 - INFO - transformers.tokenization_utils -   loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/spf248/.cache/torch/transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
10/24/2019 19:22:43 - INFO - root -   Loading features from cached file /scratch/spf248/twitter/data/classification/cache/cached_bert_train_multi_label_512
10/24/2019 19:22:43 - INFO - root -   Loading features from cached file /scratch/spf248/twitter/data/classification/cache/cached_bert_dev_multi_label_512


In [11]:
# Sanity check: show element 3 of the first training example.
# NOTE(review): presumably the label vector (recorded output is a 6-float tensor) — confirm.
databunch.train_dl.dataset[0][3]

tensor([0., 0., 0., 0., 0., 0.])

In [12]:
# Number of labels known to the data bunch; displayed as the cell output.
num_labels = len(databunch.labels)
num_labels

6

In [13]:
# Metrics handed to the learner, each given as a display name plus the function itself.
metrics = [
    {'name': 'accuracy_thresh', 'function': accuracy_thresh},
    {'name': 'roc_auc', 'function': roc_auc},
    {'name': 'fbeta', 'function': fbeta},
]

In [14]:
# Create the learner from the pretrained checkpoint named in the configuration.
learner = BertLearner.from_pretrained_model(
    databunch,
    args.model_name,
    # evaluation and runtime environment
    metrics=metrics,
    device=device,
    logger=logger,
    # where the learner writes its output, and optional fine-tuned weights
    output_dir=args.output_dir,
    finetuned_wgts_path=FINETUNED_PATH,
    # training behaviour
    warmup_steps=args.warmup_steps,
    multi_gpu=args.multi_gpu,
    is_fp16=args.fp16,
    multi_label=True,
    logging_steps=0,
)

10/24/2019 19:22:43 - INFO - transformers.configuration_utils -   loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /home/spf248/.cache/torch/transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.bf3b9ea126d8c0001ee8a1e8b92229871d06d36d8808208cc2449280da87785c
10/24/2019 19:22:43 - INFO - transformers.configuration_utils -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_labels": 6,
  "output_attentions": false,
  "output_hidden_states": false,
  "output_past": true,
  "pruned_heads": {},
  "torchscript": false,
  "type_vocab_size": 2,
  "use_bfloat16": false,
  "vocab_size": 30522
}

10/24/2019 19:22:43

100%|██████████| 440473133/440473133 [00:27<00:00, 15818020.43B/s]

10/24/2019 19:23:11 - INFO - transformers.file_utils -   copying /state/partition1/job-5251778/tmpn30pxylr to cache at /home/spf248/.cache/torch/transformers/aa1ef1aede4482d0dbcd4d52baad8ae300e60902e88fcb0bebdec09afd232066.36ca03ab34a1a5d5fa7bc3d03d55c4fa650fed07220e2eeebc06ce58d0e9a157





10/24/2019 19:23:13 - INFO - transformers.file_utils -   creating metadata file for /home/spf248/.cache/torch/transformers/aa1ef1aede4482d0dbcd4d52baad8ae300e60902e88fcb0bebdec09afd232066.36ca03ab34a1a5d5fa7bc3d03d55c4fa650fed07220e2eeebc06ce58d0e9a157
10/24/2019 19:23:13 - INFO - transformers.file_utils -   removing temp file /state/partition1/job-5251778/tmpn30pxylr
10/24/2019 19:23:13 - INFO - transformers.modeling_utils -   loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin from cache at /home/spf248/.cache/torch/transformers/aa1ef1aede4482d0dbcd4d52baad8ae300e60902e88fcb0bebdec09afd232066.36ca03ab34a1a5d5fa7bc3d03d55c4fa650fed07220e2eeebc06ce58d0e9a157
10/24/2019 19:23:16 - INFO - transformers.modeling_utils -   Weights of BertForMultiLabelSequenceClassification not initialized from pretrained model: ['classifier.weight', 'classifier.bias']
10/24/2019 19:23:16 - INFO - transformers.modeling_utils -   Weights from pretrained

In [None]:
# Train for the configured number of epochs at the configured learning rate,
# with validation enabled (validate=True).
learner.fit(args.num_train_epochs, args.learning_rate, validate=True)

/scratch/spf248/twitter/data/classification/output0/tensorboard
Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
10/24/2019 19:24:29 - INFO - root -   ***** Running training *****
10/24/2019 19:24:29 - INFO - root -     Num examples = 1000
10/24/2019 19:24:29 - INFO - root -     Num Epochs = 6
10/24/2019 19:24:29 - INFO - root -     Total train batch size (w. 

In [1]:
# Run the learner's validation step.
# NOTE(review): the recorded output is a NameError because this cell was run in a
# kernel where `learner` did not exist; run the cells that build it first.
learner.validate()

NameError: name 'learner' is not defined

In [None]:
# Persist the model through the learner.
# NOTE(review): presumably written under the configured output_dir — confirm.
learner.save_model()

In [2]:
# Read the test set and pull out the raw comment texts.
test_frame = pd.read_csv('../data/test.csv')
test_texts = list(test_frame['comment_text'].values)

# Predict for every test comment; the result is the cell output.
learner.predict_batch(test_texts)

NameError: name 'learner' is not defined