In [2]:
import sys
%load_ext autoreload
%autoreload 2
sys.path.append('..')
sys.path.append('../keras_bert/')

import os
import re
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from lib import bert_model
from tqdm import tqdm_notebook as tqdm

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [3]:
%env CUDA_VISIBLE_DEVICES=1

# Number of distinct labels; sizes both the label list and the one-hot targets.
NUM_LABELS = 374

# Configuration dictionary handed to bert_model.BertTextClassificationModel.
params = {
    'num_labels': NUM_LABELS,
    # Label names are simply the stringified indices '0' .. str(NUM_LABELS - 1).
    'label_list': list(map(str, range(NUM_LABELS))),
    'output_dir': '../output',
    'cache_dir': '../model_cache',
    'bert_model': 'bert-base-multilingual-uncased',
    'max_seq_length': 128,
    'train_batch_size': 32,
    'eval_batch_size': 8,
    'learning_rate': 2e-5,
    'warmup_proportion': 0.1,
    'num_train_epochs': 2,
    'type': 'multilabel',
    'seed': 1331,
    # Use the GPU when torch can see one, otherwise fall back to the CPU.
    'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
}

# Derived from the checkpoint name so the flag always agrees with 'bert_model'.
params['lower_case'] = params['bert_model'].endswith('uncased')

env: CUDA_VISIBLE_DEVICES=1


## Preparing data

In [None]:
DATA_DIR = '../datasets'

# All three splits are UTF-8 CSV files under DATA_DIR and are read the same
# way; the tuple order below fixes the train / valid / test assignment.
df_train, df_valid, df_test = [
    pd.read_csv(os.path.join(DATA_DIR, split_file), encoding='utf-8')
    for split_file in ('data/train.csv', 'data/dev.csv', 'data/test.csv')
]

In [6]:
def create_ohe_labels(labels_column, num_labels=None):
    """Convert a bracketed, whitespace-separated label string to a multi-hot list.

    Parameters
    ----------
    labels_column : str
        Label indices written as text, e.g. ``'[  3  17 250]'``. Surrounding
        brackets are dropped and any run of whitespace (spaces, tabs, line
        breaks) separates two indices.
    num_labels : int, optional
        Length of the returned vector. When omitted, ``params['num_labels']``
        is used.

    Returns
    -------
    list of int
        ``num_labels`` zeros and ones, with a 1 at every listed index. A label
        string without any index (``'[]'``) yields an all-zero vector.

    Raises
    ------
    ValueError
        If a token cannot be parsed as an integer.
    IndexError
        If an index lies outside ``[0, num_labels)``.
    """
    if num_labels is None:
        num_labels = params['num_labels']

    # split() without an argument splits on any whitespace run and never
    # produces empty tokens, so wrapped or padded label strings parse cleanly.
    tokens = labels_column.strip('[] \t\r\n').split()

    codes = np.zeros(num_labels, dtype=int)
    if tokens:
        label_ids = np.array(tokens, dtype=int)
        # NumPy would accept negative indices and wrap them around, silently
        # switching on the wrong label, so the range is checked explicitly.
        if label_ids.min() < 0 or label_ids.max() >= num_labels:
            raise IndexError(
                'label index out of range [0, {}): {!r}'.format(
                    num_labels, labels_column)
            )
        codes[label_ids] = 1
    return codes.tolist()

def get_data_ohe_labels(df):
    """Encode the ``labels`` column of ``df`` as a list of multi-hot vectors.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``labels`` column holding label strings accepted by
        ``create_ohe_labels``.

    Returns
    -------
    list of list of int
        One multi-hot vector per row, in row order, so the result always has
        ``len(df)`` entries.

    Raises
    ------
    ValueError
        If the labels of any row cannot be encoded. The message names the row
        index and the offending value; the original error is chained.
    """
    targets = []
    # Only the labels column is needed, so iterate over it directly instead of
    # materialising a Series per row with iterrows().
    for i, labels in tqdm(df['labels'].items(), total=len(df)):
        try:
            targets.append(create_ohe_labels(labels))
        except Exception as err:
            # Fail loudly: returning a shortened list would leave the targets
            # misaligned with the texts they belong to.
            raise ValueError(
                'Could not encode labels for row {!r}: {!r}'.format(i, labels)
            ) from err
    return targets

In [7]:
y_train = get_data_ohe_labels(df_train)
y_valid = get_data_ohe_labels(df_valid)
y_test = get_data_ohe_labels(df_test)

# Guard against a truncated target list: fewer targets than rows would
# silently pair texts with the wrong labels during training and evaluation.
assert len(y_train) == len(df_train), 'train targets do not cover every row'
assert len(y_valid) == len(df_valid), 'valid targets do not cover every row'
assert len(y_test) == len(df_test), 'test targets do not cover every row'

HBox(children=(IntProgress(value=0, max=104724), HTML(value='')))




HBox(children=(IntProgress(value=0, max=26182), HTML(value='')))




HBox(children=(IntProgress(value=0, max=32727), HTML(value='')))




In [8]:
# Pull the underlying values of the 'text' column out of each split,
# keeping the train / valid / test order.
X_train, X_valid, X_test = (
    frame['text'].values for frame in (df_train, df_valid, df_test)
)

## Training BERT model

In [17]:
# Build the classifier from the shared `params` configuration; the cell output
# shows that construction downloads the pretrained BERT weights.
model = bert_model.BertTextClassificationModel(params)

Downloading BERT...
Completed!


In [None]:
# Fine-tune on the training split; the validation split is passed so the model
# can be evaluated after every epoch (see the per-epoch output below).
# NOTE(review): batch_size=32 repeats params['train_batch_size'], while
# n_epochs=50 disagrees with params['num_train_epochs'] (2) — confirm which of
# the two values the model honours and keep a single source of truth.
# NOTE(review): best_model_output is presumably the path where the best
# checkpoint is written — TODO confirm against lib.bert_model.
result = model.fit(
    X_train,
    y_train,
    batch_size=32,
    n_epochs=50,
    validation_data=(X_valid, y_valid),
    best_model_output='model_saved.pth',
)
# Bare expression so the notebook displays whatever fit() returned.
result


Epoch: 1


Iteration: 100%|██████████| 3273/3273 [26:31<00:00,  2.35it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 26.15it/s]


{'train_log_loss': 0.048060563627119426, 'eval_accuracy': 0.0}

Epoch: 2


Iteration: 100%|██████████| 3273/3273 [25:39<00:00,  2.35it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 26.05it/s]


{'train_log_loss': 0.019767339077567312, 'eval_accuracy': 0.35272324497746543}

Epoch: 3


Iteration: 100%|██████████| 3273/3273 [25:43<00:00,  2.19it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 25.84it/s]


{'train_log_loss': 0.013193859488256856, 'eval_accuracy': 0.42777480711939503}

Epoch: 4


Iteration: 100%|██████████| 3273/3273 [25:42<00:00,  2.26it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 26.25it/s]


{'train_log_loss': 0.01043379671568335, 'eval_accuracy': 0.46791688946604537}

Epoch: 5


Iteration: 100%|██████████| 3273/3273 [25:34<00:00,  2.35it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 26.28it/s]


{'train_log_loss': 0.00860750918702575, 'eval_accuracy': 0.4996180582079291}

Epoch: 6


Iteration: 100%|██████████| 3273/3273 [26:42<00:00,  2.17it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 26.12it/s]


{'train_log_loss': 0.007259796141355711, 'eval_accuracy': 0.5182186234817814}

Epoch: 7


Iteration: 100%|██████████| 3273/3273 [26:09<00:00,  2.19it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 26.42it/s]


{'train_log_loss': 0.00626634600496314, 'eval_accuracy': 0.5325414406844396}

Epoch: 8


Iteration: 100%|██████████| 3273/3273 [25:25<00:00,  2.35it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 26.43it/s]


{'train_log_loss': 0.00544912168001224, 'eval_accuracy': 0.5435795584752884}

Epoch: 9


Iteration: 100%|██████████| 3273/3273 [26:39<00:00,  2.20it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 26.34it/s]


{'train_log_loss': 0.004810165283494972, 'eval_accuracy': 0.5490795202811092}

Epoch: 10


Iteration: 100%|██████████| 3273/3273 [26:30<00:00,  2.16it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 26.40it/s]


{'train_log_loss': 0.004362694543827322, 'eval_accuracy': 0.5538537926819953}

Epoch: 11


Iteration: 100%|██████████| 3273/3273 [26:28<00:00,  2.23it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 26.26it/s]


{'train_log_loss': 0.0038583733796698615, 'eval_accuracy': 0.5573294629898403}

Epoch: 12


Iteration: 100%|██████████| 3273/3273 [26:31<00:00,  2.35it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 26.44it/s]


{'train_log_loss': 0.003613242186317684, 'eval_accuracy': 0.5567947444809411}

Epoch: 13


Iteration: 100%|██████████| 3273/3273 [25:35<00:00,  2.35it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 26.34it/s]


{'train_log_loss': 0.0033043691755745936, 'eval_accuracy': 0.5577877931403254}

Epoch: 14


Iteration: 100%|██████████| 3273/3273 [25:50<00:00,  2.27it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 26.47it/s]


{'train_log_loss': 0.002965849659657046, 'eval_accuracy': 0.5620655412115194}

Epoch: 15


Iteration: 100%|██████████| 3273/3273 [26:18<00:00,  2.35it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 25.29it/s]


{'train_log_loss': 0.0027723311692171237, 'eval_accuracy': 0.5619509586738981}

Epoch: 16


Iteration: 100%|██████████| 3273/3273 [25:56<00:00,  2.28it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 26.37it/s]


{'train_log_loss': 0.002689595715889509, 'eval_accuracy': 0.5640898327094951}

Epoch: 17


Iteration: 100%|██████████| 3273/3273 [26:33<00:00,  2.35it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 26.21it/s]


{'train_log_loss': 0.002462626811318393, 'eval_accuracy': 0.5666870368955771}

Epoch: 18


Iteration: 100%|██████████| 3273/3273 [27:06<00:00,  2.15it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:06<00:00, 26.27it/s]


{'train_log_loss': 0.0023710784416732127, 'eval_accuracy': 0.5685203574975174}

Epoch: 19


Iteration: 100%|██████████| 3273/3273 [26:44<00:00,  2.34it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:06<00:00, 26.33it/s]


{'train_log_loss': 0.0022367908156652907, 'eval_accuracy': 0.5665342601787487}

Epoch: 20


Iteration: 100%|██████████| 3273/3273 [26:38<00:00,  2.15it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:06<00:00, 26.09it/s]


{'train_log_loss': 0.0021017167791227316, 'eval_accuracy': 0.5660759300282637}

Epoch: 21


Predicting: 100%|██████████| 3273/3273 [02:06<00:00, 26.24it/s]


{'train_log_loss': 0.0018440465105656893, 'eval_accuracy': 0.5686349400351386}

Epoch: 23


Iteration: 100%|██████████| 3273/3273 [27:24<00:00,  2.16it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:06<00:00, 26.57it/s]


{'train_log_loss': 0.0018537262139590872, 'eval_accuracy': 0.5663432892827133}

Epoch: 24


Iteration: 100%|██████████| 3273/3273 [26:40<00:00,  2.15it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:06<00:00, 26.32it/s]


{'train_log_loss': 0.0016981142949167593, 'eval_accuracy': 0.5687113283935529}

Epoch: 25


Iteration: 100%|██████████| 3273/3273 [25:28<00:00,  2.19it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 26.53it/s]


{'train_log_loss': 0.0017267979587084468, 'eval_accuracy': 0.5687113283935529}

Epoch: 26


Iteration: 100%|██████████| 3273/3273 [26:40<00:00,  2.35it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:06<00:00, 26.53it/s]


{'train_log_loss': 0.001647156932271592, 'eval_accuracy': 0.5692460469024521}

Epoch: 27


Iteration: 100%|██████████| 3273/3273 [27:16<00:00,  2.20it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:06<00:00, 26.19it/s]


{'train_log_loss': 0.0015654500105531266, 'eval_accuracy': 0.5695516003361087}

Epoch: 28


Iteration: 100%|██████████| 3273/3273 [26:03<00:00,  2.27it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:06<00:00, 26.55it/s]


{'train_log_loss': 0.0015843937832985845, 'eval_accuracy': 0.5710411733251852}

Epoch: 29


Iteration: 100%|██████████| 3273/3273 [26:49<00:00,  2.15it/s]


***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:06<00:00, 26.54it/s]


{'train_log_loss': 0.0015061203975090779, 'eval_accuracy': 0.5695897945153159}

Epoch: 30


Iteration: 100%|██████████| 3273/3273 [26:32<00:00,  2.16it/s]


***** Running evaluation *****


Predicting:  30%|███       | 996/3273 [00:38<01:27, 25.98it/s]

In [19]:
# Evaluate on the test split. `result` is the metrics dict displayed further
# down and `preds` the raw predictions that print_metrics thresholds.
# Note that `result` rebinds the name that previously held fit()'s return value.
# batch_size=8 repeats params['eval_batch_size'].
result, preds = model.evaluate(X_test, y_test, batch_size=8)

***** Running evaluation *****


Predicting: 100%|██████████| 4091/4091 [02:38<00:00, 25.79it/s]


In [20]:
# Same evaluation on the validation split, kept under separate names so the
# test-split results are not overwritten.
result_val, preds_val = model.evaluate(X_valid, y_valid, batch_size=8)

***** Running evaluation *****


Predicting: 100%|██████████| 3273/3273 [02:07<00:00, 26.58it/s]


In [21]:
# Display the test and validation metric dicts side by side (test first).
result, result_val

({'eval_accuracy': 0.5777187032114156}, {'eval_accuracy': 0.5781452906577038})

In [22]:
from sklearn import metrics
def print_metrics(y_true, preds, threshold=0.5):
    """Compute multi-label metrics for thresholded predictions.

    Despite its name, this function prints nothing: it returns the metrics so
    the notebook can display them.

    Parameters
    ----------
    y_true : array-like
        Ground-truth multi-hot targets, one row per sample.
    preds : array-like
        Predicted scores with the same layout as ``y_true``. Plain nested
        lists are accepted as well as arrays.
    threshold : float, optional
        A score greater than or equal to this value counts as a positive
        prediction. Defaults to 0.5.

    Returns
    -------
    dict
        ``micro_f1`` and ``macro_f1`` (F1 with micro / macro averaging),
        ``accuracy`` (``accuracy_score`` on the full label matrix) and
        ``hamming_accuracy`` (one minus the Hamming loss).
    """
    y_true = np.asarray(y_true)
    # Convert first: comparing a plain Python list with a float raises
    # TypeError, whereas an array is compared element-wise.
    y_pred = (np.asarray(preds) >= threshold).astype(int)
    return {
        'micro_f1': metrics.f1_score(y_true, y_pred, average='micro'),
        'macro_f1': metrics.f1_score(y_true, y_pred, average='macro'),
        'accuracy': metrics.accuracy_score(y_true, y_pred),
        'hamming_accuracy': 1 - metrics.hamming_loss(y_true, y_pred)
    }

In [23]:
# Validation-split metrics using the 0.5 decision threshold.
# NOTE(review): the truncated warning lines in the output look like sklearn's
# undefined-metric warning (labels with no predicted or no true samples),
# which would pull macro F1 down — confirm.
print_metrics(y_valid, preds_val)

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


{'micro_f1': 0.6950016562994591,
 'macro_f1': 0.22566219109122998,
 'accuracy': 0.5781452906577038,
 'hamming_accuracy': 0.9966150153369033}

In [24]:
# Test-split metrics using the 0.5 decision threshold.
print_metrics(y_test, preds)

{'micro_f1': 0.6918780681383933,
 'macro_f1': 0.22607089294303334,
 'accuracy': 0.5777187032114156,
 'hamming_accuracy': 0.996574481257932}