In [12]:
import pandas as pd
import numpy as np
from tqdm import tqdm, trange
import torch

## Configurations

In [16]:
from configs import PathConfig, GlobalConfig, DeviceConfig, ModelConfig, OptimizerConfig, TrainConfig, EvalConfig

In [9]:
# Directory settings handed to PathConfig. How PathConfig interprets the two
# keys is defined in configs.py and is not visible from this notebook.
path_config_dict = dict(
    data_dir="./data/NER/",
    output_dir="./NER_output/",
)
path_config = PathConfig(path_config_dict)

In [10]:
# Run-wide switches handed to GlobalConfig; fp16 is set to False for this run.
global_config_dict = dict(fp16=False)
global_config = GlobalConfig(global_config_dict)

In [13]:
# Device settings handed to DeviceConfig; "no_cuda": False leaves CUDA enabled.
device_config_dict = {"no_cuda": False}
device_config = DeviceConfig(device_config_dict)

# torch.cuda.get_device_name(0) raises when no CUDA device is usable, so only
# query it when CUDA is available; otherwise report that we are on the CPU.
if torch.cuda.is_available():
    print("device name: {}".format(torch.cuda.get_device_name(0)))
else:
    print("device name: {}".format("cpu"))
print("number of gpus: {}".format(device_config.n_gpu))

device name: Tesla K80
number of gpus: 1


In [14]:
# Model settings handed to ModelConfig.
# NOTE(review): num_labels is fixed at 18 here; presumably it has to equal
# len(label_list) obtained from KaggleNERProcessor.get_labels() -- confirm.
model_config_dict = dict(
    bert_model="bert-base-uncased",
    max_seq_length=75,
    num_labels=18,
    model_type="token",
)
model_config = ModelConfig(model_config_dict)

In [17]:
# Optimizer settings handed to OptimizerConfig.
# NOTE(review): no_decay_params looks like parameter-name fragments that are
# exempt from weight decay -- confirm against configure_optimizer.
optimizer_config_dict = dict(
    no_decay_params=["bias", "gamma", "beta"],
    learning_rate=3e-5,
)
optimizer_config = OptimizerConfig(optimizer_config_dict)

In [18]:
# Training settings handed to TrainConfig: batch size and number of epochs.
train_config_dict = dict(
    train_batch_size=32,
    num_train_epochs=5,
)
train_config = TrainConfig(train_config_dict)

In [19]:
# Evaluation settings: EvalConfig is constructed with no arguments here, so
# whatever defaults it defines in configs.py apply.
eval_config = EvalConfig()

## Preprocess Data

### Create training and validation examples

In [3]:
from bert_data_utils import KaggleNERProcessor

In [4]:
# Processor for the Kaggle NER CSV, created with dev_percentage=0.1.
# NOTE(review): dev_percentage is presumably the fraction of rows held out
# for validation -- confirm in bert_data_utils.
kaggle_ner_processor = KaggleNERProcessor(
    data_dir="./data/NER/ner_dataset.csv",
    dev_percentage=0.1,
)

In [5]:
# Fetch the training and dev examples from the processor. Both calls are given
# the same CSV path that the processor itself was constructed with.
train_examples = kaggle_ner_processor.get_train_examples(
    data_dir="./data/NER/ner_dataset.csv",
)
dev_examples = kaggle_ner_processor.get_dev_examples(
    data_dir="./data/NER/ner_dataset.csv",
)

In [6]:
# Labels reported by the processor; reused as `label_list` when building the
# dataloaders and when evaluating the model later in this notebook.
label_list = kaggle_ner_processor.get_labels()

### Create dataloaders

In [20]:
from bert_utils import create_train_dataloader, create_eval_dataloader

In [21]:
# Build the training dataloader from the training examples, the label list and
# the model / train / device configs.
train_dataloader = create_train_dataloader(
    train_examples=train_examples,
    model_config=model_config,
    train_config=train_config,
    label_list=label_list,
    device_config=device_config,
)

In [22]:
# Build the validation dataloader from the dev examples, the label list and
# the model / eval configs.
valid_dataloader = create_eval_dataloader(
    eval_examples=dev_examples,
    model_config=model_config,
    eval_config=eval_config,
    label_list=label_list,
)

## Load Model

In [24]:
from bert_utils import load_model

In [25]:
# Instantiate the model via load_model using the model, path, device and
# global configs defined above.
model = load_model(
    model_config=model_config,
    path_config=path_config,
    device_config=device_config,
    global_config=global_config,
)

## Configure Optimizer

In [26]:
from bert_utils import configure_optimizer

In [27]:
# Build the optimizer. configure_optimizer returns three values; the third is
# not used in this notebook.
#
# The keyword is `num_train_examples`, so pass the number of training examples.
# The previous value, len(train_dataloader), is the number of *batches* when
# train_dataloader is a torch DataLoader, which would understate the example
# count by roughly a factor of train_batch_size.
# NOTE(review): assumes train_examples supports len() and that
# configure_optimizer derives its step count from examples / batch size --
# confirm in bert_utils.
#
# NOTE: optimizer_config is rebound to the returned config, so re-running this
# cell passes the already-processed config back in.
optimizer, optimizer_config, _ = configure_optimizer(
    optimizer_config=optimizer_config,
    global_config=global_config,
    train_config=train_config,
    device_config=device_config,
    model=model,
    num_train_examples=len(train_examples),
)

## Train Model

In [31]:
from bert_utils import train_token_model

In [30]:
# Train the token-level model. `model` is rebound to the object returned by
# train_token_model, and the second return value is kept as train_loss.
model, train_loss = train_token_model(
    model=model,
    train_dataloader=train_dataloader,
    optimizer=optimizer,
    train_config=train_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    device_config=device_config,
    global_config=global_config,
)

## Evaluate Model

In [32]:
from bert_utils import eval_token_model

In [28]:
def flat_accuracy(preds, labels):
    """Return the fraction of positions whose arg-max prediction equals its label.

    Args:
        preds: array-like of per-class scores with the classes on the LAST
            axis, e.g. shape (batch, seq_len, num_labels) or (n, num_labels).
        labels: array-like of integer class ids, one per scored position,
            e.g. shape (batch, seq_len) or (n,).

    Returns:
        The accuracy as a float in [0, 1]. Returns 0.0 when both inputs are
        empty instead of dividing by zero.

    Raises:
        ValueError: if the number of predictions differs from the number of
            labels.

    Note:
        Every position is counted. If the inputs contain padded positions,
        those are included in the accuracy as well.
    """
    scores = np.asarray(preds)
    labels_flat = np.asarray(labels).flatten()

    # Nothing to score: avoid argmax on an empty axis and the 0/0 -> NaN result.
    if scores.size == 0 and labels_flat.size == 0:
        return np.float64(0.0)

    # axis=-1 is the class axis; identical to axis=2 for 3-D input, and it also
    # accepts already-flattened (n, num_labels) scores.
    pred_flat = np.argmax(scores, axis=-1).flatten()

    if pred_flat.size != labels_flat.size:
        raise ValueError(
            "preds yields {} predictions but labels has {} entries".format(
                pred_flat.size, labels_flat.size
            )
        )

    return np.sum(pred_flat == labels_flat) / labels_flat.size

In [29]:
# Evaluate on the validation dataloader, scoring with flat_accuracy.
# Run this after the training cell: that cell rebinds `model`, and this call
# evaluates whatever `model` currently refers to.
preds, eval_loss, eval_accuracy = eval_token_model(
    model=model,
    eval_dataloader=valid_dataloader,
    model_config=model_config,
    device_config=device_config,
    label_list=label_list,
    eval_func=flat_accuracy,
)