In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import sys
root_path = '../../../'
sys.path.append(root_path)

In [23]:
import argparse
import logging
import os
import numpy as np
import torch
from src.ner import utils
from src.booster.progressive_encoder import CharEncoder, EntityEncoder, WordEncoder
from src.booster.progNN.net import LSTMCRF
from src.ner.model.data_loader import DataLoader

In [24]:
data_dir = os.path.join(root_path, 'src/ner/data/bc5cdr_iobes_id')
model_dir = os.path.join(root_path, 'src/ner/experiments/disease/st/st_bc5cdr_all')
restore_file = 'best'

In [25]:
# 1. set the device to train on
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [26]:
# 2. Load the parameters from json file
network_params = os.path.join(model_dir, 'params.json')
assert os.path.isfile(network_params), "No json configuration file found at {}".format(network_params)
params = utils.Params(network_params)
params.cuda = torch.cuda.is_available() # use GPU if available

In [27]:
# 3. Set the random seed for reproducible experiments
torch.manual_seed(230)
if params.cuda: torch.cuda.manual_seed(230)
np.random.seed(0)

In [28]:
# 4. Set the logger
utils.set_logger(os.path.join(model_dir, 'train.log'))

In [29]:
#5.1 load the encoders
data_encoder = utils.load_obj(os.path.join(model_dir, 'data_encoder.pkl'))
label_encoder = utils.load_obj(os.path.join(model_dir, 'label_encoder.pkl'))
# 5.2 load data
data_loader = DataLoader(params, data_dir, data_encoder, label_encoder)
data = data_loader.load_data(['test'])
test_data = data['test']
# 5.3 specify the train and val dataset sizes
params.test_size = test_data['size']
test_data_iterator = data_loader.batch_iterator(test_data, params, shuffle=False)
logging.info("- done.")
logging.info('test size: {}'.format(params.test_size))

- done.
test size: 4796


In [30]:
# 6. Modeling
# 6.1 Define the model
model = LSTMCRF(params=params,
                char_vocab_length=data_encoder[CharEncoder.FEATURE_NAME].vocab_length,
                num_tags=label_encoder[EntityEncoder.FEATURE_NAME].num_tags,
                pretrained_word_vecs=torch.from_numpy(data_encoder[WordEncoder.FEATURE_NAME].vectors),
                dropout=params.dropout,
                decoder_type=params.decoder,
                bidirectional=True,
                freeze_embeddings=False).to(device).float()

# 6.2 fetch loss function and metrics
from src.ner.evaluation import accuracy_score, f1_score, precision_score, recall_score

metrics = {'accuracy': accuracy_score,
           'f1_score': f1_score,
           'precision_score': precision_score,
           'recall_score': recall_score}

utils.load_checkpoint(os.path.join(model_dir, restore_file + '.pth'), model)

  "num_layers={}".format(dropout, num_layers))


{'epoch': 74,
 'optim_dict': {'param_groups': [{'dampening': 0,
    'initial_lr': 0.015,
    'lr': 0.003225806451612903,
    'momentum': 0.9,
    'nesterov': False,
    'params': [2581706589312,
     2581706589168,
     2581706589096,
     2581706547560,
     2581706547992,
     2581706548640,
     2581706566680,
     2581706565888,
     2581706565600,
     2581706565384,
     2581706565168,
     2581706613960,
     2581706613888,
     2581706613600,
     2581706613528,
     2581706613456,
     2581706613744,
     2581706613168,
     2581706613096,
     2581706614248,
     2581706614176,
     2581706614032,
     2581706613384,
     2581706613024],
    'weight_decay': 0}],
  'state': {2581706547560: {'momentum_buffer': tensor([[ 1.8805e-02, -6.7515e-03,  1.1819e-02,  ...,  2.2066e-02,
             -1.4792e-02,  9.7487e-03],
            [-8.6670e-03,  2.7189e-02, -4.3166e-03,  ..., -5.2701e-02,
              2.4169e-02,  6.8134e-03],
            [-4.7623e-04, -1.3882e-02,  1.7814e-03,  .

In [31]:
# Evaluate
from src.ner.evaluate import evaluate
num_steps = (params.test_size + 1) // params.batch_size
test_metrics = evaluate(model,
                        test_data_iterator,
                        metrics,
                        num_steps,
                        label_encoder)
save_path = os.path.join(model_dir, "metrics_test_{}.json".format(restore_file))
utils.save_dict_to_json(test_metrics, save_path)

- train Eval metrics : accuracy: 0.966 ; f1_score: 0.844 ; precision_score: 0.849 ; recall_score: 0.840 ; loss: 22.777
