In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
from torch.nn.utils.rnn import pack_padded_sequence
from torch.nn.utils.rnn import pad_sequence
from torch.nn.utils.rnn import pack_sequence

import import_ipynb

from datetime import datetime
import argparse
import numpy as np

from sklearn.metrics import roc_curve, auc, roc_auc_score

from rnn import RNN
from retain import RETAIN
from retain_bkey import RETAIN_BKEY

importing Jupyter notebook from rnn.ipynb
importing Jupyter notebook from retain.ipynb
importing Jupyter notebook from retain_bkey.ipynb


In [2]:
def parsing():
    parser = argparse.ArgumentParser()
    parser.add_argument('--cuda', action='store_true',
                        help='use CUDA (default: False)')
    parser.add_argument('--eval', action='store_true',
                        help='do evaluate (default: False)')
    parser.add_argument('--dropout', type=float, default=0.5,
                        help='dropout applied to layers (default: 0)')
    parser.add_argument('--clip', type=float, default=-1,
                        help='gradient clip, -1 means no clip (default: -1)')
    parser.add_argument('--epochs', type=int, default=500,
                        help='upper epoch limit (default: 500)')
    parser.add_argument('--report_step', type=int, default=1, metavar='N',
                        help='report interval (default: 20')
    parser.add_argument('--valid_step', type=int, default=1, metavar='N',
                        help='validation interval (default: 20')
    parser.add_argument('--lr', type=float, default=0.0001,
                        help='initial learning rate (default: 1e-3)')
    parser.add_argument('--optim', type=str, default='Adam',
                        help='optimizer to use (default: Adam)')
    parser.add_argument('--batchs', type=int, default=16,
                        help='number of batchs (default: 10)')
    parser.add_argument('--val_batchs', type=int, default=32,
                        help='number of batchs (default: 10)')
    parser.add_argument('--train_data', type=str, default='data/physionet-a/train')
    parser.add_argument('--dev_data', type=str, default='data/physionet-a/dev')
    parser.add_argument('--test_data', type=str, default='data/physionet-a/test')
    parser.add_argument('--seed', type=int, default=190408,
                        help='random seed')
    parser.add_argument('--model_name', type=str, default='AP',
                        help='the dataset to run (default: )')
    parser.add_argument('--embedding_size', type=int, default=128,
                        help='dimension of embeddings')
    parser.add_argument('--hidden_size', type=int, default=128,
                        help='dimension of hidden')
    return parser

In [3]:
def convert_nparr2torchtensor(t, rev=True):
    out = t[:]
    if rev:
        for idx, elm in enumerate(t):
            out[len(t)-idx-1] = torch.Tensor(elm)
    else:
        for idx, elm in enumerate(t):
            out[idx] = torch.Tensor(elm)   
    return out

def load_data(args):
    x_train = convert_nparr2torchtensor(np.load(args.train_data + '.x.npy'))
    y_train = convert_nparr2torchtensor(np.load(args.train_data + '.y.npy'))
    assert len(x_train) == len(y_train)
    x_dev = convert_nparr2torchtensor(np.load(args.dev_data + '.x.npy'))
    y_dev = convert_nparr2torchtensor(np.load(args.dev_data + '.y.npy'))
    assert len(x_dev) == len(y_dev)
    x_test = convert_nparr2torchtensor(np.load(args.test_data + '.x.npy'))
    y_test = convert_nparr2torchtensor(np.load(args.test_data + '.y.npy'))
    assert len(x_test) == len(y_test)
    return x_train, y_train, x_dev, y_dev, x_test, y_test

In [4]:
def load_model(args):
    model = None
    if args.model_name == 'RNN':
        #input_size, embedding_size, hidden_size, output_size
        model = RNN(args.input_size, args.embedding_size, args.hidden_size, args.output_size, 
                    dropout=args.dropout)
    elif args.model_name == 'RETAIN':
        #input_size, embedding_size, hidden_size, output_size
        model = RETAIN(args.input_size, args.embedding_size, args.hidden_size, args.output_size, 
                    dropout=args.dropout)
    elif args.model_name == 'RETAIN_BKEY':
        #input_size, embedding_size, hidden_size, output_size
        model = RETAIN_BKEY(args.input_size, args.embedding_size, args.hidden_size, args.output_size, 
                    dropout=args.dropout)
    else:
        print('No such model name')
        exit()
    return model    

In [5]:
def generate_batch_index(total_length, batch_size, do_shuffle = True):
    train_idx_list = np.arange(total_length)
    if do_shuffle:
        np.random.shuffle(train_idx_list)
    batch_indices = []
    for i in range((total_length // batch_size)+1):
        start_idx = i*batch_size
        end_idx = min(total_length, (i+1)*batch_size)
        if start_idx == end_idx:
            break
        sub_indices = train_idx_list[start_idx : end_idx]
        if len(sub_indices) > 0:
            batch_indices.append(sub_indices)
    return batch_indices
    
def get_paded_seq(x):
    x = list(x)
    x.sort(key=lambda element: -element.shape[0])
    lengths = [ele.shape[0] for ele in x]
    batches = pad_sequence(x, batch_first=True)
    return torch.Tensor(batches), torch.Tensor(lengths)
    
def train_per_epoch(model, batch_size, x_train, y_train, criterion, cuda_on=True):
    model.train()
    total_loss = 0
    count = len(x_train)
    batch_indices = generate_batch_index(count, batch_size)
    for idx_list in batch_indices:
        x, lengths = get_paded_seq(x_train[idx_list])
        x = Variable(x)
        y = Variable(torch.Tensor(y_train[idx_list]))
        if cuda_on:
            x, y = x.cuda(), y.cuda()
            lengths = lengths.cuda()
        optimizer.zero_grad()
        output = model(x, lengths)
        reg_loss = None
        for param in model.parameters():
            if reg_loss is None:
                reg_loss = 0.5 * torch.sum(param**2)
            else:
                reg_loss = reg_loss + 0.5 * param.norm(2)**2

        loss = criterion(output, y) #+ reg_loss * 0.0001

        total_loss += loss.item() /count
        loss.backward()
        optimizer.step()
    return total_loss

In [6]:
def validate(model, batch_size, x_dev, y_dev, criterion, cuda_on=True):
    model.eval()
    total_loss = 0.0
    count = len(x_dev)
    batch_indices = generate_batch_index(count, batch_size, do_shuffle = False)
    with torch.no_grad():
        for idx_list in batch_indices:
            x, lengths = get_paded_seq(x_dev[idx_list])
            x = Variable(x)
            y = Variable(torch.Tensor(y_dev[idx_list]))
            if cuda_on:
                x, y = x.cuda(), y.cuda()
                lengths = lengths.cuda()
            output = model(x, lengths)
            loss = criterion(output, y)
            total_loss += loss.item() 
        eval_loss = total_loss / count
        return eval_loss

In [7]:
def ROC_AUC(p, y): 
    auc_s = roc_auc_score(y, p)
    return auc_s

def evaluate(model, batch_size, x_test, y_test, criterion, cuda_on=True):
    model.eval()
    total_loss = 0.0
    count = len(x_test)
    batch_indices = generate_batch_index(count, batch_size, do_shuffle = False)
    pred = torch.Tensor([])
    with torch.no_grad():
        for idx_list in batch_indices:
            x, lengths = get_paded_seq(x_test[idx_list])
            x = Variable(x)
            if cuda_on:
                x = x.cuda()
                lengths = lengths.cuda()
            output = model(x, lengths)
            output = output.cpu()
            pred = torch.cat([pred, output])
        pred = pred.squeeze().numpy()
        #p = pred.argsort()
        #pred = pred[p]
        #y_test = y_test[p]
        return ROC_AUC(pred, y_test)

In [8]:
if __name__ == "__main__":  
    #parsing
    parser = parsing()
    args = parser.parse_args(['--cuda', '--eval', '--model_name', 'RETAIN', 
                              '--epochs', '1', '--lr', '0.0001'])
    print('[{:s}]'.format(str(datetime.now())), args)
    
    #seed
    torch.manual_seed(args.seed)
    #cuda checking
    if torch.cuda.is_available():
        if not args.cuda:
            print("WARNING: Please use a CUDA device. Run with --cuda")    
    
    #data load
    x_train, y_train, x_dev, y_dev, x_test, y_test = load_data(args)
    print('[{:s}] #seq: {:d}; input dim: {:d}; output dim: {:d};'.format(str(datetime.now()), 
                                                        len(x_train), 
                                                        x_train[0].shape[1],
                                                        y_train[0].shape[0]))
    args.input_size = x_train[0].shape[1]
    args.output_size = y_train[0].shape[0]
    
    #model load
    model = load_model(args)
    if args.cuda:
        model.cuda()
    
    #set optimizer
    optimizer = None    
    if args.optim == 'Adam' or args.optim == 'RMSprop':
        optimizer = getattr(optim, args.optim)(model.parameters(), lr=args.lr)

    #set loss
    criterion = nn.BCELoss(reduction='sum')
    print('-'*80)
    
    #run training
    best_vloss = None
    best_ep = 0
    model_file = "./save/{0}.pt".format(args.model_name)
    start_time = datetime.now()
    print("[{:s}] model: <{:s}>; lr: <{:.5f}>; optimizer: <{:s}>".format(
        str(start_time), args.model_name, args.lr, args.optim))
    for ep in range(1, args.epochs+1):
        rloss = train_per_epoch(model, args.batchs, x_train, y_train, criterion, cuda_on=True)
        
        if ep % args.report_step == 0:
            now = datetime.now()
            dist_time = now - start_time            
            print("[{:s}] Step {:2d}/{:2d}; loss: {:.5f}; {:.1f}s ".format(str(now), 
                                                                           ep, 
                                                                           args.epochs, 
                                                                           rloss, 
                                                                           dist_time.total_seconds()))

        if ep % args.valid_step == 0:
            vloss = validate(model, args.val_batchs, x_dev, y_dev, criterion, cuda_on=True)
            print("\t Validation loss {:.5f} ".format(vloss))
            if (best_vloss == None) or vloss < best_vloss:
                torch.save({'state_dict': model.state_dict()}, model_file)
                best_vloss = vloss
                best_ep = ep
    print('-' * 80)
    checkpoint = torch.load(model_file)
    model.load_state_dict(checkpoint['state_dict'])
    
    


[2019-04-10 11:22:42.813557] Namespace(batchs=16, clip=-1, cuda=True, dev_data='data/physionet-a/dev', dropout=0.5, embedding_size=128, epochs=1, eval=True, hidden_size=128, lr=0.0001, model_name='RETAIN', optim='Adam', report_step=1, seed=190408, test_data='data/physionet-a/test', train_data='data/physionet-a/train', val_batchs=32, valid_step=1)
[2019-04-10 11:22:43.275422] #seq: 3200; input dim: 41; output dim: 1;
--------------------------------------------------------------------------------
[2019-04-10 11:22:45.685639] model: <RETAIN>; lr: <0.00010>; optimizer: <Adam>


  range = torch.range(start=1, end=number_of_logits, device=device).view(1, -1)


[2019-04-10 11:22:56.295245] Step  1/ 1; loss: 0.32595; 10.6s 
	 Validation loss 0.34830 
--------------------------------------------------------------------------------


In [9]:
    best_vloss = validate(model, args.val_batchs, x_dev, y_dev, criterion, cuda_on=True)
    print('Best performance: epoch: {:d}, loss: {:.5f}'.format(best_ep, best_vloss))    
    if args.eval:
        _auc = evaluate(model, args.val_batchs, x_test, y_test, criterion, cuda_on=True)
        print('Evaluation- AUC: {:.5f}'.format(_auc))

Best performance: epoch: 1, loss: 0.34830
Evaluation- AUC: 0.46404
