In [None]:
# library

import math
import os
import sys
import subprocess
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as pat
import numpy as np
import pandas as pd
from tqdm import tqdm
import json
import random
from collections import namedtuple
import tensorboardX as tbx

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data

from utils.mylogger import NewLogger
from utils.torchblock import Block

# Seed every random number generator the notebook imports so that the
# dataset split, weight initialisation and dropout are repeatable.
SEED = 999
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the first GPU when one is present; otherwise fall back to the CPU so
# that later `.to(device)` calls do not fail on CUDA-less machines.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Only has an effect when cuDNN is actually used; setting it without a GPU is harmless.
torch.backends.cudnn.benchmark = True

In [None]:
# config, logger, tensorboard

# Every tunable of the run lives in this one mapping; it is logged and
# written to disk below so each log directory records its own settings.
config = dict(
    test_mode=False,
    model_name='1layer_nopl_lr2e-4_dropout0.2_ksize4_weight10',
    num_epochs=50,
    batch_size=32,
    n_channels=8,
    k_size=4,
    d_rate=0.2,
    lr=2e-4,
    loss_weight=10.0,
    uaph_threshold=7.05,
)
# Attribute-style view of the same settings (c.lr, c.batch_size, ...).
c = namedtuple('config_class', list(config))(**config)

# Rebind to None first, so the name never points at a logger from an earlier
# run of this cell if the constructor below fails (presumably the intent).
logger = None
logger = NewLogger(test=c.test_mode, result_path='./log/', model_name=c.model_name)

# Dump the settings into the log, one "key value" pair per line.
logger.debug('=== config ===')
for k, v in config.items():
    logger.debug('%s %s' % (k, v))
logger.debug('==============')

# Keep a machine-readable copy next to the log output.
config_path = os.path.join(logger.dir_path, 'config.json')
with open(config_path, mode='w') as f:
    json.dump(config, f, indent=2)



In [None]:
# CTG Dataset

class CTGDataset(data.Dataset):
    """Dataset of paired FHR / TOCO recordings with a thresholded uaph label.

    Each item is read from two header-less CSV files (one FHR, one TOCO) and
    labelled from the value stored at the same index of the uaph CSV
    (presumably the umbilical-artery pH of that recording - confirm).

    Args:
        fhr_path_list: paths of the FHR CSV files, one per sample.
        toco_path_list: paths of the TOCO CSV files, in the same order.
        uaph_path: path of a header-less CSV; its values are flattened into
            a 1-D array indexed like the path lists.
        uaph_threshold: values strictly below this give the label [0., 1.],
            all others give [1., 0.].
    """

    def __init__(self, fhr_path_list, toco_path_list, uaph_path, uaph_threshold):
        self.fhr_path_list = fhr_path_list
        self.toco_path_list = toco_path_list
        self.uaph_threshold = uaph_threshold
        uaph_table = pd.read_csv(uaph_path, header=None)
        self.uaph = uaph_table.values.reshape([-1])

    def __len__(self):
        """Number of samples, taken from the FHR path list."""
        return len(self.fhr_path_list)

    def __getitem__(self, index):
        """Return (normalised FHR, raw TOCO, one-hot label, uaph value)."""
        fhr_raw = self.path2tensor(self.fhr_path_list[index])
        toco_data = self.path2tensor(self.toco_path_list[index])

        uaph = self.uaph[index]
        # One-hot target: position 1 marks "below threshold".
        one_hot = [0., 1.] if uaph < self.uaph_threshold else [1., 0.]
        label = torch.tensor(one_hot)

        # Fixed affine normalisation of the FHR signal; TOCO is left as read.
        fhr_data = (fhr_raw - 135) / 25
        return fhr_data, toco_data, label, uaph

    @staticmethod
    def path2tensor(path):
        """Load a header-less CSV as a float tensor of shape (1, n).

        All values are flattened and only the last 14400 are kept, so n is
        at most 14400 (shorter files yield shorter tensors).
        """
        flat = pd.read_csv(path, header=None).values.reshape([-1])
        tail = flat[-14400:]
        return torch.from_numpy(tail).to(torch.float).reshape(1, -1)

In [None]:
# dataloader

# Recordings are stored as <id>_fhr.csv / <id>_toco.csv with ids 1..N_RECORDS.
# Keeping the directory and the count in one place stops the two path lists
# from drifting apart.
CTU_CSV_DIR = './data/ctu/ctu_csv'
N_RECORDS = 552
record_ids = range(1, N_RECORDS + 1)
fhr_path_list = ['%s/%s_fhr.csv' % (CTU_CSV_DIR, i) for i in record_ids]
toco_path_list = ['%s/%s_toco.csv' % (CTU_CSV_DIR, i) for i in record_ids]

all_dataset = CTGDataset(fhr_path_list, toco_path_list, './data/ctu/uaph.csv', c.uaph_threshold)

# 80 % / 20 % random split; the validation part takes whatever is left so the
# two sizes always add up to the full dataset.
train_size = int(len(all_dataset) * 0.8)
val_size = len(all_dataset) - train_size
train_dataset, val_dataset = data.random_split(all_dataset, [train_size, val_size])

dataloader = {
    'train': data.DataLoader(train_dataset, batch_size=c.batch_size,
                             shuffle=True, num_workers=4),
    # Validation only aggregates loss and counts over the whole split, so the
    # sample order is irrelevant and shuffling is unnecessary.
    'val': data.DataLoader(val_dataset, batch_size=c.batch_size,
                           shuffle=False, num_workers=4),
}

logger.debug('train_size:%s, val_size:%s' % (train_size, val_size))

# Quick manual check of one batch:
# next(iter(dataloader['train']))[0]

In [None]:
def calc_statistics(pred, truth):
    """
    Compute confusion-matrix counts and the rates derived from them.

    Class 1 is the positive class. Any rate whose denominator is zero is
    reported as the sentinel -1 instead of raising.

    Args:
        pred(torch.tensor): predicted class per sample, 0 or 1
        truth(torch.tensor): true class per sample, 0 or 1

    Returns:
        tp, fp, fn, tn, tpr, tnr, acc, prec, f1
        (the four counts are ints; each rate is a float, or -1 if undefined)
    """

    tp = int(torch.sum((pred == 1) & (truth == 1)))
    fp = int(torch.sum((pred == 1) & (truth == 0)))
    fn = int(torch.sum((pred == 0) & (truth == 1)))
    tn = int(torch.sum((pred == 0) & (truth == 0)))

    total = tp + fp + fn + tn

    tpr = tp / (tp + fn) if tp + fn > 0 else -1
    tnr = tn / (fp + tn) if fp + tn > 0 else -1
    acc = (tp + tn) / total if total > 0 else -1
    prec = tp / (tp + fp) if tp + fp > 0 else -1

    # F1 is the harmonic mean of precision and recall. It is only meaningful
    # when both are defined and not both zero; the -1 sentinels must never
    # enter the arithmetic, otherwise an "undefined" input yields a
    # plausible-looking number.
    if prec < 0 or tpr < 0 or prec + tpr == 0:
        f1 = -1
    else:
        f1 = (2 * prec * tpr) / (tpr + prec)

    return tp, fp, fn, tn, tpr, tnr, acc, prec, f1

In [None]:
def weights_init(m):
    """Re-initialise a module in place, chosen by its class name.

    Intended for ``nn.Module.apply``. Modules whose class name contains
    'Conv' get weights drawn from N(0, 0.02); otherwise, modules whose class
    name contains 'BatchNorm' get weights drawn from N(1, 0.02) and a zero
    bias. Every other module is left untouched.
    """
    layer_kind = type(m).__name__

    if 'Conv' in layer_kind:
        # Only the kernel is redrawn; the convolution bias keeps its default.
        nn.init.normal_(m.weight.data, mean=0.0, std=0.02)
        return

    if 'BatchNorm' in layer_kind:
        nn.init.normal_(m.weight.data, mean=1.0, std=0.02)
        nn.init.constant_(m.bias.data, val=0)

In [None]:
class Net(nn.Module):
    """Single-convolution classifier over a 1-channel signal.

    Pipeline: max-pool by 40 -> circular Conv1d -> BatchNorm -> ReLU ->
    flatten -> dropout -> linear -> softmax over the 2 classes.

    Args:
        n_channels: number of output channels of the convolution.
        k_size: convolution kernel size (circular padding of k_size - 1).
        d_rate: dropout probability applied before the linear layer.
        input_len: length of the input signal. Defaults to 14400, the
            maximum length produced by ``CTGDataset.path2tensor``.

    Input:  tensor of shape (batch, 1, input_len).
    Output: tensor of shape (batch, 2); each row sums to 1.
    """

    def __init__(self, n_channels, k_size, d_rate, input_len=14400):
        super().__init__()

        pool = nn.MaxPool1d(4*10)  # input_len=14400 -> (1, 360)
        conv = nn.Conv1d(1, n_channels, kernel_size=k_size,
                         padding=k_size-1, padding_mode='circular')

        # The length after a circularly padded convolution depends on the
        # PyTorch version (older releases split `padding` across both sides,
        # newer ones pad `padding` on each side). Measure it with a dummy
        # pass instead of hard-coding 360, so the linear layer always fits.
        with torch.no_grad():
            conv_len = conv(pool(torch.zeros(1, 1, input_len))).shape[-1]

        self.seq = nn.Sequential(
            pool,

            conv,
            nn.BatchNorm1d(n_channels),
            nn.ReLU(),

            #nn.MaxPool1d(2), # optional extra pooling, disabled for this model

            nn.Flatten(),
            nn.Dropout(p=d_rate),
            nn.Linear(n_channels*conv_len, 2),
            # Explicit class dimension; leaving `dim` implicit is deprecated.
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        """Return class probabilities of shape (batch, 2) for input x."""
        return self.seq(x)

In [None]:
# network, loss, optimizer

logger.debug('network initializing')

# Build the model on the target device first, then re-initialise its layers
# in place (nn.Module.apply visits every sub-module).
net = Net(n_channels=c.n_channels, k_size=c.k_size, d_rate=c.d_rate).to(device)
net.apply(weights_init)

# Per-output loss weights: the second output column counts c.loss_weight
# times as much as the first.
weight = torch.tensor([1.0, c.loss_weight], device=device)

logger.debug('network initialized')

optimizer = optim.Adam(net.parameters(), lr=c.lr, betas=(0.8, 0.999))
criterion = nn.BCELoss(weight=weight, reduction='mean')

logger.debug(net)

# Quick manual check of the output shape:
# net(next(iter(dataloader['train']))[0].to(device)).size()

In [None]:
# execute

writer = tbx.SummaryWriter(log_dir=logger.dir_path)

# The try/finally guarantees the TensorBoard writer is closed even when
# training is interrupted or raises part-way through.
try:
    for epoch in range(1, c.num_epochs+1):

        for phase in ['train', 'val']:

            # Sample-weighted running loss and the predictions / targets of
            # the whole phase, collected on the CPU for the statistics below.
            phase_loss = 0

            epoch_pred_cls = torch.tensor([]).to(torch.long)
            epoch_true_cls = torch.tensor([]).to(torch.long)

            if phase == 'train':
                net.train()
            else:
                net.eval()

            # Epoch 1 skips training, so its validation metrics describe the
            # freshly initialised network.
            if epoch == 1 and phase == 'train':
                continue

            for batch in dataloader[phase]:
                # batch = (fhr, toco, label, uaph); only FHR and label are used.
                inputs = batch[0].to(device)
                labels = batch[2].to(device)

                optimizer.zero_grad()

                # Gradients are tracked only while training.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = net(inputs)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                    # Turn probabilities and one-hot labels into class indices.
                    _, pred_cls = torch.max(outputs, 1)
                    _, true_cls = torch.max(labels, 1)

                    epoch_pred_cls = torch.cat([epoch_pred_cls, pred_cls.to('cpu')])
                    epoch_true_cls = torch.cat([epoch_true_cls, true_cls.to('cpu')])

                    # The criterion returns a batch mean; weight it by the
                    # batch size so the last (smaller) batch counts correctly.
                    phase_loss += loss.item() * inputs.size(0)

            phase_loss /= len(dataloader[phase].dataset)

            logger.debug('epoch: {}, phase: {}, loss: {:.4f}'.format(
                         epoch, phase, phase_loss))
            tp, fp, fn, tn, tpr, tnr, acc, prec, f1 = \
                calc_statistics(epoch_pred_cls, epoch_true_cls)

            writer.add_scalars('loss', {phase: phase_loss}, epoch)
            writer.add_scalars('statistics/tpr', {'%s_tpr' % phase: tpr}, epoch)
            writer.add_scalars('statistics/tnr', {'%s_tnr' % phase: tnr}, epoch)

            logger.info('epoch: {}, phase: {}, loss: {:.3f}'.format(epoch, phase, phase_loss))
#             logger.info('[tp: %s fn: %s] [fp: %s tn: %s]' % (tp, fn, fp, tn))
            logger.info('tpr: {:.3f}, tnr: {:.3f}'.format(tpr, tnr))
#             logger.info('acc: {:.3f}, prec: {:.3f}, f1: {:.3f}'.format(acc, prec, f1))
finally:
    writer.close()

logger.info('tensorboard --logdir %s' % os.path.abspath(logger.dir_path))