## Train

In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pwd

In [2]:
%cd /content/drive/MyDrive/Colab Notebooks/[Microdegree] Hypotension/code # change directory
!pwd

/content/drive/MyDrive/Colab Notebooks/[Microdegree] Hypotension/code
/content/drive/MyDrive/Colab Notebooks/[Microdegree] Hypotension/code


### Train

In [5]:
# !pip install torchsummary
# !pip install tensorboard
# !pip install wandb

In [2]:
import numpy as np
from collections import Counter
from tqdm import tqdm
from matplotlib import pyplot as plt

import os
import glob
import re
import pickle
import multiprocessing
import wandb
import argparse
from datetime import datetime
from pathlib import Path
import random
import json

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary

from model_resnet1d import ResNet1D
from pytorchtools import EarlyStopping

# from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve, accuracy_score, precision_score, recall_score, f1_score # classification
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score # regression

In [3]:
def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    In addition to seeding, the cuDNN backend flags are set so that
    ``deterministic`` is on and ``benchmark`` is off.

    Args:
        seed (int): value handed to every generator.
    """
    # Python and NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators: CPU, current GPU, then all GPUs (multi-GPU runs).
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)

    # cuDNN backend flags.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

def increment_path(path, exist_ok=False):
    """Return ``path`` itself, or ``path`` plus a numeric suffix if it is taken.

    Numbering starts at 2, i.e. runs/exp --> runs/exp2, runs/exp3 etc.

    Args:
        path (str or pathlib.Path): f"{model_dir}/{args.name}".
        exist_ok (bool): whether increment path (increment if False).

    Returns:
        str: ``path`` unchanged when it does not exist yet (or when it exists
        and ``exist_ok`` is true); otherwise ``path`` followed by one more than
        the largest numeric suffix found among existing siblings.
    """
    path = Path(path)
    if exist_ok or not path.exists():
        return str(path)

    # Escape the path for glob: characters such as '[' would otherwise be
    # interpreted as wildcards and existing runs would be missed.
    candidates = glob.glob(f"{glob.escape(str(path))}*")

    # Escape the stem for the regex: names containing '(', '+', '.' etc. must be
    # matched literally (an unescaped '(' even raises re.error).
    suffix_pattern = re.compile(re.escape(path.stem) + r"(\d+)")

    # Only look at the last path component so that digits in parent directory
    # names can never be mistaken for a run number.
    matches = (suffix_pattern.search(os.path.basename(c)) for c in candidates)
    taken = [int(m.group(1)) for m in matches if m]

    next_index = max(taken) + 1 if taken else 2
    return f"{path}{next_index}"

def get_lr(optimizer):
    """Return the learning rate stored in the optimizer's first parameter group.

    Returns ``None`` when the optimizer has no parameter groups.
    """
    first_group = next(iter(optimizer.param_groups), None)
    return None if first_group is None else first_group['lr']


# Registry: criterion name -> loss class implementing it.
_criterion_entrypoints = dict(
    mean_squared=nn.MSELoss,
    l1=nn.L1Loss,
)


def criterion_entrypoint(criterion_name):
    """Look up the loss class registered under ``criterion_name``.

    Raises:
        KeyError: if the name is not registered.
    """
    entrypoint = _criterion_entrypoints[criterion_name]
    return entrypoint


def is_criterion(criterion_name):
    """Tell whether ``criterion_name`` is a registered loss name."""
    registered = criterion_name in _criterion_entrypoints
    return registered


def create_criterion(criterion_name, **kwargs):
    """Instantiate the loss registered under ``criterion_name``.

    Args:
        criterion_name (str): key of the registry above.
        **kwargs: forwarded unchanged to the loss constructor.

    Returns:
        The constructed loss object.

    Raises:
        RuntimeError: if ``criterion_name`` is not registered.
    """
    # Guard clause first, so the happy path reads straight through.
    if not is_criterion(criterion_name):
        raise RuntimeError('Unknown loss (%s)' % criterion_name)

    loss_cls = criterion_entrypoint(criterion_name)
    return loss_cls(**kwargs)

now = datetime.now()
# NOTE(review): ':' is not a legal character in Windows file names; folder_name is
# currently only referenced by the commented-out --name default below.
folder_name = now.strftime('%Y-%m-%d-%H:%M:%S')

# Run configuration, expressed as command-line style arguments.
# Help texts use argparse's %(default)s placeholder so the documented default
# can never drift away from the real one (several of them had).
parser = argparse.ArgumentParser()

parser.add_argument('--seed', type=int, default=42, help='random seed (default: %(default)s)')
parser.add_argument('--epochs', type=int, default=100, help='number of epochs to train (default: %(default)s)')
parser.add_argument('--batch_size', type=int, default=128, help='input batch size for training (default: %(default)s)')
parser.add_argument('--lr', type=float, default=1e-3, help='learning rate (default: %(default)s)')
# parser.add_argument('--lr_decay_step', type=int, default=20, help='learning rate scheduler decay step (default: 20)')
parser.add_argument('--criterion', type=str, default='mean_squared', help='criterion type (default: %(default)s)')
parser.add_argument('--log_interval', type=int, default=200, help='how many batches to wait before logging training status (default: %(default)s)')
parser.add_argument('--model', type=str, default='resnet1d', help='model type (default: %(default)s)')
parser.add_argument('--name', default='exp', help='model save at {SM_MODEL_DIR}/{name}')
# parser.add_argument('--name', default='exp_'+folder_name, help='model save at {SM_MODEL_DIR}/{name}')
parser.add_argument('--model_dir', type=str, default=os.environ.get('SM_MODEL_DIR', './model_reg'))

# parse_known_args() ignores arguments it does not recognise instead of exiting;
# presumably chosen because the notebook kernel's own argv would make
# parse_args() fail.
# args = parser.parse_args()
args, _ = parser.parse_known_args()
print(args)

Namespace(seed=42, epochs=100, batch_size=128, lr=0.001, criterion='mean_squared', log_interval=200, model='resnet1d', name='exp', model_dir='./model_reg')


In [4]:
# Root folder that holds every experiment run.
model_dir = args.model_dir

# The run folder name encodes criterion, epoch budget, batch size and experiment
# name; increment_path appends a number when that folder already exists.
save_dir = increment_path(
    os.path.join(
        model_dir,
        f"{args.criterion}_epoch{args.epochs}_batch{args.batch_size}_{args.name}",
    )
)

print(model_dir, save_dir, sep="\n")

./model_reg
model_reg\mean_squared_epoch100_batch128_exp


### # DataLoader

In [5]:
class PPGDataset(Dataset):
    """Map-style dataset pairing each input sample with its target value.

    Args:
        data: indexable collection of input samples.
        label: indexable collection of targets, aligned with ``data``.
        label_dtype (torch.dtype): dtype of the returned target. Defaults to
            ``torch.float`` because this notebook trains a regressor; casting
            to ``torch.long`` (as done previously) truncates the fractional
            part of every target. Pass ``torch.long`` for class indices.

    Raises:
        ValueError: if ``data`` and ``label`` differ in length.
    """

    def __init__(self, data, label, label_dtype=torch.float):
        if len(data) != len(label):
            raise ValueError(
                f"data and label must have the same length, got {len(data)} and {len(label)}"
            )
        self.data = data
        self.label = label
        self.label_dtype = label_dtype

    def __getitem__(self, index):
        """Return ``(sample, target)`` for ``index`` as tensors."""
        # as_tensor avoids a copy when the stored item is already a tensor of
        # the requested dtype.
        sample = torch.as_tensor(self.data[index], dtype=torch.float)
        target = torch.as_tensor(self.label[index], dtype=self.label_dtype)
        return (sample, target)

    def __len__(self):
        """Number of samples."""
        return len(self.data)

In [6]:
# get dataset
def _load_split(x_path, y_path):
    """Load one (inputs, targets) split from two pickle files.

    Both unpickled arrays get a singleton axis inserted at position 1 and are
    converted to float32 tensors.

    Args:
        x_path (str): pickle file holding the inputs.
        y_path (str): pickle file holding the targets.

    Returns:
        tuple[torch.Tensor, torch.Tensor]: ``(inputs, targets)``.

    Raises:
        ValueError: if inputs and targets differ in length.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only load files
    # produced by a trusted preprocessing step.
    with open(x_path, 'rb') as f:
        inputs = pickle.load(f)
    with open(y_path, 'rb') as f:
        targets = pickle.load(f)

    inputs = torch.tensor(np.expand_dims(inputs, 1), dtype=torch.float32)
    targets = torch.tensor(np.expand_dims(targets, 1), dtype=torch.float32)

    if len(inputs) != len(targets):
        raise ValueError(
            f"{x_path} has {len(inputs)} samples but {y_path} has {len(targets)}"
        )
    return inputs, targets


train_X, train_Y = _load_split('../data_reg/train_reg_scaled_x.pkl', '../data_reg/train_reg_y.pkl')
valid_X, valid_Y = _load_split('../data_reg/valid_reg_scaled_x.pkl', '../data_reg/valid_reg_y.pkl')
test_X, test_Y = _load_split('../data_reg/test_reg_scaled_x.pkl', '../data_reg/test_reg_y.pkl')

train_dataset = PPGDataset(train_X, train_Y)
val_dataset = PPGDataset(valid_X, valid_Y)
test_dataset = PPGDataset(test_X, test_Y)

def _describe_split(inputs, targets):
    """Print types, shapes and the first input sample of one split, then a blank line."""
    print(type(inputs), type(targets))
    print(inputs.shape, targets.shape)
    first_sample = inputs[0]
    print(len(first_sample), first_sample)
    print()


# Sanity check of the three splits, in train / valid / test order.
_describe_split(train_X, train_Y)
_describe_split(valid_X, valid_Y)
_describe_split(test_X, test_Y)

print(train_dataset, val_dataset, test_dataset, sep="\n")

# <class 'torch.Tensor'> <class 'torch.Tensor'>
# torch.Size([292544, 1, 3000]) torch.Size([292544, 1])

# <class 'torch.Tensor'> <class 'torch.Tensor'>
# torch.Size([95515, 1, 3000]) torch.Size([95515, 1])

# <class 'torch.Tensor'> <class 'torch.Tensor'>
# torch.Size([95880, 1, 3000]) torch.Size([95880, 1])

<class 'torch.Tensor'> <class 'torch.Tensor'>
torch.Size([292544, 1, 3000]) torch.Size([292544, 1])
1 tensor([[0.0982, 0.1071, 0.0982,  ..., 0.1161, 0.1071, 0.1161]])

<class 'torch.Tensor'> <class 'torch.Tensor'>
torch.Size([95515, 1, 3000]) torch.Size([95515, 1])
1 tensor([[0.0541, 0.0676, 0.0541,  ..., 0.5135, 0.4730, 0.4730]])

<class 'torch.Tensor'> <class 'torch.Tensor'>
torch.Size([95880, 1, 3000]) torch.Size([95880, 1])
1 tensor([[0.0849, 0.0755, 0.0755,  ..., 0.0849, 0.0849, 0.0849]])
<__main__.PPGDataset object at 0x000002495C7C2AD0>
<__main__.PPGDataset object at 0x000002495C7D5890>
<__main__.PPGDataset object at 0x000002495C78DD50>


In [7]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Worker processes used by the train / validation loaders.
num_workers = multiprocessing.cpu_count() // 2

# Training: shuffled; the trailing partial batch is dropped so every
# optimisation step sees a full batch.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    num_workers=num_workers,
    shuffle=True,
    pin_memory=use_cuda,
    drop_last=True,
    )

# Validation: keep the trailing partial batch. With drop_last=True up to
# batch_size - 1 validation samples were silently excluded from every metric.
val_dataloader = DataLoader(
    val_dataset,
    batch_size=args.batch_size,
    num_workers=num_workers,
    shuffle=False,
    pin_memory=use_cuda,
    drop_last=False,
    )

# Test: evaluate in batches instead of the default batch_size=1, which needed
# one forward pass per sample. No sample is dropped, so the per-sample
# predictions (and the sklearn metrics computed from them) are unaffected.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=args.batch_size,
    shuffle=False,
    pin_memory=use_cuda,
)

print(device)
print(train_dataloader)
print(val_dataloader)
print(test_dataloader)

cpu
<torch.utils.data.dataloader.DataLoader object at 0x0000024942B02D50>
<torch.utils.data.dataloader.DataLoader object at 0x0000024954608210>
<torch.utils.data.dataloader.DataLoader object at 0x000002494165F390>


### # wandb

In [None]:
import os

# Placeholder value: replace it with this notebook's file name.
# (Presumably read by wandb to label the run's source notebook; confirm
# against the wandb documentation.)
os.environ.update({"WANDB_NOTEBOOK_NAME": "notebook name here"})

# !pip install wandb -qqq
import wandb

# Alternative from the shell: !wandb login --relogin
wandb.login()

True

In [None]:
# -- wandb initialize with configuration
config={
    "epochs": args.epochs,
    "batch_size": args.batch_size,
    "learning_rate" : args.lr,
    "architecture" : args.model,
    "loss" : args.criterion
}

# The run name starts with the last component of save_dir.
# os.path.basename understands the platform's separator; splitting on '/'
# returned the whole path on Windows, where save_dir contains '\'.
wandb.init(
    project="KAIST GSDS Microdegree - Hypotension",
    name=os.path.basename(os.path.normpath(save_dir)) + str(args.model) + str(args.epochs),
    config=config,
)

### # Model

In [8]:
# Seed every RNG before the model is built, so weight initialisation and the
# shuffling order of the training loader are reproducible. --seed and
# seed_everything were defined but never used.
seed_everything(args.seed)

# make model
## change the hyper-parameters for your own data
# (n_block, downsample_gap, increasefilter_gap) = (8, 1, 2)
# 34 layer (16*2+2): 16, 2, 4
# 98 layer (48*2+2): 48, 6, 12

model = ResNet1D(
    in_channels=1,          # train_X has a single channel axis: [N, 1, 3000]
    base_filters=128,       # 64 for ResNet1D, 352 for ResNeXt1D
    kernel_size=16,
    stride=2,
    groups=32,
    n_block=16,             # 48 for the 98-layer variant
    n_classes=1,            # one output value per sample (regression)
    downsample_gap=2,       # 6 for the 98-layer variant
    increasefilter_gap=4,   # 12 for the 98-layer variant
    use_do=True)
model.to(device)
print(train_X.shape, train_Y.shape)
# Layer-by-layer summary for one sample of shape (channels, length).
summary(model, (train_X.shape[1], train_X.shape[2])) # device=device

model.verbose = False # True
# NOTE(review): weight_decay is tied to the learning rate here, so changing --lr
# also changes the regularisation strength. Confirm this is intended.
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.lr)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_decay_step, gamma=0.5)
criterion = create_criterion(args.criterion)  # default: mean_squared

# TensorBoard writer for this run; the run configuration is stored next to the
# event files as config.json.
logger = SummaryWriter(log_dir=save_dir)
with open(os.path.join(save_dir, 'config.json'), 'w', encoding='utf-8') as f:
    json.dump(vars(args), f, ensure_ascii=False, indent=4)


torch.Size([292544, 1, 3000]) torch.Size([292544, 1])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1            [-1, 128, 3000]           2,176
   MyConv1dPadSame-2            [-1, 128, 3000]               0
       BatchNorm1d-3            [-1, 128, 3000]             256
              ReLU-4            [-1, 128, 3000]               0
            Conv1d-5            [-1, 128, 3000]           8,320
   MyConv1dPadSame-6            [-1, 128, 3000]               0
       BatchNorm1d-7            [-1, 128, 3000]             256
              ReLU-8            [-1, 128, 3000]               0
           Dropout-9            [-1, 128, 3000]               0
           Conv1d-10            [-1, 128, 3000]           8,320
  MyConv1dPadSame-11            [-1, 128, 3000]               0
       BasicBlock-12            [-1, 128, 3000]               0
      BatchNorm1d-13            [-1, 128, 3000]  

### # Train

In [None]:
import warnings
# NOTE(review): this silences every warning for the rest of the session.
warnings.filterwarnings('ignore') # "error", "ignore", "always", "default", "module" or "once"


def _regression_metrics(labels, preds):
    """Return ``(mae, mse, rmse, r2)`` of ``preds`` against ``labels`` as floats."""
    mae = float(mean_absolute_error(labels, preds))
    mse = float(mean_squared_error(labels, preds))
    rmse = float(np.sqrt(mse))
    r2 = float(r2_score(labels, preds))
    return mae, mse, rmse, r2


# early_stopping
early_stopping = EarlyStopping(patience = 7, verbose = True)

# Best validation values seen so far.
best_val_loss = np.inf
best_val_mae = np.inf    # MAE: lower is better
best_val_r2 = -np.inf    # R2: higher is better

steps_per_epoch = len(train_dataloader)

for epoch in range(args.epochs):

    # ------------------------------------------------------------------ train
    model.train()

    loss_value = 0
    batches_in_window = 0
    train_preds_by_batch = []
    train_labels_by_batch = []
    # Metrics of the most recent logging window of this epoch (None until the
    # first window has been evaluated).
    train_metrics = None

    print("Training...")
    for batch_idx, train_batch in enumerate(train_dataloader):
        input_x, input_y = tuple(t.to(device) for t in train_batch)

        optimizer.zero_grad()

        outs = model(input_x)
        loss = criterion(outs.to(torch.float32), input_y.to(torch.float32)) # regression

        loss.backward()
        optimizer.step()

        loss_value += loss.item()
        batches_in_window += 1

        train_preds_by_batch.extend(outs.detach().cpu().numpy())
        train_labels_by_batch.extend(input_y.detach().cpu().numpy())

        window_full = (batch_idx + 1) % args.log_interval == 0
        # An epoch shorter than log_interval would otherwise never produce
        # train metrics (and the wandb logging below would hit a NameError).
        short_epoch_end = (batch_idx + 1 == steps_per_epoch) and train_metrics is None
        if window_full or short_epoch_end:
            train_loss = loss_value / batches_in_window
            train_mae, train_mse, train_rmse, train_r2 = _regression_metrics(
                train_labels_by_batch, train_preds_by_batch
            )

            current_lr = get_lr(optimizer)
            print(
                f"Epoch[{epoch + 1}/{args.epochs}]({batch_idx + 1}/{len(train_dataloader)}) || "
                f"training loss {train_loss:4.4} || training MAE {train_mae:4.4f} || training MSE {train_mse:4.4f} || training rMSE {train_rmse:4.4f} || training R2 {train_r2:4.4f} || lr {current_lr}"
            )
            global_step = epoch * steps_per_epoch + batch_idx
            logger.add_scalar("Train/loss", train_loss, global_step)
            logger.add_scalar("Train/mae", train_mae, global_step)
            logger.add_scalar("Train/mse", train_mse, global_step)
            logger.add_scalar("Train/rmse", train_rmse, global_step)
            logger.add_scalar("Train/r2_score", train_r2, global_step)

            train_metrics = {
                'Train loss': train_loss,
                'Train mae': train_mae,
                'Train mse': train_mse,
                'Train rmse': train_rmse,
                'Train r2': train_r2,
            }

            # Start a fresh logging window.
            loss_value = 0
            batches_in_window = 0
            train_preds_by_batch = []
            train_labels_by_batch = []

    # logging wandb train phase (metrics of the last evaluated window)
    if train_metrics is not None:
        wandb.log(train_metrics)
    # scheduler.step(loss)

    # ------------------------------------------------------------- validation
    model.eval()

    val_loss_sum = 0.0
    val_sample_count = 0
    all_val_preds = []
    all_val_labels = []

    print()
    print("Calculating validation results...")
    with torch.no_grad():
        for val_batch in val_dataloader:
            input_x, input_y = tuple(t.to(device) for t in val_batch)

            outs = model(input_x)

            loss_item = criterion(outs.to(torch.float32), input_y.to(torch.float32)).item() # regression

            # The criterion is built with its default (mean) reduction, so
            # weight each batch by its size: the average then stays exact even
            # when the last batch is smaller than the others.
            n_samples = input_x.size(0)
            val_loss_sum += loss_item * n_samples
            val_sample_count += n_samples

            all_val_preds.extend(outs.cpu().numpy())
            all_val_labels.extend(input_y.cpu().numpy())

    val_loss = val_loss_sum / max(val_sample_count, 1)
    val_mae, val_mse, val_rmse, val_r2 = _regression_metrics(all_val_labels, all_val_preds)

    best_val_loss = min(best_val_loss, val_loss)
    best_val_r2 = max(best_val_r2, val_r2)

    # for checking: MAE recomputed by hand
    check_mae = np.mean(np.abs(np.asarray(all_val_labels) - np.asarray(all_val_preds)))

    # Checkpoints. The best model is the one with the LOWEST validation MAE.
    if val_mae < best_val_mae:
        print(f"New best model for val mae : {val_mae:4.4f}! saving the best model..")
        torch.save(model.state_dict(), f"{save_dir}/best.pth")
        best_val_mae = val_mae
    torch.save(model.state_dict(), f"{save_dir}/last.pth")
    print(
        f"[Val] loss : {val_loss:4.4} mae : {val_mae:4.4f}, mse : {val_mse:4.4f}, rmse : {val_rmse:4.4f}, r2_score : {val_r2:4.4f} || (mae for check : {check_mae: 4.4f}) || "
        f"Best loss : {best_val_loss:4.4}, Best mae : {best_val_mae:4.4f}, Best r2_score : {best_val_r2:4.4f}"
    )
    logger.add_scalar("Val/loss", val_loss, epoch)
    logger.add_scalar("Val/mae", val_mae, epoch)
    logger.add_scalar("Val/mse", val_mse, epoch)
    logger.add_scalar("Val/rmse", val_rmse, epoch)
    logger.add_scalar("Val/r2_score", val_r2, epoch)
    print()

    # logging wandb valid phase
    wandb.log({
        'Valid loss': val_loss,
        'Valid val_mae': val_mae,
        'Valid mse': val_mse,
        'Valid rmse': val_rmse,
        'Valid r2': val_r2,
    })

    # early stopping: checked last, so the final epoch is still checkpointed
    # and logged before the loop is left.
    early_stopping(val_loss, model)
    if early_stopping.early_stop:
        print(f"Early stopping at epoch {epoch + 1}")
        break

logger.flush()
wandb.finish()


In [None]:
# NOTE(review): this evaluates the model in the state the training loop left it
# in, not the weights stored in best.pth. Confirm which one is intended.
model.eval()

test_loss_sum = 0.0
test_sample_count = 0
all_test_preds = []
all_test_labels = []

print()
print("Testing results...")
with torch.no_grad():
    for test_batch in test_dataloader:
        input_x, input_y = tuple(t.to(device) for t in test_batch)

        outs = model(input_x)

        loss_item = criterion(outs.to(torch.float32), input_y.to(torch.float32)).item() # regression

        # The criterion is built with its default (mean) reduction, so weight
        # each batch by its size: the average is then exact for any batch size.
        n_samples = input_x.size(0)
        test_loss_sum += loss_item * n_samples
        test_sample_count += n_samples

        all_test_preds.extend(outs.cpu().numpy())
        all_test_labels.extend(input_y.cpu().numpy())

test_loss = test_loss_sum / max(test_sample_count, 1)
test_mae = mean_absolute_error(all_test_labels, all_test_preds)
test_mse = mean_squared_error(all_test_labels, all_test_preds)
test_rmse = np.sqrt(test_mse)
test_r2 = r2_score(all_test_labels, all_test_preds)

# for checking: MAE recomputed by hand
check_mae = np.mean(np.abs(np.asarray(all_test_labels) - np.asarray(all_test_preds)))

print(
    f"[Test] loss : {test_loss:4.4} mae : {test_mae:4.4f}, mse : {test_mse:4.4f}, rmse : {test_rmse:4.4f}, r2_score : {test_r2:4.4f} || (mae for check : {check_mae: 4.4f})"
)

# Store (predictions, labels) for later analysis.
output_dir = os.environ.get('SM_OUTPUT_DATA_DIR', './output')
# The directory is not created anywhere else; without this the dump fails on a
# fresh checkout.
os.makedirs(output_dir, exist_ok=True)
save_path = os.path.join(output_dir, 'preds.pkl')
# The context manager closes the file even if pickling fails.
with open(save_path, 'wb') as f:
    pickle.dump((all_test_preds, all_test_labels), f)
print(f"Inference Done! Inference result saved at {save_path}")