## Train

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so the project
# folder stored under MyDrive can be reached from the cells below.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Show the directory the runtime starts in, before changing into the project folder.
!pwd

/content


In [None]:
# Switch to the project's code directory on Drive. The relative paths used later
# in this notebook (../data_reg/..., ./model_reg) resolve against this directory,
# and the `model_resnet1d` import is presumably a module living here (TODO confirm).
%cd "/content/drive/MyDrive/[Microdegree] Hypotension/code"
!pwd

/content/drive/MyDrive/[Microdegree] Hypotension/code
/content/drive/MyDrive/[Microdegree] Hypotension/code


### Train

In [None]:
# !pip install torchsummary
# !pip install tensorboard
# !pip install wandb

In [None]:
import numpy as np
from collections import Counter
from tqdm import tqdm
from matplotlib import pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix

import os
import glob
import re
import pickle
import multiprocessing
import wandb
import argparse
from datetime import datetime
from pathlib import Path
import random
import json

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary

from model_resnet1d import ResNet1D
# from sklearn.model_selection import train_test_split

In [None]:
# Argument parser for the run configuration; its options are registered further down.
parser = argparse.ArgumentParser()

# Timestamp taken once at start-up; it is embedded in the default experiment name.
now = datetime.now()
folder_name = datetime.strftime(now, '%Y-%m-%d-%H:%M:%S')

def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    The same value is handed to Python's ``random``, NumPy's global generator
    and PyTorch (CPU plus all CUDA devices). cuDNN is additionally switched to
    deterministic mode with auto-tuning (benchmark) disabled.

    Args:
        seed (int): seed value shared by all generators.
    """
    # Pure-Python and NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch: CPU generator, the current CUDA device, then every CUDA device
    # (the last call matters for multi-GPU runs).
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Disable cuDNN auto-tuning and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

def increment_path(path, exist_ok=False):
    """Return a path that is safe to use for a new run directory.

    If ``path`` does not exist yet (or ``exist_ok`` is true) it is returned
    unchanged. Otherwise a number is appended directly to it: the first
    collision yields ``<path>2``, and afterwards one more than the highest
    numeric suffix already present, i.e. runs/exp --> runs/exp2, runs/exp3 etc.

    Args:
        path (str or pathlib.Path): f"{model_dir}/{args.name}".
        exist_ok (bool): if True, reuse an existing path instead of incrementing.

    Returns:
        str: the original path or the incremented variant.
    """
    path = Path(path)
    if exist_ok or not path.exists():
        return str(path)

    # Escape glob metacharacters so a path containing e.g. "[...]" is matched
    # literally instead of being read as a character class.
    siblings = glob.glob(f"{glob.escape(str(path))}*")

    # Compare only the final path component, with the name escaped for the
    # regex engine, and require the remainder to be purely numeric.
    suffix_pattern = re.compile(re.escape(path.name) + r"(\d+)")
    used_suffixes = []
    for sibling in siblings:
        match = suffix_pattern.fullmatch(Path(sibling).name)
        if match:
            used_suffixes.append(int(match.group(1)))

    next_suffix = max(used_suffixes) + 1 if used_suffixes else 2
    return f"{path}{next_suffix}"

def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Only the first entry of ``optimizer.param_groups`` is consulted; ``None`` is
    returned when there are no parameter groups at all.
    """
    learning_rates = (group['lr'] for group in optimizer.param_groups)
    return next(learning_rates, None)


# Registry of selectable losses: criterion name -> loss class.
# Not registered (left as placeholders): 'focal' (FocalLoss),
# 'label_smoothing' (LabelSmoothingLoss), 'f1' (F1Loss).
_criterion_entrypoints = {
    'cross_entropy': nn.CrossEntropyLoss,
    'mean_squared': nn.MSELoss,
}


def criterion_entrypoint(criterion_name):
    """Return the loss class registered under ``criterion_name``.

    Raises:
        KeyError: if the name is not in the registry.
    """
    loss_cls = _criterion_entrypoints[criterion_name]
    return loss_cls


def is_criterion(criterion_name):
    """Tell whether ``criterion_name`` is a registered loss name."""
    known_names = _criterion_entrypoints
    return criterion_name in known_names


def create_criterion(criterion_name, **kwargs):
    """Instantiate the loss registered under ``criterion_name``.

    Args:
        criterion_name (str): key of the loss in the registry.
        **kwargs: forwarded unchanged to the loss class constructor.

    Returns:
        The constructed loss object.

    Raises:
        RuntimeError: if ``criterion_name`` is not registered.
    """
    # Guard clause: reject unknown names before touching the registry.
    if not is_criterion(criterion_name):
        raise RuntimeError('Unknown loss (%s)' % criterion_name)
    return criterion_entrypoint(criterion_name)(**kwargs)

# Register the run's hyper-parameters. Each help string states the actual default.
parser.add_argument('--seed', type=int, default=42, help='random seed (default: 42)')
parser.add_argument('--epochs', type=int, default=100, help='number of epochs to train (default: 100)')
parser.add_argument('--batch_size', type=int, default=64, help='input batch size for training (default: 64)')
parser.add_argument('--lr', type=float, default=1e-3, help='learning rate (default: 1e-3)')
parser.add_argument('--criterion', type=str, default='mean_squared', help='criterion type (default: mean_squared)')
parser.add_argument('--log_interval', type=int, default=10, help='how many batches to wait before logging training status')
parser.add_argument('--model', type=str, default='resnet1d', help='model type (default: resnet1d)')
parser.add_argument('--name', default='exp_'+folder_name, help='model save at {SM_MODEL_DIR}/{name}')
parser.add_argument('--model_dir', type=str, default=os.environ.get('SM_MODEL_DIR', './model_reg'))

# parse_known_args() rather than parse_args(): argv entries this parser does not
# recognise (e.g. whatever the notebook kernel was launched with) are ignored
# instead of aborting the cell.
args, _ = parser.parse_known_args()
print(args)

# Apply the configured seed; without this call --seed is parsed but has no
# effect and runs are not reproducible.
seed_everything(args.seed)

Namespace(seed=42, epochs=100, batch_size=64, lr=0.001, criterion='mean_squared', log_interval=10, model='resnet1d', name='exp_2023-12-08-18:39:34', model_dir='./model_reg')


In [None]:
# Work out where this run's artefacts go: <model_dir>/<name>, with a numeric
# suffix appended by increment_path when that directory already exists.
model_dir = args.model_dir
experiment_path = os.path.join(model_dir, args.name)
save_dir = increment_path(experiment_path)

for location in (model_dir, save_dir):
    print(location)

./model_reg
model_reg/exp_2023-12-08-18:39:34


### DataLoader

In [None]:
class PPGDataset(Dataset):
    """Map-style dataset yielding ``(sample, label)`` tensor pairs.

    Samples are always returned as ``torch.float``. The label dtype is chosen
    so that regression targets are not silently truncated:

    * ``label_dtype`` given  -> labels are cast to exactly that dtype;
    * floating-point labels  -> returned as ``torch.float`` (regression);
    * any other labels       -> returned as ``torch.long`` (class indices).

    Args:
        data: indexable collection of samples (tensor, array or list).
        label: indexable collection of labels, aligned with ``data``.
        label_dtype (torch.dtype, optional): force a specific label dtype.
    """

    def __init__(self, data, label, label_dtype=None):
        self.data = data
        self.label = label
        self.label_dtype = label_dtype

    def __getitem__(self, index):
        """Return the ``index``-th sample and its label as tensors."""
        sample = torch.as_tensor(self.data[index], dtype=torch.float)
        target = torch.as_tensor(self.label[index])

        if self.label_dtype is not None:
            target = target.to(self.label_dtype)
        elif target.is_floating_point():
            # Casting float targets to long would drop their fractional part.
            target = target.to(torch.float)
        else:
            target = target.to(torch.long)
        return (sample, target)

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.data)

In [None]:
# get dataset
def _load_pickle_as_tensor(path):
    """Load a pickled array and return it as a float32 tensor with a new axis 1.

    The unpickled object is passed through ``np.expand_dims(obj, 1)`` and then
    converted with ``torch.tensor(..., dtype=torch.float32)``.

    Args:
        path (str): location of the pickle file.

    Returns:
        torch.Tensor: float32 tensor with a size-1 axis inserted at position 1.
    """
    # NOTE: pickle.load can execute arbitrary code while unpickling; only use
    # this on files produced by this project's own preprocessing.
    with open(path, 'rb') as f:
        raw = pickle.load(f)
    return torch.tensor(np.expand_dims(raw, 1), dtype=torch.float32)


def _describe_split(split_x, split_y):
    """Print container types, shapes and the first sample of one data split."""
    print(type(split_x), type(split_y))
    print(split_x.shape, split_y.shape)
    print(len(split_x[0]), split_x[0])


train_X = _load_pickle_as_tensor('../data_reg/train_reg_x_cleaned.pkl')
train_Y = _load_pickle_as_tensor('../data_reg/train_reg_y_cleaned.pkl')

valid_X = _load_pickle_as_tensor('../data_reg/valid_reg_x_cleaned.pkl')
valid_Y = _load_pickle_as_tensor('../data_reg/valid_reg_y_cleaned.pkl')

test_X = _load_pickle_as_tensor('../data_reg/test_reg_x_cleaned.pkl')
test_Y = _load_pickle_as_tensor('../data_reg/test_reg_y_cleaned.pkl')

train_dataset = PPGDataset(train_X, train_Y)
val_dataset = PPGDataset(valid_X, valid_Y)
test_dataset = PPGDataset(test_X, test_Y)

_describe_split(train_X, train_Y)
print()
_describe_split(valid_X, valid_Y)
print()
_describe_split(test_X, test_Y)

print(train_dataset)
print(val_dataset)
print(test_dataset)

# Shapes seen in the recorded run (all torch.Tensor):
#   train: torch.Size([1892, 1, 3000]) / torch.Size([1892, 1])
#   valid: torch.Size([631, 1, 3000])  / torch.Size([631, 1])
#   test : torch.Size([630, 1, 3000])  / torch.Size([630, 1])

<class 'torch.Tensor'> <class 'torch.Tensor'>
torch.Size([1892, 1, 3000]) torch.Size([1892, 1])
1 tensor([[35.9744, 36.3694, 36.7644,  ..., 45.4540, 48.6139, 52.9587]])

<class 'torch.Tensor'> <class 'torch.Tensor'>
torch.Size([631, 1, 3000]) torch.Size([631, 1])
1 tensor([[29.2597, 28.8647, 28.0748,  ..., 26.0998, 26.0998, 26.0998]])

<class 'torch.Tensor'> <class 'torch.Tensor'>
torch.Size([630, 1, 3000]) torch.Size([630, 1])
1 tensor([[37.9493, 37.5543, 37.1594,  ..., 44.2690, 44.2690, 43.4791]])
<__main__.PPGDataset object at 0x7e37d386db10>
<__main__.PPGDataset object at 0x7e37a30a9b10>
<__main__.PPGDataset object at 0x7e38f2f37100>


In [None]:
# Pick the compute device once; pin_memory is only useful when batches are
# copied to a CUDA device.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Use half of the available CPU cores as loader worker processes.
num_workers = multiprocessing.cpu_count() // 2

# Training batches: shuffled each epoch; the incomplete final batch is dropped
# so every optimisation step sees a full batch.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    num_workers=num_workers,
    shuffle=True,
    pin_memory=use_cuda,
    drop_last=True,
    )

# Validation batches: fixed order, and the last partial batch is KEPT.
# With drop_last=True the trailing samples would never be evaluated and the
# reported validation MAE would be computed on a subset only.
val_dataloader = DataLoader(
    val_dataset,
    batch_size=args.batch_size,
    num_workers=num_workers,
    shuffle=False,
    pin_memory=use_cuda,
    drop_last=False,
    )

# Test loader keeps DataLoader's default batch size, in fixed order.
test_dataloader = DataLoader(
    test_dataset,
    shuffle=False
)

print(device)
print(train_dataloader)
print(val_dataloader)
print(test_dataloader)

cuda
<torch.utils.data.dataloader.DataLoader object at 0x7e37a2edb9a0>
<torch.utils.data.dataloader.DataLoader object at 0x7e37a2edb2e0>
<torch.utils.data.dataloader.DataLoader object at 0x7e37a2edbf10>


### wandb

In [None]:
import os
# Environment variable read by wandb for the notebook's name; the value here is
# still a placeholder and should be replaced with the real notebook name.
os.environ["WANDB_NOTEBOOK_NAME"] = "notebook name here"

# Uncomment to install wandb in a fresh runtime:
# !pip install wandb -qqq
import wandb
# Log in to Weights & Biases. Kept as the cell's final expression so its return
# value is shown as the cell output.
wandb.login()

True

In [None]:
# -- start the wandb run, recording the hyper-parameters of this experiment
# Run name = last component of save_dir + model name + epoch count, with no
# separators between the three parts.
run_label = save_dir.split('/')[-1]
config = {
    "epochs": args.epochs,
    "batch_size": args.batch_size,
    "learning_rate": args.lr,
    "architecture": args.model,
    "loss": args.criterion,
}
wandb.init(
    project="KAIST GSDS Microdegree - Hypotension",
    name=f"{run_label}{args.model}{args.epochs}",
    config=config,
)

### Model

In [None]:
# make model
# Hyper-parameters of the 1-D ResNet; change them for your own data.
# Reference settings for (n_block, downsample_gap, increasefilter_gap):
#   (8, 1, 2)
#   34 layers (16*2+2): (16, 2, 4)   <- used here
#   98 layers (48*2+2): (48, 6, 12)
resnet_kwargs = dict(
    in_channels=1,
    base_filters=128,      # 64 for ResNet1D, 352 for ResNeXt1D
    kernel_size=16,
    stride=2,
    groups=32,
    n_block=16,
    n_classes=1,           # a single output value
    downsample_gap=2,
    increasefilter_gap=4,
    use_do=True,
)
model = ResNet1D(**resnet_kwargs)
model.to(device)

# Layer-by-layer summary for one sample, i.e. the data shape without its first axis.
print(train_X.shape, train_Y.shape)
summary(model, tuple(train_X.shape[1:]))

model.verbose = False

# NOTE(review): weight_decay is tied to args.lr, so changing the learning rate
# also changes the regularisation strength -- confirm this is intended.
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.lr)
# Cut the learning rate by 10x once the monitored value has not decreased for 10 steps.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
# Loss selected by --criterion (default: mean_squared).
criterion = create_criterion(args.criterion)

# TensorBoard writer rooted at the run directory, plus a JSON dump of the
# parsed arguments next to it so the run's configuration is kept with its output.
logger = SummaryWriter(log_dir=save_dir)
config_path = os.path.join(save_dir, 'config.json')
with open(config_path, 'w', encoding='utf-8') as f:
    json.dump(vars(args), f, ensure_ascii=False, indent=4)


torch.Size([1892, 1, 3000]) torch.Size([1892, 1])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1            [-1, 128, 3000]           2,176
   MyConv1dPadSame-2            [-1, 128, 3000]               0
       BatchNorm1d-3            [-1, 128, 3000]             256
              ReLU-4            [-1, 128, 3000]               0
            Conv1d-5            [-1, 128, 3000]           8,320
   MyConv1dPadSame-6            [-1, 128, 3000]               0
       BatchNorm1d-7            [-1, 128, 3000]             256
              ReLU-8            [-1, 128, 3000]               0
           Dropout-9            [-1, 128, 3000]               0
           Conv1d-10            [-1, 128, 3000]           8,320
  MyConv1dPadSame-11            [-1, 128, 3000]               0
       BasicBlock-12            [-1, 128, 3000]               0
      BatchNorm1d-13            [-1, 128, 3000]      

### Train

In [None]:
# for batch_idx, train_batch in enumerate(train_dataloader):
#     print('# ', batch_idx)

In [None]:
# train
# Lowest validation MAE seen so far. Starts at +inf so the first epoch always
# counts as an improvement; lower is better.
best_mae = float('inf')
for epoch in range(args.epochs):

    # ---- train loop ----
    model.train()
    avg_cost = 0.0  # mean training loss over the epoch, kept as a plain float
    print("Training...")
    for batch_idx, train_batch in enumerate(train_dataloader):
        input_x, input_y = tuple(t.to(device) for t in train_batch)

        optimizer.zero_grad()

        outs = model(input_x)

        # Regression: compare predictions and targets as float32.
        loss = criterion(outs.to(torch.float32), input_y.to(torch.float32))
        loss.backward()
        optimizer.step()

        # .item() detaches the value; accumulating the tensor itself would turn
        # avg_cost into a tensor and hand a tensor to print/wandb.
        avg_cost += loss.item() / len(train_dataloader)

    if epoch % args.log_interval == 0:
        print(f'Epoch [{epoch+1}/{args.epochs}], train loss :', '{:.4f}'.format(avg_cost))

    # logging wandb train phase
    wandb.log({
        'Train loss': avg_cost
    })

    # ---- val loop ----
    with torch.no_grad():

        model.eval()
        all_probs = []   # per-sample model outputs (regression predictions)
        all_labels = []  # per-sample targets
        print("Calculating validation results...")
        for val_batch in val_dataloader:
            input_x, input_y = tuple(t.to(device) for t in val_batch)

            outs = model(input_x)

            all_probs.extend(outs.cpu().numpy())
            all_labels.extend(input_y.cpu().numpy())

    # Mean absolute error over every validation sample collected above.
    diff_y = [i-j for i, j in zip(all_labels, all_probs)]
    mae = float(np.mean(np.abs(diff_y)))
    print("MAE Score:", mae)

    # MAE is expressed in the target's own units, not as a percentage.
    print(f'Epoch [{epoch+1}/{args.epochs}], Validation MAE: {mae}')

    # Drive ReduceLROnPlateau (mode='min') with the validation metric instead of
    # the loss of whichever training mini-batch happened to come last.
    scheduler.step(mae)

    # Keep the checkpoint with the lowest validation MAE; always refresh last.pth.
    if mae < best_mae:
        print(f"New best model for val mae : {mae}! saving the best model..")
        torch.save(model.state_dict(), f"{save_dir}/best.pth")
        best_mae = mae
    torch.save(model.state_dict(), f"{save_dir}/last.pth")

    # logging wandb valid phase
    wandb.log({
        'Valid MAE': mae
    })

wandb.finish()


Training...
Epoch [1/100], train loss : 6286.4658
Calculating validation results...
MAE Score: 84.10971898751126
Epoch [1/100], Validation MAE: 84.10971898751126%
New best model for val mae : 84.10971898751126%! saving the best model..
Training...
Calculating validation results...
MAE Score: 69.8792400823699
Epoch [2/100], Validation MAE: 69.8792400823699%
Training...
Calculating validation results...
MAE Score: 36.766583363215126
Epoch [3/100], Validation MAE: 36.766583363215126%
Training...
Calculating validation results...
MAE Score: 32.82370095120536
Epoch [4/100], Validation MAE: 32.82370095120536%
Training...
Calculating validation results...
MAE Score: 20.378891507784527
Epoch [5/100], Validation MAE: 20.378891507784527%
Training...
Calculating validation results...
MAE Score: 15.913056956397163
Epoch [6/100], Validation MAE: 15.913056956397163%
Training...
Calculating validation results...
MAE Score: 13.480724785063002
Epoch [7/100], Validation MAE: 13.480724785063002%
Training

VBox(children=(Label(value='0.000 MB of 0.038 MB uploaded\r'), FloatProgress(value=0.0, max=1.0)))

0,1
Train loss,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Valid MAE,█▄▂▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Train loss,123.97903
Valid MAE,9.2542
