In [1]:
!pip install pytorch-lightning
!pip install transformers

Collecting pytorch-lightning
  Downloading pytorch_lightning-1.5.2-py3-none-any.whl (1.0 MB)
[K     |████████████████████████████████| 1.0 MB 5.1 MB/s 
[?25hCollecting future>=0.17.1
  Downloading future-0.18.2.tar.gz (829 kB)
[K     |████████████████████████████████| 829 kB 40.5 MB/s 
Collecting fsspec[http]!=2021.06.0,>=2021.05.0
  Downloading fsspec-2021.11.0-py3-none-any.whl (132 kB)
[K     |████████████████████████████████| 132 kB 56.7 MB/s 
Collecting pyDeprecate==0.3.1
  Downloading pyDeprecate-0.3.1-py3-none-any.whl (10 kB)
Collecting torchmetrics>=0.4.1
  Downloading torchmetrics-0.6.0-py3-none-any.whl (329 kB)
[K     |████████████████████████████████| 329 kB 60.6 MB/s 
Collecting PyYAML>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 50.2 MB/s 
[?25hCollecting aiohttp
  Downloading aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.manylin

In [2]:
import os
import pickle
import sys
from dataclasses import asdict, dataclass, field
from datetime import datetime
from time import time

import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torch.cuda.amp as amp
import torch.nn.functional as F
from pytorch_lightning.loggers import TensorBoardLogger
from sklearn.model_selection import KFold
from torch import optim
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm
from transformers import AdamW
from transformers import get_cosine_schedule_with_warmup

# workplace = '/content/drive/MyDrive/kaggle/codes/ventilator-pressure-prediction/'
# sys.path.append(workplace)
# from models.torch_lstm import simpleLSTM, TsLSTM, embedLSTM, dualDeepLSTM
# from utils import load_json, save_json
# from datasets import simpleData 
# from functions import eval_metrics, torch_loss_metrics
# from utils import fixed_seed, worker_init_fn
# from functions.task import SequentialTrain
# from config import torch_model_conf

In [3]:
import transformers

# Record the versions of the core libraries so results can be tied to an environment.
print(torch.__version__, pl.__version__, transformers.__version__, sep='\n')

1.10.0+cu111
1.5.2
4.12.5


In [None]:
# Lookup tables that map the string names used in the training config to the
# objects they select.
# NOTE(review): simpleLSTM / embedLSTM / TsLSTM / dualDeepLSTM / simpleData are
# only imported in the commented-out project imports near the top of the
# notebook, so this cell raises NameError on a fresh kernel until those are restored.

# model name -> model class
_supported_model = {
    'simple-lstm': simpleLSTM,
    'embedded': embedLSTM,
    'transformer': TsLSTM,
    'dualdeep': dualDeepLSTM,
}

# criterion name -> functional loss
_supported_criterion = {
    "mse": F.mse_loss,
    "mae": F.l1_loss,
}

# scheduler name -> LR scheduler class
_supported_scheduler = {
    'reduce_plateau': optim.lr_scheduler.ReduceLROnPlateau,
    'cosine': optim.lr_scheduler.CosineAnnealingLR,
}

# optimizer name -> optimizer class
_supported_optimizer = {
    'adam': optim.Adam,
    'sgd': optim.SGD,
}

# dataset name -> dataset class
_supported_dataset = {
    'simple': simpleData,
}

In [None]:
@dataclass
class reduce_plateau_conf:
    """Keyword arguments unpacked into the 'reduce_plateau' scheduler constructor."""

    # value passed as the scheduler's `factor` argument
    factor: float = 0.5
    # value passed as the scheduler's `patience` argument
    patience: int = 15

@dataclass
class cosine_anneal_conf:
    """Keyword arguments unpacked into the 'cosine' scheduler constructor."""

    # value passed as the scheduler's `eta_min` argument
    eta_min: float = 1e-4
    # value passed as the scheduler's `T_max` argument
    T_max: int = 50

@dataclass
class simple_data_conf:
    """Extra keyword arguments for the 'simple' dataset; empty by default.

    NOTE(review): ``simpleData`` is constructed elsewhere in this notebook with
    only (df, idx, x_col, y_col), so no extra arguments are assumed here —
    confirm against the dataset's real signature.
    """


@dataclass
class train_conf:
    """Training configuration consumed by the setup/training functions.

    Nested config objects use ``default_factory`` so every ``train_conf()``
    gets its own instance (a shared dataclass instance as a default is also
    rejected by the dataclasses module on newer Python versions).
    """
    seed: int = 221

    # key into _supported_model, and the constructor kwargs for that model
    model_name: str = 'embedded'
    model_param: torch_model_conf.embed_lstm = field(
        default_factory=lambda: torch_model_conf.embed_lstm(in_dim=23))

    # key into _supported_criterion
    criterion: str = 'mae'

    # NOTE(review): process_test() reads from '.../kaggle/datasets/...' while this
    # path uses '.../kaggle/dataset/...' — confirm which directory is correct.
    csv_dir: str = '/content/drive/MyDrive/kaggle/dataset/ventilator-pressure-prediction/train.csv'

    # training parameters
    batch_size: int = 128
    num_workers: int = 2
    epoch: int = 500

    # optimizer parameters
    opt_name: str = 'adam'
    lr: float = 1e-3
    # weight decay; callers clamp it with max(0, decay), so a negative value disables it
    decay: int = -1

    # metric name watched by checkpointing / early stopping / the LR scheduler
    monitor: str = 'avg_val_loss'

    early_stop_patience: int = 50

    # scheduler parameters
    scheduler_name: str = 'reduce_plateau'
    scheduler_param: reduce_plateau_conf = field(default_factory=reduce_plateau_conf)
    frequency: int = 1

    # dataset selection: setup_task()/train_loop() read cfg.data_name and
    # cfg.dataset_param; appended last so positional construction is unchanged
    data_name: str = 'simple'
    dataset_param: simple_data_conf = field(default_factory=simple_data_conf)

In [None]:
def process_test():
    """Smoke-test the dataset -> loader -> model -> loss path on one batch.

    Loads the training CSV, builds a ``simpleData`` dataset over indices 0..99,
    runs a single batch of 16 through the 'embedded' model on device 0 and
    prints the intermediate shapes, the L1 loss and the MAE metric.
    """
    frame = pd.read_csv('/content/drive/MyDrive/kaggle/datasets/ventilator-pressure-prediction/train.csv')
    print(len(frame['breath_id'].unique()))

    feature_cols = ['time_step', 'u_in', 'u_out', 'R', 'C']
    target_cols = ['pressure']
    sample_ids = list(range(100))

    dataset = simpleData(frame, sample_ids, feature_cols, target_cols)
    loader = DataLoader(dataset, 16)
    first_x, _, first_t = dataset[0]
    print(first_x.shape, first_t.shape)

    model_cls = _supported_model['embedded']
    model = model_cls(**asdict(torch_model_conf.embed_lstm(in_dim=5)))
    model.to(0)

    criterion = F.l1_loss
    efunc = eval_metrics.mae

    # Only the first batch is inspected.
    for x, _, t in loader:
        print(x.shape, t.shape)

        pred, _ = model(x.to(0))
        print(pred.shape)

        # The loss is computed on the CPU copy so it matches the (CPU) target.
        pred_cpu = pred.cpu()
        print(criterion(pred_cpu, t))

        pred_np = pred_cpu.detach().numpy()
        target_np = t.cpu().detach().numpy()
        print(efunc(pred_np, target_np))
        break

# process_test()

In [None]:
def setup_dataset(dname, csv_dir, data_cfg, tidx, vidx, x_col):
    """Build the training and validation datasets from a single CSV read.

    Args:
        dname: key into ``_supported_dataset`` selecting the dataset class.
        csv_dir: path of the CSV file to load.
        data_cfg: dataclass instance whose fields are passed as extra keyword
            arguments to the dataset constructor.
        tidx: indices handed to the training dataset.
        vidx: indices handed to the validation dataset.
        x_col: list of input column names.

    Returns:
        ``(train_dataset, valid_dataset)``.
    """
    frame = pd.read_csv(csv_dir)
    target_cols = ['pressure']

    def _build(indices):
        # Both splits share the dataframe and column lists; only the indices differ.
        return _supported_dataset[dname](frame, indices, x_col, target_cols, **asdict(data_cfg))

    return _build(tidx), _build(vidx)

def setup_task(cfg, tidx, vidx, x_col=None):
    """Assemble the Lightning task (model, loaders, optimizer, scheduler) for one fold.

    Args:
        cfg: training configuration; reads data_name, csv_dir, dataset_param,
            batch_size, num_workers, model_name, model_param, criterion,
            opt_name, lr, decay, scheduler_name, scheduler_param, monitor
            and frequency.
        tidx: indices of the training samples.
        vidx: indices of the validation samples.
        x_col: input column names forwarded to ``setup_dataset``. When omitted,
            the base columns used elsewhere in this notebook are used.
            NOTE(review): confirm the column list matches ``cfg.model_param``'s
            input dimension.

    Returns:
        A ``SequentialTrain`` instance ready for ``trainer.fit``.
    """
    if x_col is None:
        x_col = ['time_step', 'u_in', 'u_out', 'R', 'C']

    # setup_dataset requires x_col as its last positional argument.
    train_data, valid_data = setup_dataset(
        cfg.data_name, cfg.csv_dir, cfg.dataset_param, tidx, vidx, x_col)

    train_loader = DataLoader(
            train_data,
            cfg.batch_size,
            shuffle = True,
            drop_last = True,
            num_workers = cfg.num_workers,
            pin_memory = True,
            worker_init_fn = worker_init_fn)

    valid_loader = DataLoader(
            valid_data,
            cfg.batch_size,
            shuffle = False,
            num_workers = cfg.num_workers,
            pin_memory=True)

    model = _supported_model[cfg.model_name](**asdict(cfg.model_param))

    criterion = _supported_criterion[cfg.criterion]
    efunc = eval_metrics.mae

    # A negative cfg.decay is clamped to 0, i.e. no weight decay.
    optimizer = _supported_optimizer[cfg.opt_name](model.parameters(), lr=cfg.lr, weight_decay=max(0, cfg.decay))

    # Scheduler description in the dict form: the scheduler object plus the
    # metric it monitors and how often it is stepped.
    scheduler = {
        "scheduler": _supported_scheduler[cfg.scheduler_name](optimizer, **asdict(cfg.scheduler_param), verbose=True),
        "monitor": cfg.monitor,
        "interval": 'epoch',
        "frequency": cfg.frequency,
        }

    return SequentialTrain(
        model = model,
        criterion = criterion,
        optimizers = optimizer,
        train_loader = train_loader,
        valid_loader = valid_loader,
        eval_func = efunc,
        schedulers = scheduler
    )

def setup_trainer(cfg, out_dir):
    """Create a ``pl.Trainer`` with checkpointing, early stopping and TensorBoard logging.

    Args:
        cfg: training configuration; reads monitor, early_stop_patience and epoch.
        out_dir: output directory prefix. It is concatenated as-is, so it must
            end with a path separator; checkpoints go to ``{out_dir}ckpt/`` and
            logs to ``{out_dir}logs/``.

    Returns:
        The configured trainer (runs on GPU 0).
    """
    # Keep the best (lowest monitored value) weights as 'best' plus the last epoch.
    checkpoint_cb = pl.callbacks.ModelCheckpoint(
        dirpath=f'{out_dir}ckpt/',
        filename='best',
        auto_insert_metric_name=False,
        monitor=cfg.monitor,
        mode='min',
        save_top_k=1,
        save_last=True,
        save_weights_only=True,
        verbose=True,
    )

    # Stop once the monitored value has not improved for `early_stop_patience` checks.
    early_stop_cb = pl.callbacks.EarlyStopping(
        monitor=cfg.monitor,
        mode='min',
        patience=cfg.early_stop_patience,
        min_delta=0.0,
        verbose=False,
    )

    tb_logger = TensorBoardLogger(f'{out_dir}logs/')

    return pl.Trainer(
        gpus=[0],
        max_epochs=cfg.epoch,
        callbacks=[checkpoint_cb, early_stop_cb],
        logger=tb_logger,
        progress_bar_refresh_rate=1,
    )

In [None]:
def apply_lightning():
    """Run 4-fold cross-validated training through the Lightning trainer.

    For every fold the train/validation index arrays are pickled into the
    fold's output directory before the task and trainer are built and fitted.

    NOTE(review): ``workplace`` is only assigned in a commented-out line near
    the imports, so it must be defined before this function is called.
    """
    cfg = train_conf()
    now = datetime.now().strftime("%m%d%H%M%S")
    out = f'{workplace}logs/{cfg.model_name}/{now}/'
    os.makedirs(out, exist_ok=True)

    fixed_seed(cfg.seed)

    k = 4
    # NOTE(review): hard-coded sample count — presumably the number of
    # sequences in train.csv; confirm against the dataset.
    n = 75450
    indicies = np.arange(n)
    cv = KFold(n_splits=k, shuffle=True, random_state=cfg.seed)

    for i, (train_pos, valid_pos) in enumerate(cv.split(indicies)):
        train_idx = indicies[train_pos]
        valid_idx = indicies[valid_pos]

        out_i = f'{out}{i}/'
        os.makedirs(out_i, exist_ok=True)
        print(out_i)

        # Context managers make sure the index files are flushed and closed.
        with open(f'{out_i}train_idx.pkl', 'wb') as fp:
            pickle.dump(train_idx, fp)
        with open(f'{out_i}valid_idx.pkl', 'wb') as fp:
            pickle.dump(valid_idx, fp)

        task = setup_task(cfg, train_idx, valid_idx)
        trainer = setup_trainer(cfg, out_i)
        trainer.fit(task)

In [None]:
def train_step(model, loader, optimizer, scheduler, epoch):
    """Train ``model`` for one epoch on device 0 and print the mean loss.

    The model returns two predictions, ``y_in`` and ``y_out``, blended by the
    ``u_out`` tensor into ``y``. The loss is the L1 loss of the blend plus two
    masked L1 terms (``u_out < 0.5`` and ``u_out > 0.5``).

    Args:
        model: module returning ``(y_in, y_out)`` for an input batch.
        loader: iterable of ``(x, u_out, t)`` batches.
        optimizer: optimizer stepped once per batch.
        scheduler: accepted for interface compatibility; it is not stepped
            here (the caller steps it per epoch).
        epoch: epoch number, used only in the log line.

    Returns:
        None. Prints the average training loss and the elapsed time; the
        average is reported as NaN when the loader yields no batches.
    """
    start_time = time()
    total_loss = 0.0
    itr_cnt = 0

    model.train()
    for x, u_out, t in loader:
        itr_cnt += 1
        optimizer.zero_grad()
        x, u_out, t = x.to(0), u_out.to(0), t.to(0)

        y_in, y_out = model(x)
        y = y_in*(1-u_out) + y_out*u_out

        loss0 = F.l1_loss(y, t)
        loss1 = loss_metrics.mask_l1_loss(y_in, t, u_out < 0.5)
        # NOTE(review): this term also uses y_in, so y_out is only trained
        # through the blended loss0 — confirm y_out was not intended here.
        loss2 = loss_metrics.mask_l1_loss(y_in, t, u_out > 0.5)
        loss = loss0 + loss1 + loss2

        total_loss += loss.item()

        loss.backward()
        optimizer.step()

    # An empty loader (e.g. drop_last with fewer samples than one batch)
    # must not crash the epoch log with a division by zero.
    mean_loss = total_loss / itr_cnt if itr_cnt else float('nan')
    print(f'epoch {epoch}: avg_train_loss {mean_loss}, elapsed_time {time() - start_time}s')

def valid_step(model, loader):
    """Evaluate ``model`` on ``loader`` (device 0, no gradients) and print metrics.

    Uses the same three-term loss as training, and additionally reports the
    plain L1 error of the blended prediction and its masked L1 error on
    positions where ``u_out < 0.5``.

    Args:
        model: module returning ``(y_in, y_out)`` for an input batch.
        loader: iterable of ``(x, u_out, t)`` batches.

    Returns:
        The mean validation loss over batches as a float, or NaN when the
        loader yields no batches.
    """
    total_loss = 0.0
    total_mae = 0.0
    total_masked_mae = 0.0
    itr_cnt = 0

    model.eval()
    with torch.no_grad():
        for x, u_out, t in loader:
            itr_cnt += 1
            x, u_out, t = x.to(0), u_out.to(0), t.to(0)

            y_in, y_out = model(x)
            y = y_in*(1-u_out) + y_out*u_out

            loss0 = F.l1_loss(y, t)
            loss1 = loss_metrics.mask_l1_loss(y_in, t, u_out < 0.5)
            # NOTE(review): this term also uses y_in — confirm y_out was not
            # intended here.
            loss2 = loss_metrics.mask_l1_loss(y_in, t, u_out > 0.5)
            loss = loss0 + loss1 + loss2

            # loss0 already is the L1 error of the blended prediction.
            total_mae += loss0.item()
            # .item() keeps the accumulator a Python float instead of a device tensor.
            total_masked_mae += loss_metrics.mask_l1_loss(y, t, u_out < 0.5).item()
            total_loss += loss.item()

    if itr_cnt:
        avg_loss = total_loss / itr_cnt
        avg_mae = total_mae / itr_cnt
        avg_masked_mae = total_masked_mae / itr_cnt
    else:
        # No batches: report NaN rather than raising ZeroDivisionError.
        avg_loss = avg_mae = avg_masked_mae = float('nan')

    print(f'\t valid_loss {avg_loss}, mae {avg_mae}, masked_mae {avg_masked_mae}')

    return avg_loss

In [None]:
def train_loop(cfg, tidx, vidx, x_col, out):
    """Train one fold with the manual loop, with LR scheduling and early stopping.

    Args:
        cfg: training configuration; reads data_name, csv_dir, dataset_param,
            batch_size, num_workers, model_name, model_param, opt_name, lr,
            decay, scheduler_name, scheduler_param, epoch and
            early_stop_patience.
        tidx: indices of the training samples.
        vidx: indices of the validation samples.
        x_col: input column names.
        out: output directory prefix (concatenated as-is); the best weights are
            saved to ``{out}best_model`` whenever the validation loss improves.

    Returns:
        A snapshot of the state dict with the lowest validation loss, or
        ``None`` if no epoch improved on the initial score.
    """
    import copy  # local import: only needed to snapshot the best weights

    train_data, valid_data = setup_dataset(cfg.data_name, cfg.csv_dir, cfg.dataset_param, tidx, vidx, x_col)

    train_loader = DataLoader(
            train_data,
            cfg.batch_size,
            shuffle = True,
            drop_last = True,
            num_workers = cfg.num_workers,
            pin_memory = True,
            worker_init_fn = worker_init_fn)

    valid_loader = DataLoader(
            valid_data,
            cfg.batch_size,
            shuffle = False,
            num_workers = cfg.num_workers,
            pin_memory=True)

    model = _supported_model[cfg.model_name](**asdict(cfg.model_param))
    model.to(0)
    # A negative cfg.decay is clamped to 0, i.e. no weight decay.
    optimizer = _supported_optimizer[cfg.opt_name](model.parameters(), lr=cfg.lr, weight_decay=max(0, cfg.decay))

    scheduler = _supported_scheduler[cfg.scheduler_name](optimizer, **asdict(cfg.scheduler_param), verbose=True)
    # Only ReduceLROnPlateau takes the monitored metric in step(); other
    # schedulers (e.g. the registered cosine annealing) are stepped without it.
    steps_on_metric = isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau)

    best_model = None
    best_score = float('inf')
    not_update = 0

    for ep in range(cfg.epoch):
        train_step(model, train_loader, optimizer, scheduler, ep)
        val_loss = valid_step(model, valid_loader)
        if steps_on_metric:
            scheduler.step(val_loss)
        else:
            scheduler.step()

        if val_loss < best_score:
            print('update best model')
            not_update = 0
            best_score = val_loss
            # state_dict() returns references to the live parameters, which keep
            # changing as training continues; deep-copy to freeze the best weights.
            best_model = copy.deepcopy(model.state_dict())
            torch.save(best_model, f'{out}best_model')
        else:
            not_update += 1

        if not_update >= cfg.early_stop_patience:
            print('early stop triggered')
            break

    torch.cuda.empty_cache()
    return best_model

def apply():
    """Run 10-fold cross-validated training with the manual training loop.

    Writes the config (``params.json``) and the input column list
    (``x_cols.pkl``) to a timestamped output directory, then for each fold
    pickles the train/validation indices, trains, and saves the state dict
    returned by ``train_loop`` as ``best_last`` in the fold directory.

    NOTE(review): ``workplace`` is only assigned in a commented-out line near
    the imports, so it must be defined before this function is called.
    """
    cfg = train_conf()
    now = datetime.now().strftime('%m%d%H%M%S')
    out = f'{workplace}logs/{cfg.model_name}/{now}/'
    os.makedirs(out, exist_ok=True)
    save_json(asdict(cfg), out + 'params.json')

    fixed_seed(cfg.seed)

    x_cols = ['time_step', 'u_in', 'u_out', 'R', 'C']
    # Context managers make sure every pickle file is flushed and closed.
    with open(f'{out}x_cols.pkl', 'wb') as fp:
        pickle.dump(x_cols, fp)

    k = 10
    # NOTE(review): hard-coded sample count — presumably the number of
    # sequences in train.csv; confirm against the dataset.
    n = 75450
    indicies = np.arange(n)
    cv = KFold(n_splits=k, shuffle=True, random_state=cfg.seed)

    for i, (train_pos, valid_pos) in enumerate(cv.split(indicies)):
        print('###', i+1, '###')

        train_idx = indicies[train_pos]
        valid_idx = indicies[valid_pos]

        out_i = f'{out}{i}/'
        os.makedirs(out_i, exist_ok=True)
        with open(f'{out_i}train_idx.pkl', 'wb') as fp:
            pickle.dump(train_idx, fp)
        with open(f'{out_i}valid_idx.pkl', 'wb') as fp:
            pickle.dump(valid_idx, fp)

        best = train_loop(cfg, train_idx, valid_idx, x_cols, out_i)
        torch.save(best, f'{out_i}best_last')
