In [1]:
import sys
from time import time
import numpy as np
import pandas as pd
from pathlib import Path
import lightgbm as lgb
import matplotlib.pyplot as plt 
import seaborn as sns
from tqdm import tqdm
import copy
import wandb
from collections import OrderedDict

from sklearn.metrics import mean_absolute_error
from sklearn import model_selection
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as torchdata

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.loggers import WandbLogger


In [2]:
sys.path.append('../../src/')
import utils as utils
from utils import Timer

In [3]:
class CFG:
    seed = 42
    exp_num = 34
    local = True
    n_folds = 5
    folds = [0]
    debug = False
    bias = 1000
    epochs = 200

    
    ######################
    # Dataset #
    ######################
    transforms = {
        "train": [{"name": ""}],
        "valid": [{"name": ""}],
        "test": [{"name": ""}]
    }

    ######################
    # Loaders #
    ######################
    loader_params = {
        "train": {
            'batch_size': 128,
            'shuffle': True,
            'num_workers': 8,
            'pin_memory': True,
            'drop_last': True,
        },
        "valid": {
            'batch_size': 32,
            'shuffle': False,
            'num_workers': 8,
            'pin_memory': True,
            'drop_last': False,
        },
        "test": {
            'batch_size': 32,
            'shuffle': False,
            'num_workers': 8,
            'pin_memory': True,
            'drop_last': False,
        }
    }

    ######################
    # Split #
    ######################
    split = "GroupKFold"
    split_params = {
        "n_splits": 5,
    }

    ######################
    # Model #
    ######################
    input_dim = 5

    dense_dim = 512
    lstm_dim = 512
    logit_dim = 512
    num_classes = 1

    ######################
    # Criterion #
    ######################
#     loss_name = "rmspe_loss"
#     loss_params: dict = {}

    ######################
    # Optimizer #
    ######################
    optimizer_name = "AdamW"
    optimizer_params = {
        "lr": 0.001,
        'weight_decay': 1e-6
    }

    ######################
    # Scheduler #
    ######################
    scheduler_name = "CosineAnnealingWarmRestarts"
    scheduler_params = {
        'T_0': 40, 
        'T_mult': 1
    }

In [4]:
utils.set_seed(CFG.seed)

In [5]:
if CFG.local:
    DATA_DIR = Path("/home/knikaido/work/Ventilator-Pressure-Prediction/data/ventilator-pressure-prediction")
    OUTPUT_DIR = Path('./output/')
else:
    DATA_DIR = Path("../input/ventilator-pressure-prediction")
    OUTPUT_DIR = Path('')   

In [6]:
def get_transforms(phase: str):
    transforms = CFG.transforms
    if transforms is None:
        return None
    else:
        if transforms[phase] is None:
            return None
        trns_list = []
        for trns_conf in transforms[phase]:
            trns_name = trns_conf["name"]
            trns_params = {} if trns_conf.get("params") is None else \
                trns_conf["params"]
            if globals().get(trns_name) is not None:
                trns_cls = globals()[trns_name]
                trns_list.append(trns_cls(**trns_params))

        if len(trns_list) > 0:
            return Compose(trns_list)
        else:
            return None
        
        
class Normalize:
    def __call__(self, y: np.ndarray):
        max_vol = np.abs(y).max()
        y_vol = y * 1 / max_vol
        return np.asfortranarray(y_vol)


class Compose:
    def __init__(self, transforms: list):
        self.transforms = transforms

    def __call__(self, y: np.ndarray):
        for trns in self.transforms:
            y = trns(y)
        return y

In [7]:
def compute_metric(preds, trues, u_outs):
    """
    Metric for the problem, as I understood it.
    """
    
    y = trues
    w = 1 - u_outs
    
    assert y.shape == preds.shape and w.shape == y.shape, (y.shape, preds.shape, w.shape)
    
    mae = w * np.abs(y - preds)
    mae = mae.sum() / w.sum()
    
    return mae


class VentilatorLoss(nn.Module):
    """
    Directly optimizes the competition metric
    """
    def __call__(self, preds, y, u_out):
        w = 1 - u_out
        mae = w * (y - preds).abs()
        mae = mae.sum(-1) / w.sum(-1)

        return mae

In [8]:
def get_criterion():
    return VentilatorLoss()

In [9]:
# Custom optimizer
__OPTIMIZERS__ = {}


def get_optimizer(model: nn.Module):
    optimizer_name = CFG.optimizer_name
    if optimizer_name == "SAM":
        base_optimizer_name = CFG.base_optimizer
        if __OPTIMIZERS__.get(base_optimizer_name) is not None:
            base_optimizer = __OPTIMIZERS__[base_optimizer_name]
        else:
            base_optimizer = optim.__getattribute__(base_optimizer_name)
        return SAM(model.parameters(), base_optimizer, **CFG.optimizer_params)

    if __OPTIMIZERS__.get(optimizer_name) is not None:
        return __OPTIMIZERS__[optimizer_name](model.parameters(),
                                              **CFG.optimizer_params)
    else:
        return optim.__getattribute__(optimizer_name)(model.parameters(),
                                                      **CFG.optimizer_params)


def get_scheduler(optimizer):
    scheduler_name = CFG.scheduler_name

    if scheduler_name is None:
        return
    else:
        return optim.lr_scheduler.__getattribute__(scheduler_name)(
            optimizer, **CFG.scheduler_params)

In [10]:
# validation
splitter = getattr(model_selection, CFG.split)(**CFG.split_params)

In [11]:
class VentilatorDataset(torchdata.Dataset):
    def __init__(self, df, train_value_col, train_category_col):
        if "pressure" not in df.columns:
            df['pressure'] = 0
        self.df = df
        self.groups = df.groupby('breath_id').groups
        self.keys = list(self.groups.keys())
        self.train_value_col = train_value_col
        self.train_category_col = train_category_col

        
    def __len__(self):
        return len(self.groups)

    def __getitem__(self, idx):
        indexes = self.groups[self.keys[idx]]
        df_ = self.df.iloc[indexes]
        
        input_value = df_[self.train_value_col].values
        input_category = df_[self.train_category_col].values

        u_out_ = df_['u_out'].values
        p_ = df_['pressure'].values

        data = {
            "input_value": input_value.astype(np.float32),
            "input_category": input_category.astype(int),
            "u_out": u_out_.astype(np.float32),
            "p": p_.astype(np.float32),
        }
        
        return data

In [12]:
class RNNModel(nn.Module):
    def __init__(
        self,
        input_dim=4,
        lstm_dim=256,
        dense_dim=256,
        logit_dim=256,
        num_classes=1,
    ):
        super().__init__()
        
        self.rc_emb = nn.Embedding(9, 4, padding_idx=0)
        
        self.mlp = nn.Sequential(
            nn.Linear(input_dim, dense_dim // 2),
            nn.LayerNorm(dense_dim // 2),
            nn.ReLU(),
            nn.Linear(dense_dim // 2, dense_dim),
            nn.LayerNorm(dense_dim),
            nn.ReLU(),
        )

        self.conv_basic = nn.Sequential(
            nn.Conv1d(in_channels=dense_dim+4, out_channels=dense_dim+4, kernel_size=3, padding=1),
            nn.LayerNorm(80),
            nn.ReLU(),
            nn.Conv1d(in_channels=dense_dim+4, out_channels=dense_dim+4, kernel_size=3, padding=1),
            nn.LayerNorm(80),
            nn.ReLU(),
        )

        self.lstm = nn.LSTM(dense_dim+4, lstm_dim, num_layers=2, batch_first=True, bidirectional=True)

        self.logits = nn.Sequential(
            nn.Linear(lstm_dim * 2, logit_dim),
            nn.ReLU(),
            nn.Linear(logit_dim, num_classes),
        )     
        
        # nakamaさんの初期化
        for n, m in self.named_modules():
            if isinstance(m, nn.LSTM):
                print(f'init {m}')
                for param in m.parameters():
                    if len(param.shape) >= 2:
                        nn.init.orthogonal_(param.data)
                    else:
                        nn.init.normal_(param.data)
            elif isinstance(m, nn.GRU):
                print(f"init {m}")
                for param in m.parameters():
                    if len(param.shape) >= 2:
                        init.orthogonal_(param.data)
                    else:
                        init.normal_(param.data)

    def forward(self, cont_seq_x, cate_seq_x):
        bs = cont_seq_x.size(0)
        rc_emb = self.rc_emb(cate_seq_x).view(bs, 80, -1)
        
        features = self.mlp(cont_seq_x)
        features = torch.cat((rc_emb, features), 2)

        features = self.conv_basic(features.permute([0, 2, 1]))
        features, _ = self.lstm(features.permute([0, 2, 1]))
        
        pred = self.logits(features)
        return pred

In [13]:
# Learner class(pytorch-lighting)
class Learner(pl.LightningModule):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.criterion = get_criterion()
    
    def training_step(self, batch, batch_idx):
        d_ = batch
        output = self.model(d_['input_value'], d_['input_category'])
        loss = self.criterion(output.view(-1), d_['p'].view(-1), d_['u_out'].view(-1))
        return loss
    
    def validation_step(self, batch, batch_idx):
        d_ = batch
        output = self.model(d_['input_value'], d_['input_category'])
        loss = self.criterion(output.view(-1), d_['p'].view(-1), d_['u_out'].view(-1))
        
        self.log(f'Loss/val', loss, on_step=False, on_epoch=True, prog_bar=False, logger=True)
        
        output = OrderedDict({
            "targets": d_['p'].detach(), "preds": output.detach(), "u_outs": d_['u_out'].detach(), "loss": loss.detach()
        })
        return output
    
    def validation_epoch_end(self, outputs):

        targets = torch.cat([o["targets"].view(-1) for o in outputs]).cpu().numpy()
        preds = torch.cat([o["preds"].view(-1) for o in outputs]).cpu().numpy()
        u_outs = torch.cat([o["u_outs"].view(-1) for o in outputs]).cpu().numpy()

        score = get_score(preds, targets, u_outs)
        self.log(f'custom_mae/val', score, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        print(f'epoch = {self.current_epoch}, custom_mae = {score}')

    def configure_optimizers(self):
        optimizer = get_optimizer(self.model)
        scheduler = get_scheduler(optimizer)
        return {"optimizer": optimizer, "lr_scheduler": scheduler, "monitor": "Loss/val"}

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [15]:
def get_score(y_pred, y_true, u_outs):
    return compute_metric(y_pred, y_true, u_outs)


def to_np(input):
    return input.detach().cpu().numpy()

# oof
def evaluate(model, loaders, phase):
    model.eval()
    pred_list = []
    target_list = []
    with torch.no_grad():
        for batch in loaders[phase]:
            d_ = batch
            d_['input_value'] = d_['input_value'].to(device)
            d_['input_category'] = d_['input_category'].to(device)
            output = model(d_['input_value'], d_['input_category'])
#             output = nn.Softmax(dim=1)(output)
            pred_list.append(to_np(output))
            target_list.append(to_np(d_['p']))

    pred_list = np.concatenate(pred_list).reshape(-1)
    target_list = np.concatenate(target_list).reshape(-1)
    model.train()
    return pred_list, target_list

In [16]:
train = pd.read_csv(DATA_DIR / 'train.csv')
test = pd.read_csv(DATA_DIR / 'test.csv')
display(train), display(test)

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure
0,1,1,20,50,0.000000,0.083334,0,5.837492
1,2,1,20,50,0.033652,18.383041,0,5.907794
2,3,1,20,50,0.067514,22.509278,0,7.876254
3,4,1,20,50,0.101542,22.808822,0,11.742872
4,5,1,20,50,0.135756,25.355850,0,12.234987
...,...,...,...,...,...,...,...,...
6035995,6035996,125749,50,10,2.504603,1.489714,1,3.869032
6035996,6035997,125749,50,10,2.537961,1.488497,1,3.869032
6035997,6035998,125749,50,10,2.571408,1.558978,1,3.798729
6035998,6035999,125749,50,10,2.604744,1.272663,1,4.079938


Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out
0,1,0,5,20,0.000000,0.000000,0
1,2,0,5,20,0.031904,7.515046,0
2,3,0,5,20,0.063827,14.651675,0
3,4,0,5,20,0.095751,21.230610,0
4,5,0,5,20,0.127644,26.320956,0
...,...,...,...,...,...,...,...
4023995,4023996,125748,20,10,2.530117,4.971245,1
4023996,4023997,125748,20,10,2.563853,4.975709,1
4023997,4023998,125748,20,10,2.597475,4.979468,1
4023998,4023999,125748,20,10,2.631134,4.982648,1


(None, None)

In [17]:
def get_raw_features(input_df, dataType = 'train'):
    colum = ['time_step', 'u_in', 'R', 'C']

    return input_df[colum]

In [18]:
def get_category_features(input_df, dataType = 'train'):
    output_df = copy.deepcopy(input_df)
    colum = ['R_C']
    rc_map = {'5_10': 0, '5_20': 1, '5_50': 2, '20_10': 3, '20_20': 4, '20_50': 5, '50_10': 6, '50_20': 7, '50_50': 8}
    
    output_df['R_C'] = [f'{r}_{c}' for r, c in zip(output_df['R'], output_df['C'])]
    output_df['R_C'] = output_df['R_C'].map(rc_map)

    return output_df[colum]

In [19]:
def get_diff_shift_features(input_df, dataType = 'train'):
    
    
    output_df = copy.deepcopy(input_df)
    c_num = input_df.shape[1]
    
    b_id_gby = input_df.groupby(['breath_id'])
    shift_idx = [-2, -1, 1, 2, 3, 4]
    
    # diffより直接引いたほうが早い
    for i in shift_idx:
        output_df[f'u_in_shift_{i}'] = b_id_gby['u_in'].shift(i)
        output_df[f'u_in_diff_{i}'] = output_df['u_in'] - output_df[f'u_in_shift_{i}']

        output_df[f'time_step_shift_{i}'] = b_id_gby['time_step'].shift(i)
        output_df[f'time_step_diff_{i}'] = output_df['time_step'] - output_df[f'time_step_shift_{i}']
    
    return output_df.iloc[:, c_num:]

In [20]:
def get_cum_features(input_df, dataType = 'train'):
    
    output_df = copy.deepcopy(input_df)
    c_num = input_df.shape[1]
    
    b_id_gby = input_df.groupby(['breath_id'])
    
    output_df['u_in_cumsum'] = b_id_gby['u_in'].cumsum()
    output_df['time_step_cumsum'] = b_id_gby['time_step'].cumsum()
    
    b_id_gby = output_df.groupby(['breath_id'])    
    shift_idx = [1, 2]
    
    # diffより直接引いたほうが早い
    for i in shift_idx:
        output_df[f'u_in_cumsum_shift_{i}'] = b_id_gby['u_in_cumsum'].shift(i)
        output_df[f'u_in_cumsum_diff_{i}'] = output_df['u_in_cumsum'] - output_df[f'u_in_cumsum_shift_{i}']

        output_df[f'time_step_cumsum_shift_{i}'] = b_id_gby['time_step_cumsum'].shift(i)
        output_df[f'time_step_cumsum_diff_{i}'] = output_df['time_step_cumsum'] - output_df[f'u_in_cumsum_diff_{i}']
    
    return output_df.iloc[:, c_num:]

In [22]:
def get_simple_calc_features(input_df, dataType = 'train'):
    output_df = copy.deepcopy(input_df)
    c_num = input_df.shape[1]
    
    output_df['area'] = output_df['time_step'] * output_df['u_in']
    output_df['area'] = output_df.groupby('breath_id')['area'].cumsum()
    
    return output_df.iloc[:, c_num:]

In [23]:
def get_agg_features(input_df, dataType = 'train'):
    
    output_df = copy.deepcopy(input_df)
    c_num = input_df.shape[1]
    
    # Dict for aggregations
    create_feature_dict = {
        'u_in': [np.max, np.std, np.mean, 'first', 'last'],
    }
    
    def get_agg_window(start_time=0, end_time=3.0, add_suffix = False):
        
        df_tgt = output_df[(output_df['time_step'] >= start_time) & (output_df['time_step'] <= end_time)]
        df_feature = df_tgt.groupby(['breath_id']).agg(create_feature_dict)
        df_feature.columns = ['_'.join(col) for col in df_feature.columns]
        
        if add_suffix:
            df_feature = df_feature.add_suffix('_' + str(start_time) + '_' + str(end_time))
            
        return df_feature
    
    df_agg_feature = get_agg_window().reset_index()
    
#     df_tmp = get_agg_window(start_time = 2, add_suffix = True).reset_index()
#     df_agg_feature = df_agg_feature.merge(df_tmp, how = 'left', on = 'breath_id')
#     df_tmp = get_agg_window(start_time = 1, add_suffix = True).reset_index()
#     df_agg_feature = df_agg_feature.merge(df_tmp, how = 'left', on = 'breath_id')
#     df_tmp = get_agg_window(end_time = 1, add_suffix = True).reset_index()
#     df_agg_feature = df_agg_feature.merge(df_tmp, how = 'left', on = 'breath_id')
#     df_tmp = get_agg_window(end_time = 2, add_suffix = True).reset_index()
#     df_agg_feature = df_agg_feature.merge(df_tmp, how = 'left', on = 'breath_id')

    output_df = pd.merge(output_df, df_agg_feature, how='left', on='breath_id')
    
    output_df['u_in_diffmax'] = output_df['u_in_amax'] - output_df['u_in']
    output_df['u_in_diffmean'] = output_df['u_in_mean'] - output_df['u_in']
    
    return output_df.iloc[:, c_num:]

In [24]:
def to_feature(input_df, dataType = 'train'):
    """input_df を特徴量行列に変換した新しいデータフレームを返す.
    """

    processors = [
        get_raw_features,
        get_category_features,
        get_simple_calc_features,
        get_diff_shift_features,
        get_cum_features,
        get_agg_features
    ]

    out_df = pd.DataFrame()

    for func in tqdm(processors, total=len(processors)):
        with Timer(prefix='' + func.__name__ + ' '):
            _df = func(input_df, dataType)

        # 長さが等しいことをチェック (ずれている場合, func の実装がおかしい)
        assert len(_df) == len(input_df), func.__name__
        out_df = pd.concat([out_df, _df], axis=1)
#     out_df = utils.reduce_mem_usage(out_df)
    
    return out_df

In [25]:
train_df = to_feature(train, dataType = 'train')
test_df = to_feature(test, dataType = 'test')

  0%|          | 0/6 [00:00<?, ?it/s]

get_raw_features  0.030[s]


 33%|███▎      | 2/6 [00:02<00:04,  1.24s/it]

get_category_features  2.394[s]


 50%|█████     | 3/6 [00:02<00:02,  1.20it/s]

get_simple_calc_features  0.165[s]
get_diff_shift_features  2.028[s]


 67%|██████▋   | 4/6 [00:05<00:02,  1.50s/it]

get_cum_features  0.873[s]


 83%|████████▎ | 5/6 [00:07<00:01,  1.55s/it]

get_agg_features  1.222[s]


100%|██████████| 6/6 [00:09<00:00,  1.53s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

get_raw_features  0.019[s]


 50%|█████     | 3/6 [00:01<00:01,  1.86it/s]

get_category_features  1.552[s]
get_simple_calc_features  0.107[s]
get_diff_shift_features  1.225[s]


 67%|██████▋   | 4/6 [00:03<00:01,  1.07it/s]

get_cum_features  0.544[s]


 83%|████████▎ | 5/6 [00:04<00:00,  1.03it/s]

get_agg_features  0.805[s]


100%|██████████| 6/6 [00:05<00:00,  1.02it/s]


In [26]:
train_value_col = [i for i in train_df.columns.to_list() if i not in ['R_C']]
train_category_col = ['R_C']

In [27]:
ss = StandardScaler()

train_category = train_df[train_category_col]
train_df = pd.DataFrame(ss.fit_transform(train_df[train_value_col]), columns=train_value_col)
train_mean = train_df.mean()
train_df = train_df.fillna(train_df.mean())

test_category = test_df[train_category_col]
test_df = pd.DataFrame(ss.transform(test_df[train_value_col]), columns=train_value_col)
test_df = test_df.fillna(train_mean)

In [28]:
display(train_df), display(test_df)

Unnamed: 0,time_step,u_in,R,C,area,u_in_shift_-2,u_in_diff_-2,time_step_shift_-2,time_step_diff_-2,u_in_shift_-1,...,u_in_cumsum_diff_2,time_step_cumsum_shift_2,time_step_cumsum_diff_2,u_in_amax,u_in_std,u_in_mean,u_in_first,u_in_last,u_in_diffmax,u_in_diffmean
0,-1.706609,-0.538775,-0.359072,1.394522,-0.935020,1.239960e+00,-2.503819e+00,-1.703981e+00,-5.670750e-01,8.443515e-01,...,-1.287479e-16,9.062345e-17,1.155008e-16,-0.245401,0.119327,0.513555,-0.550080,0.281162,-0.015391,0.820945
1,-1.662676,0.823348,-0.359072,1.394522,-0.932038,1.263865e+00,-5.308005e-01,-1.658426e+00,-7.288073e-01,1.156962e+00,...,-1.287479e-16,9.062345e-17,1.155008e-16,-0.245401,0.119327,0.513555,-0.550080,0.281162,-0.616229,-0.672003
2,-1.618468,1.130480,-0.359072,1.394522,-0.924711,1.467130e+00,-3.577016e-01,-1.612622e+00,-8.800655e-01,1.179656e+00,...,1.068057e+00,-1.116535e+00,-1.383822e+00,-0.245401,0.119327,0.513555,-0.550080,0.281162,-0.751706,-1.008635
3,-1.574044,1.152777,-0.359072,1.394522,-0.913545,1.619080e+00,-5.335696e-01,-1.567182e+00,-8.432008e-01,1.372623e+00,...,1.245432e+00,-1.115415e+00,-1.479190e+00,-0.245401,0.119327,0.513555,-0.550080,0.281162,-0.761541,-1.033072
4,-1.529378,1.342362,-0.359072,1.394522,-0.896949,1.608516e+00,-2.398765e-01,-1.521650e+00,-7.557113e-01,1.516875e+00,...,1.359515e+00,-1.113168e+00,-1.538975e+00,-0.245401,0.119327,0.513555,-0.550080,0.281162,-0.845168,-1.240867
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6035995,1.563202,-0.434092,1.171893,-0.937525,0.367140,-4.319748e-01,-5.327759e-02,1.648132e+00,-2.612726e-01,-4.356080e-01,...,-4.541618e-01,1.884368e+00,1.550025e+00,-0.334837,-0.430115,-0.438246,-0.313487,-3.249996,-0.153790,0.279152
6035996,1.606751,-0.434183,1.171893,-0.937525,0.385353,-4.548240e-01,-2.202768e-02,1.692762e+00,-2.523388e-01,-4.302682e-01,...,-4.514451e-01,1.966610e+00,1.604503e+00,-0.334837,-0.430115,-0.438246,-0.313487,-3.249996,-0.153750,0.279251
6035997,1.650417,-0.428937,1.171893,-0.937525,0.404680,-4.380590e-01,-3.732884e-02,1.737306e+00,-1.774798e-01,-4.519599e-01,...,-4.486692e-01,2.049963e+00,1.659686e+00,-0.334837,-0.430115,-0.438246,-0.313487,-3.249996,-0.156064,0.273501
6035998,1.693939,-0.450248,1.171893,-0.937525,0.420662,2.584907e-16,7.782393e-17,-1.094350e-16,2.408285e-15,-4.360442e-01,...,-4.573193e-01,2.134426e+00,1.721892e+00,-0.334837,-0.430115,-0.438246,-0.313487,-3.249996,-0.146664,0.296860


Unnamed: 0,time_step,u_in,R,C,area,u_in_shift_-2,u_in_diff_-2,time_step_shift_-2,time_step_diff_-2,u_in_shift_-1,...,u_in_cumsum_diff_2,time_step_cumsum_shift_2,time_step_cumsum_diff_2,u_in_amax,u_in_std,u_in_mean,u_in_first,u_in_last,u_in_diffmax,u_in_diffmean
0,-1.706609,-0.544978,-1.124554,-0.354513,-0.935020,6.128857e-01,-1.651673e+00,-1.708917e+00,1.020982e+00,2.097362e-02,...,-1.287479e-16,9.062345e-17,1.155008e-16,0.048467,0.367378,0.364698,-0.553396,0.267353,0.290369,0.760954
1,-1.664958,0.014398,-1.124554,-0.354513,-0.933864,1.137916e+00,-1.549065e+00,-1.666179e+00,1.012356e+00,5.616568e-01,...,-1.287479e-16,9.062345e-17,1.155008e-16,0.048467,0.367378,0.364698,-0.553396,0.267353,0.043626,0.147852
2,-1.623282,0.545607,-1.124554,-0.354513,-0.929355,1.544151e+00,-1.324769e+00,-1.623482e+00,1.025706e+00,1.060088e+00,...,3.175823e-01,-1.116535e+00,-9.709598e-01,0.048467,0.367378,0.364698,-0.553396,0.267353,-0.190693,-0.434376
3,-1.581604,1.035304,-1.124554,-0.354513,-0.919555,1.876616e+00,-1.060282e+00,-1.580758e+00,1.030121e+00,1.445741e+00,...,8.672678e-01,-1.115473e+00,-1.271336e+00,0.048467,0.367378,0.364698,-0.553396,0.267353,-0.406700,-0.971107
4,-1.539968,1.414199,-1.124554,-0.354513,-0.903357,2.120740e+00,-8.376255e-01,-1.538032e+00,1.020777e+00,1.761363e+00,...,1.334943e+00,-1.113349e+00,-1.525880e+00,0.048467,0.367378,0.364698,-0.553396,0.267353,-0.573832,-1.386393
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4023995,1.596511,-0.174948,-0.359072,-0.937525,0.367168,-1.590032e-01,-4.658669e-02,1.683030e+00,-4.998149e-01,-1.714109e-01,...,-1.725461e-01,1.912980e+00,1.415132e+00,-0.027660,-0.551736,-0.843438,-0.156138,0.279443,0.048648,-0.186684
4023996,1.640554,-0.174615,-0.359072,-0.937525,0.428672,-1.587494e-01,-4.644606e-02,1.728091e+00,-4.664415e-01,-1.711261e-01,...,-1.721553e-01,1.996059e+00,1.471461e+00,-0.027660,-0.551736,-0.843438,-0.156138,0.279443,0.048502,-0.187048
4023997,1.684448,-0.174336,-0.359072,-0.937525,0.491030,-1.585319e-01,-4.633274e-02,1.773832e+00,-7.012871e-01,-1.708851e-01,...,-1.718258e-01,2.080261e+00,1.528565e+00,-0.027660,-0.551736,-0.843438,-0.156138,0.279443,0.048378,-0.187355
4023998,1.728391,-0.174099,-0.359072,-0.937525,0.554236,2.584907e-16,7.782393e-17,-1.094350e-16,2.408285e-15,-1.706787e-01,...,-1.715477e-01,2.165587e+00,1.586440e+00,-0.027660,-0.551736,-0.843438,-0.156138,0.279443,0.048274,-0.187615


(None, None)

In [29]:
train_df = pd.concat([train_df, train_category, train[['id', 'breath_id', 'pressure', 'u_out']]], axis=1)
test_df = pd.concat([test_df, test_category, test[['id', 'breath_id', 'u_out']]], axis=1)

In [30]:
train_df = utils.reduce_mem_usage(train_df)
test_df = utils.reduce_mem_usage(test_df)

Mem. usage decreased from 2348.60 Mb to 598.66 Mb (74.5% reduction)
Mem. usage decreased from 1535.03 Mb to 391.43 Mb (74.5% reduction)


In [31]:
oof_total = np.zeros((len(train), CFG.num_classes))
sub_preds = np.zeros((test.shape[0], len(CFG.folds)))
val_idxes = []
models = []
y = train['pressure']
groups = train['breath_id']
gkfold = model_selection.GroupKFold(n_splits=CFG.n_folds)
scores = []
input_dim = len(train_value_col)

for i, (trn_idx, val_idx) in enumerate(splitter.split(train_df, y, groups)):
    if i not in CFG.folds:
        continue

    trn_df = train_df.loc[trn_idx, :].reset_index(drop=True)
    val_df = train_df.loc[val_idx, :].reset_index(drop=True)
    trn_y = y.values[trn_idx]
    val_y = y.values[val_idx]
    
    
    loaders = {
        phase: torchdata.DataLoader(
            VentilatorDataset(
                df_, train_value_col, train_category_col
            ),
            **CFG.loader_params[phase])  # type: ignore
        for phase, df_ in zip(["train", "valid", "test"], [trn_df, val_df, test_df])
    }
    
    
    model = RNNModel(
        input_dim=input_dim,
        lstm_dim=CFG.lstm_dim,
        dense_dim=CFG.dense_dim,
        logit_dim=CFG.logit_dim,
        num_classes=CFG.num_classes,
    )
    model_name = model.__class__.__name__
#     break
    
    learner = Learner(model)
    
    # loggers
    RUN_NAME = f'exp{str(CFG.exp_num)}'
    wandb.init(project='Ventilator-Pressure-Prediction', entity='sqrt4kaido', group=RUN_NAME, job_type=RUN_NAME + f'-fold-{i}')
    wandb.run.name = RUN_NAME + f'-fold-{i}'
    wandb_config = wandb.config
    wandb_config.model_name = model_name
    wandb.watch(model)
    
    # callbacks
    callbacks = []
    checkpoint_callback = ModelCheckpoint(
        monitor=f'Loss/val',
        mode='min',
        dirpath=OUTPUT_DIR,
        verbose=False,
        save_weights_only=True,
        filename=f'{model_name}-{learner.current_epoch}-{i}')
    callbacks.append(checkpoint_callback)

#     early_stop_callback = EarlyStopping(
#         monitor='Loss/val',
#         min_delta=0.00,
#         patience=10,
#         verbose=True,
#         mode='min')
#     callbacks.append(early_stop_callback)
    
    loggers = []
    loggers.append(WandbLogger())
    
    trainer = pl.Trainer(
        logger=loggers,
        callbacks=callbacks,
        max_epochs=CFG.epochs,
        default_root_dir=OUTPUT_DIR,
        gpus=1,
#         fast_dev_run=DEBUG,
        deterministic=True,
        benchmark=False,
        )
    
    trainer.fit(learner, train_dataloader=loaders['train'], val_dataloaders=loaders['valid'])
#     trainer.save_checkpoint(OUTPUT_DIR / "last.ckpt")
    print('train done.')
    
    #############
    # validation (to make oof)
    #############
    checkpoint = torch.load(checkpoint_callback.best_model_path)
    learner.load_state_dict(checkpoint['state_dict'])
    
    model = model.to(device)
    oof_pred, oof_target = evaluate(model, loaders, phase="valid")
    models.append(model)
    
    oof_score = get_score(oof_pred, oof_target, val_df['u_out'].values)
    scores.append(oof_score)
    oof_total[val_idx] = oof_pred.reshape(1, -1).T / CFG.bias
    val_idxes.append(val_idx)
    
    print('validate done.')
    print(f'fold = {i}, auc = {oof_score}')
    wandb.log({'CV_score': oof_score})
    
    #############
    # inference
    #############
    test_pred, _ = evaluate(model, loaders, phase="test")
    sub_preds[:, i] = test_pred
    
    print('inference done.')

# test_preds_total = np.array(test_preds_total)


init LSTM(516, 512, num_layers=2, batch_first=True, bidirectional=True)


[34m[1mwandb[0m: Currently logged in as: [33msqrt4kaido[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.4 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type           | Params
---------------------------------------------
0 | model     | RNNModel       | 12.8 M
1 | criterion | VentilatorLoss | 0     
---------------------------------------------
12.8 M    Trainable params
0         Non-trainable params
12.8 M    Total params
51.152    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

epoch = 0, custom_mae = 17.446640014648438


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

epoch = 0, custom_mae = 1.0228254795074463


Validating: 0it [00:00, ?it/s]

epoch = 1, custom_mae = 0.822645366191864


Validating: 0it [00:00, ?it/s]

epoch = 2, custom_mae = 0.805981457233429


Validating: 0it [00:00, ?it/s]

epoch = 3, custom_mae = 0.8138332962989807


Validating: 0it [00:00, ?it/s]

epoch = 4, custom_mae = 0.7847405076026917


Validating: 0it [00:00, ?it/s]

epoch = 5, custom_mae = 0.6824480891227722


Validating: 0it [00:00, ?it/s]

epoch = 6, custom_mae = 0.6987932324409485


Validating: 0it [00:00, ?it/s]

epoch = 7, custom_mae = 0.6041554808616638


Validating: 0it [00:00, ?it/s]

epoch = 8, custom_mae = 0.6017454266548157


Validating: 0it [00:00, ?it/s]

epoch = 9, custom_mae = 0.6038059592247009


Validating: 0it [00:00, ?it/s]

epoch = 10, custom_mae = 0.5543243885040283


Validating: 0it [00:00, ?it/s]

epoch = 11, custom_mae = 0.5616458058357239


Validating: 0it [00:00, ?it/s]

epoch = 12, custom_mae = 0.48690876364707947


Validating: 0it [00:00, ?it/s]

epoch = 13, custom_mae = 0.5099397897720337


Validating: 0it [00:00, ?it/s]

epoch = 14, custom_mae = 0.653659462928772


Validating: 0it [00:00, ?it/s]

epoch = 15, custom_mae = 0.48031413555145264


Validating: 0it [00:00, ?it/s]

epoch = 16, custom_mae = 0.4764450192451477


Validating: 0it [00:00, ?it/s]

epoch = 17, custom_mae = 0.47420042753219604


Validating: 0it [00:00, ?it/s]

epoch = 18, custom_mae = 0.4468702971935272


Validating: 0it [00:00, ?it/s]

epoch = 19, custom_mae = 0.4590766131877899


Validating: 0it [00:00, ?it/s]

epoch = 20, custom_mae = 0.41014862060546875


Validating: 0it [00:00, ?it/s]

epoch = 21, custom_mae = 0.38286635279655457


Validating: 0it [00:00, ?it/s]

epoch = 22, custom_mae = 0.3520800769329071


Validating: 0it [00:00, ?it/s]

epoch = 23, custom_mae = 0.4495707154273987


Validating: 0it [00:00, ?it/s]

epoch = 24, custom_mae = 0.360283225774765


Validating: 0it [00:00, ?it/s]

epoch = 25, custom_mae = 0.34647393226623535


Validating: 0it [00:00, ?it/s]

epoch = 26, custom_mae = 0.3366694152355194


Validating: 0it [00:00, ?it/s]

epoch = 27, custom_mae = 0.31997108459472656


Validating: 0it [00:00, ?it/s]

epoch = 28, custom_mae = 0.31027936935424805


Validating: 0it [00:00, ?it/s]

epoch = 29, custom_mae = 0.2906031310558319


Validating: 0it [00:00, ?it/s]

epoch = 30, custom_mae = 0.2978954017162323


Validating: 0it [00:00, ?it/s]

epoch = 31, custom_mae = 0.29939934611320496


Validating: 0it [00:00, ?it/s]

epoch = 32, custom_mae = 0.27745628356933594


Validating: 0it [00:00, ?it/s]

epoch = 33, custom_mae = 0.27544260025024414


Validating: 0it [00:00, ?it/s]

epoch = 34, custom_mae = 0.26939547061920166


Validating: 0it [00:00, ?it/s]

epoch = 35, custom_mae = 0.278970330953598


Validating: 0it [00:00, ?it/s]

epoch = 36, custom_mae = 0.26440611481666565


Validating: 0it [00:00, ?it/s]

epoch = 37, custom_mae = 0.26375460624694824


Validating: 0it [00:00, ?it/s]

epoch = 38, custom_mae = 0.2620106339454651


Validating: 0it [00:00, ?it/s]

epoch = 39, custom_mae = 0.2616672217845917


Validating: 0it [00:00, ?it/s]

epoch = 40, custom_mae = 0.5334144830703735


Validating: 0it [00:00, ?it/s]

epoch = 41, custom_mae = 0.8172069191932678


Validating: 0it [00:00, ?it/s]

epoch = 42, custom_mae = 0.4935995042324066


Validating: 0it [00:00, ?it/s]

epoch = 43, custom_mae = 0.379083514213562


Validating: 0it [00:00, ?it/s]

epoch = 44, custom_mae = 0.38821184635162354


Validating: 0it [00:00, ?it/s]

epoch = 45, custom_mae = 0.37194153666496277


Validating: 0it [00:00, ?it/s]

epoch = 46, custom_mae = 0.3956718444824219


Validating: 0it [00:00, ?it/s]

epoch = 47, custom_mae = 0.4038439691066742


Validating: 0it [00:00, ?it/s]

epoch = 48, custom_mae = 0.437936395406723


Validating: 0it [00:00, ?it/s]

epoch = 49, custom_mae = 0.3557945191860199


Validating: 0it [00:00, ?it/s]

epoch = 50, custom_mae = 0.3363475501537323


Validating: 0it [00:00, ?it/s]

epoch = 51, custom_mae = 0.4264174997806549


Validating: 0it [00:00, ?it/s]

epoch = 52, custom_mae = 0.32757410407066345


Validating: 0it [00:00, ?it/s]

epoch = 53, custom_mae = 0.3288559913635254


Validating: 0it [00:00, ?it/s]

epoch = 54, custom_mae = 0.34188857674598694


Validating: 0it [00:00, ?it/s]

epoch = 55, custom_mae = 0.3753376603126526


Validating: 0it [00:00, ?it/s]

epoch = 56, custom_mae = 0.3364434540271759


Validating: 0it [00:00, ?it/s]

epoch = 57, custom_mae = 0.278051495552063


Validating: 0it [00:00, ?it/s]

epoch = 58, custom_mae = 0.30279427766799927


Validating: 0it [00:00, ?it/s]

epoch = 59, custom_mae = 0.3138810396194458


Validating: 0it [00:00, ?it/s]

epoch = 60, custom_mae = 0.26556727290153503


Validating: 0it [00:00, ?it/s]

epoch = 61, custom_mae = 0.26977062225341797


Validating: 0it [00:00, ?it/s]

epoch = 62, custom_mae = 0.2519145607948303


Validating: 0it [00:00, ?it/s]

epoch = 63, custom_mae = 0.24467404186725616


Validating: 0it [00:00, ?it/s]

epoch = 64, custom_mae = 0.2549111843109131


Validating: 0it [00:00, ?it/s]

epoch = 65, custom_mae = 0.24324138462543488


Validating: 0it [00:00, ?it/s]

epoch = 66, custom_mae = 0.24428315460681915


Validating: 0it [00:00, ?it/s]

epoch = 67, custom_mae = 0.2322830855846405


Validating: 0it [00:00, ?it/s]

epoch = 68, custom_mae = 0.23065517842769623


Validating: 0it [00:00, ?it/s]

epoch = 69, custom_mae = 0.22953663766384125


Validating: 0it [00:00, ?it/s]

epoch = 70, custom_mae = 0.22500775754451752


Validating: 0it [00:00, ?it/s]

epoch = 71, custom_mae = 0.2214001715183258


Validating: 0it [00:00, ?it/s]

epoch = 72, custom_mae = 0.2200515866279602


Validating: 0it [00:00, ?it/s]

epoch = 73, custom_mae = 0.21832270920276642


Validating: 0it [00:00, ?it/s]

epoch = 74, custom_mae = 0.21813559532165527


Validating: 0it [00:00, ?it/s]

epoch = 75, custom_mae = 0.2150060385465622


Validating: 0it [00:00, ?it/s]

epoch = 76, custom_mae = 0.21464946866035461


Validating: 0it [00:00, ?it/s]

epoch = 77, custom_mae = 0.2141568660736084


Validating: 0it [00:00, ?it/s]

epoch = 78, custom_mae = 0.21375219523906708


Validating: 0it [00:00, ?it/s]

epoch = 79, custom_mae = 0.21357250213623047


Validating: 0it [00:00, ?it/s]

epoch = 80, custom_mae = 0.369255930185318


Validating: 0it [00:00, ?it/s]

epoch = 81, custom_mae = 0.3267573118209839


Validating: 0it [00:00, ?it/s]

epoch = 82, custom_mae = 0.39797961711883545


Validating: 0it [00:00, ?it/s]

epoch = 83, custom_mae = 0.33739304542541504


Validating: 0it [00:00, ?it/s]

epoch = 84, custom_mae = 0.307600200176239


Validating: 0it [00:00, ?it/s]

epoch = 85, custom_mae = 0.3043106198310852


Validating: 0it [00:00, ?it/s]

epoch = 86, custom_mae = 0.3111591935157776


Validating: 0it [00:00, ?it/s]

epoch = 87, custom_mae = 0.31195271015167236


Validating: 0it [00:00, ?it/s]

epoch = 88, custom_mae = 0.2986454665660858


Validating: 0it [00:00, ?it/s]

epoch = 89, custom_mae = 0.32216042280197144


Validating: 0it [00:00, ?it/s]

epoch = 90, custom_mae = 0.39508703351020813


Validating: 0it [00:00, ?it/s]

epoch = 91, custom_mae = 0.26694729924201965


Validating: 0it [00:00, ?it/s]

epoch = 92, custom_mae = 0.2574012875556946


Validating: 0it [00:00, ?it/s]

epoch = 93, custom_mae = 0.2706323564052582


Validating: 0it [00:00, ?it/s]

epoch = 94, custom_mae = 0.2709818184375763


Validating: 0it [00:00, ?it/s]

epoch = 95, custom_mae = 0.3122158348560333


Validating: 0it [00:00, ?it/s]

epoch = 96, custom_mae = 0.2702139914035797


Validating: 0it [00:00, ?it/s]

epoch = 97, custom_mae = 0.25407442450523376


Validating: 0it [00:00, ?it/s]

epoch = 98, custom_mae = 0.23739373683929443


Validating: 0it [00:00, ?it/s]

epoch = 99, custom_mae = 0.2322944700717926


Validating: 0it [00:00, ?it/s]

epoch = 100, custom_mae = 0.2404898852109909


Validating: 0it [00:00, ?it/s]

epoch = 101, custom_mae = 0.224783793091774


Validating: 0it [00:00, ?it/s]

epoch = 102, custom_mae = 0.2421800196170807


Validating: 0it [00:00, ?it/s]

epoch = 103, custom_mae = 0.21784770488739014


Validating: 0it [00:00, ?it/s]

epoch = 104, custom_mae = 0.22584845125675201


Validating: 0it [00:00, ?it/s]

epoch = 105, custom_mae = 0.21366244554519653


Validating: 0it [00:00, ?it/s]

epoch = 106, custom_mae = 0.21310800313949585


Validating: 0it [00:00, ?it/s]

epoch = 107, custom_mae = 0.21200227737426758


Validating: 0it [00:00, ?it/s]

epoch = 108, custom_mae = 0.20909495651721954


Validating: 0it [00:00, ?it/s]

epoch = 109, custom_mae = 0.20843328535556793


Validating: 0it [00:00, ?it/s]

epoch = 110, custom_mae = 0.20531630516052246


Validating: 0it [00:00, ?it/s]

epoch = 111, custom_mae = 0.20665067434310913


Validating: 0it [00:00, ?it/s]

epoch = 112, custom_mae = 0.2043733149766922


Validating: 0it [00:00, ?it/s]

epoch = 113, custom_mae = 0.20323140919208527


Validating: 0it [00:00, ?it/s]

epoch = 114, custom_mae = 0.2035246640443802


Validating: 0it [00:00, ?it/s]

epoch = 115, custom_mae = 0.2027646154165268


Validating: 0it [00:00, ?it/s]

epoch = 116, custom_mae = 0.2024541199207306


Validating: 0it [00:00, ?it/s]

epoch = 117, custom_mae = 0.20256933569908142


Validating: 0it [00:00, ?it/s]

epoch = 118, custom_mae = 0.20224139094352722


Validating: 0it [00:00, ?it/s]

epoch = 119, custom_mae = 0.20211897790431976


Validating: 0it [00:00, ?it/s]

epoch = 120, custom_mae = 0.33913707733154297


Validating: 0it [00:00, ?it/s]

epoch = 121, custom_mae = 0.3234524726867676


Validating: 0it [00:00, ?it/s]

epoch = 122, custom_mae = 0.2878609299659729


Validating: 0it [00:00, ?it/s]

epoch = 123, custom_mae = 0.3784497380256653


Validating: 0it [00:00, ?it/s]

epoch = 124, custom_mae = 0.2654797434806824


Validating: 0it [00:00, ?it/s]

epoch = 125, custom_mae = 0.3216676414012909


Validating: 0it [00:00, ?it/s]

epoch = 126, custom_mae = 0.2765977680683136


Validating: 0it [00:00, ?it/s]

epoch = 127, custom_mae = 0.25445184111595154


Validating: 0it [00:00, ?it/s]

epoch = 128, custom_mae = 0.267313152551651


Validating: 0it [00:00, ?it/s]

epoch = 129, custom_mae = 0.2586140036582947


Validating: 0it [00:00, ?it/s]

epoch = 130, custom_mae = 0.3028605580329895


Validating: 0it [00:00, ?it/s]

epoch = 131, custom_mae = 0.23497633635997772


Validating: 0it [00:00, ?it/s]

epoch = 132, custom_mae = 0.30564093589782715


Validating: 0it [00:00, ?it/s]

epoch = 133, custom_mae = 0.2424144595861435


Validating: 0it [00:00, ?it/s]

epoch = 134, custom_mae = 0.2322714477777481


Validating: 0it [00:00, ?it/s]

epoch = 135, custom_mae = 0.23053768277168274


Validating: 0it [00:00, ?it/s]

epoch = 136, custom_mae = 0.23715120553970337


Validating: 0it [00:00, ?it/s]

epoch = 137, custom_mae = 0.23080439865589142


Validating: 0it [00:00, ?it/s]

epoch = 138, custom_mae = 0.23067937791347504


Validating: 0it [00:00, ?it/s]

epoch = 139, custom_mae = 0.2273094654083252


Validating: 0it [00:00, ?it/s]

epoch = 140, custom_mae = 0.21466434001922607


Validating: 0it [00:00, ?it/s]

epoch = 141, custom_mae = 0.2130289375782013


Validating: 0it [00:00, ?it/s]

epoch = 142, custom_mae = 0.2133970707654953


Validating: 0it [00:00, ?it/s]

epoch = 143, custom_mae = 0.21272018551826477


Validating: 0it [00:00, ?it/s]

epoch = 144, custom_mae = 0.20729033648967743


Validating: 0it [00:00, ?it/s]

epoch = 145, custom_mae = 0.20705334842205048


Validating: 0it [00:00, ?it/s]

epoch = 146, custom_mae = 0.20364850759506226


Validating: 0it [00:00, ?it/s]

epoch = 147, custom_mae = 0.20427832007408142


Validating: 0it [00:00, ?it/s]

epoch = 148, custom_mae = 0.20151624083518982


Validating: 0it [00:00, ?it/s]

epoch = 149, custom_mae = 0.20120789110660553


Validating: 0it [00:00, ?it/s]

epoch = 150, custom_mae = 0.20019635558128357


Validating: 0it [00:00, ?it/s]

epoch = 151, custom_mae = 0.1991260051727295


Validating: 0it [00:00, ?it/s]

epoch = 152, custom_mae = 0.19860492646694183


Validating: 0it [00:00, ?it/s]

epoch = 153, custom_mae = 0.19833272695541382


Validating: 0it [00:00, ?it/s]

epoch = 154, custom_mae = 0.19759635627269745


Validating: 0it [00:00, ?it/s]

epoch = 155, custom_mae = 0.19717636704444885


Validating: 0it [00:00, ?it/s]

epoch = 156, custom_mae = 0.19702517986297607


Validating: 0it [00:00, ?it/s]

epoch = 157, custom_mae = 0.19701480865478516


Validating: 0it [00:00, ?it/s]

epoch = 158, custom_mae = 0.19694125652313232


Validating: 0it [00:00, ?it/s]

epoch = 159, custom_mae = 0.1968424767255783


Validating: 0it [00:00, ?it/s]

epoch = 160, custom_mae = 0.3445664644241333


Validating: 0it [00:00, ?it/s]

epoch = 161, custom_mae = 0.2613159418106079


Validating: 0it [00:00, ?it/s]

epoch = 162, custom_mae = 0.2642974257469177


Validating: 0it [00:00, ?it/s]

epoch = 163, custom_mae = 0.2774612307548523


Validating: 0it [00:00, ?it/s]

epoch = 164, custom_mae = 0.23476701974868774


Validating: 0it [00:00, ?it/s]

epoch = 165, custom_mae = 0.23304443061351776


Validating: 0it [00:00, ?it/s]

epoch = 166, custom_mae = 0.2445569932460785


Validating: 0it [00:00, ?it/s]

epoch = 167, custom_mae = 0.22929202020168304


Validating: 0it [00:00, ?it/s]

epoch = 168, custom_mae = 0.23252195119857788


Validating: 0it [00:00, ?it/s]

epoch = 169, custom_mae = 0.22594642639160156


Validating: 0it [00:00, ?it/s]

epoch = 170, custom_mae = 0.21930046379566193


Validating: 0it [00:00, ?it/s]

epoch = 171, custom_mae = 0.22737912833690643


Validating: 0it [00:00, ?it/s]

epoch = 172, custom_mae = 0.2417377531528473


Validating: 0it [00:00, ?it/s]

epoch = 173, custom_mae = 0.27591389417648315


Validating: 0it [00:00, ?it/s]

epoch = 174, custom_mae = 0.2256881147623062


Validating: 0it [00:00, ?it/s]

epoch = 175, custom_mae = 0.22479425370693207


Validating: 0it [00:00, ?it/s]

epoch = 176, custom_mae = 0.2168760597705841


Validating: 0it [00:00, ?it/s]

epoch = 177, custom_mae = 0.22012701630592346


Validating: 0it [00:00, ?it/s]

epoch = 178, custom_mae = 0.21089544892311096


Validating: 0it [00:00, ?it/s]

epoch = 179, custom_mae = 0.20798863470554352


Validating: 0it [00:00, ?it/s]

epoch = 180, custom_mae = 0.20573893189430237


Validating: 0it [00:00, ?it/s]

epoch = 181, custom_mae = 0.2135738879442215


Validating: 0it [00:00, ?it/s]

epoch = 182, custom_mae = 0.2028931975364685


Validating: 0it [00:00, ?it/s]

epoch = 183, custom_mae = 0.20160047709941864


Validating: 0it [00:00, ?it/s]

epoch = 184, custom_mae = 0.20062294602394104


Validating: 0it [00:00, ?it/s]

epoch = 185, custom_mae = 0.19979116320610046


Validating: 0it [00:00, ?it/s]

epoch = 186, custom_mae = 0.19947804510593414


Validating: 0it [00:00, ?it/s]

epoch = 187, custom_mae = 0.19832997024059296


Validating: 0it [00:00, ?it/s]

epoch = 188, custom_mae = 0.19721496105194092


Validating: 0it [00:00, ?it/s]

epoch = 189, custom_mae = 0.1964375525712967


Validating: 0it [00:00, ?it/s]

epoch = 190, custom_mae = 0.19677229225635529


Validating: 0it [00:00, ?it/s]

epoch = 191, custom_mae = 0.1954764425754547


Validating: 0it [00:00, ?it/s]

epoch = 192, custom_mae = 0.19560974836349487


Validating: 0it [00:00, ?it/s]

epoch = 193, custom_mae = 0.19504190981388092


Validating: 0it [00:00, ?it/s]

epoch = 194, custom_mae = 0.1948116272687912


Validating: 0it [00:00, ?it/s]

epoch = 195, custom_mae = 0.19482649862766266


Validating: 0it [00:00, ?it/s]

epoch = 196, custom_mae = 0.1946546584367752


Validating: 0it [00:00, ?it/s]

epoch = 197, custom_mae = 0.19454029202461243


Validating: 0it [00:00, ?it/s]

epoch = 198, custom_mae = 0.19462278485298157


Validating: 0it [00:00, ?it/s]

epoch = 199, custom_mae = 0.19457274675369263
train done.
validate done.
fold = 0, auc = 0.1945402988063898
inference done.


In [31]:
if len(CFG.folds) != CFG.n_folds:

    oof_score = get_score(oof_pred, oof_target, val_df['u_out'].values)
    print(f'MAE {oof_score}')

    oof_df = train.iloc[val_idxes[0], :1]
    oof_df['pressure'] = oof_pred
    oof_df.to_csv(OUTPUT_DIR / f'oof{CFG.exp_num}.csv',index = False)    
else:
    score = get_score(y, oof_total, train['u_out'].values)
    print(f'MAE {score}: folds: {scores}')

    oof_df = pd.DataFrame({'id': train['id'].values, 'pressure':oof_total.reshape(-1)})
    oof_df.to_csv(OUTPUT_DIR / f'oof{CFG.exp_num}.csv',index = False)
oof_df

MAE 0.1915309141061239


Unnamed: 0,id,pressure
240,241,6.457761
241,242,5.627676
242,243,6.595778
243,244,8.729868
244,245,10.903368
...,...,...
6035995,6035996,45.899429
6035996,6035997,47.361847
6035997,6035998,48.577316
6035998,6035999,50.388798


In [32]:
sub = pd.read_csv(DATA_DIR / 'sample_submission.csv')
sub['pressure'] = np.mean(sub_preds, axis=1)
sub.to_csv(OUTPUT_DIR / f'sub{CFG.exp_num}.csv',index = False)
sub

Unnamed: 0,id,pressure
0,1,6.293637
1,2,5.930646
2,3,7.114677
3,4,7.626244
4,5,9.073135
...,...,...
4023995,4023996,36.246300
4023996,4023997,38.818066
4023997,4023998,40.665672
4023998,4023999,43.085598


In [33]:
wandb.init(project='Ventilator-Pressure-Prediction', entity='sqrt4kaido', group=RUN_NAME, job_type='summary')
wandb.run.name = 'summary'
wandb.log({'CV_score': oof_score})
# wandb.save(utils.get_notebook_path())
wandb.finish()

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
CV_score,▁
Loss/val,█▆▄▅▃▃▂▂▄▄▃▂▂▁▁▁▃▂▂▂▂▁▁▁▂▂▂▁▁▁▁▁▂▂▁▁▁▁▁▁
custom_mae/val,█▆▄▅▃▃▂▂▄▄▃▂▂▁▁▁▃▂▂▂▂▁▁▁▂▂▂▁▁▁▁▁▂▂▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
CV_score,0.19153
Loss/val,0.19149
custom_mae/val,0.19153
epoch,199.0
trainer/global_step,94199.0


[34m[1mwandb[0m: wandb version 0.12.4 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
CV_score,▁

0,1
CV_score,0.19153
