In [1]:
import sys
from time import time
import numpy as np
import pandas as pd
from pathlib import Path
import lightgbm as lgb
import matplotlib.pyplot as plt 
import seaborn as sns
from tqdm import tqdm
import copy
import wandb
from collections import OrderedDict

from sklearn.metrics import mean_absolute_error
from sklearn import model_selection
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as torchdata

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.loggers import WandbLogger


In [2]:
sys.path.append('../../src/')
import utils as utils
from utils import Timer

In [3]:
class CFG:
    """Central experiment settings; the rest of the notebook reads these as class attributes."""

    # ---- Run control ----
    seed = 42        # forwarded to utils.set_seed
    exp_num = 21     # used to build the wandb run / group name
    local = True     # switches DATA_DIR / OUTPUT_DIR between the local and hosted layout
    n_folds = 5
    folds = [0]      # fold numbers that are actually trained
    debug = False
    bias = 1000      # divisor applied to the stored OOF predictions in the training loop
    epochs = 200     # max_epochs handed to the trainer

    # ---- Dataset ----
    # One list of transform specs per phase. get_transforms resolves each "name"
    # against module globals; the empty names here resolve to nothing.
    transforms = dict(
        train=[dict(name="")],
        valid=[dict(name="")],
        test=[dict(name="")],
    )

    # ---- Loaders ----
    # Keyword arguments unpacked into torch DataLoader, one set per phase.
    loader_params = dict(
        train=dict(
            batch_size=128,
            shuffle=True,
            num_workers=8,
            pin_memory=True,
            drop_last=True,
        ),
        valid=dict(
            batch_size=32,
            shuffle=False,
            num_workers=8,
            pin_memory=True,
            drop_last=False,
        ),
        test=dict(
            batch_size=32,
            shuffle=False,
            num_workers=8,
            pin_memory=True,
            drop_last=False,
        ),
    )

    # ---- Split ----
    # `split` names a class in sklearn.model_selection; `split_params` are its kwargs.
    # n_splits duplicates n_folds above; keep the two in sync.
    split = "GroupKFold"
    split_params = dict(n_splits=5)

    # ---- Model ----
    # NOTE: the training loop passes len(train_value_col) as input_dim, not this value.
    input_dim = 5

    dense_dim = 512
    lstm_dim = 512
    logit_dim = 512
    num_classes = 1

    # ---- Criterion ----
    # loss_name = "rmspe_loss"
    # loss_params: dict = {}

    # ---- Optimizer ----
    # Name is looked up in torch.optim (or the custom registry) by get_optimizer.
    optimizer_name = "AdamW"
    optimizer_params = dict(lr=0.001, weight_decay=1e-6)

    # ---- Scheduler ----
    # Name is looked up in torch.optim.lr_scheduler by get_scheduler.
    scheduler_name = "CosineAnnealingLR"
    scheduler_params = dict(T_max=25, eta_min=1e-6)

In [4]:
# Apply the configured seed through the project helper (utils.set_seed; its
# implementation is not shown in this notebook).
utils.set_seed(CFG.seed)

In [5]:
# Pick the input and output locations for the environment selected by CFG.local.
# NOTE(review): the local branch is an absolute, user-specific path.
DATA_DIR = Path(
    "/home/knikaido/work/Ventilator-Pressure-Prediction/data/ventilator-pressure-prediction"
    if CFG.local
    else "../input/ventilator-pressure-prediction"
)
OUTPUT_DIR = Path('./output/' if CFG.local else '')

In [6]:
def get_transforms(phase: str):
    """Build the transform pipeline configured for ``phase`` in ``CFG.transforms``.

    Each spec is a dict with a ``"name"`` and optional ``"params"``. The name is
    looked up in this module's globals; specs whose name does not resolve are
    skipped silently.

    Returns:
        A ``Compose`` over the instantiated transforms, or ``None`` when nothing
        is configured or nothing resolves.
    """
    all_confs = CFG.transforms
    if all_confs is None:
        return None

    phase_confs = all_confs[phase]
    if phase_confs is None:
        return None

    namespace = globals()
    built = []
    for conf in phase_confs:
        factory = namespace.get(conf["name"])
        if factory is None:
            # Unknown (or empty) transform name: ignore this spec.
            continue
        kwargs = conf.get("params")
        built.append(factory(**({} if kwargs is None else kwargs)))

    return Compose(built) if built else None
        
        
class Normalize:
    """Peak-normalise an array so that its largest absolute value becomes 1."""

    def __call__(self, y: np.ndarray):
        """Scale ``y`` by the reciprocal of its maximum absolute value.

        Args:
            y: Array-like of numbers.

        Returns:
            A Fortran-ordered array holding ``y / max(|y|)``. An empty or
            all-zero input is returned unscaled (as floats) instead of raising
            or producing NaN/inf from a division by zero.
        """
        y = np.asarray(y)
        # .max() raises on a zero-size array, so treat "empty" like "no peak".
        max_vol = np.abs(y).max() if y.size else 0
        if max_vol == 0:
            # Nothing to scale; `* 1.0` keeps the float result type of the normal path.
            return np.asfortranarray(y * 1.0)
        y_vol = y * 1 / max_vol
        return np.asfortranarray(y_vol)


class Compose:
    """Chain several callables so they run one after another on the same value."""

    def __init__(self, transforms: list):
        # Callables are applied in list order.
        self.transforms = transforms

    def __call__(self, y: np.ndarray):
        """Feed ``y`` through every transform in order and return the final result."""
        result = y
        for transform in self.transforms:
            result = transform(result)
        return result

In [7]:
def compute_metric(preds, trues, u_outs):
    """Mean absolute error over the rows where ``u_outs`` is 0.

    Each row is weighted by ``1 - u_outs``, so rows with ``u_outs == 1`` do not
    contribute. All three arrays must have the same shape.
    """
    weights = 1 - u_outs

    assert trues.shape == preds.shape and weights.shape == trues.shape, (trues.shape, preds.shape, weights.shape)

    abs_err = np.abs(trues - preds)
    return (weights * abs_err).sum() / weights.sum()


class VentilatorLoss(nn.Module):
    """Masked MAE loss: absolute error averaged over positions where ``u_out`` is 0.

    The computation lives in ``forward`` rather than an overridden ``__call__``,
    so ``nn.Module.__call__`` keeps running registered hooks. Calling the
    instance works exactly as before.
    """

    def forward(self, preds, y, u_out):
        """Return ``sum(w * |y - preds|) / sum(w)`` over the last dim, with ``w = 1 - u_out``.

        Args:
            preds: Predicted values.
            y: Target values, same shape as ``preds``.
            u_out: Mask values, same shape as ``preds``; positions with 1 are ignored.
        """
        w = 1 - u_out
        mae = w * (y - preds).abs()
        # NOTE: if every position is masked out, w.sum(-1) is 0 and the result is NaN.
        mae = mae.sum(-1) / w.sum(-1)

        return mae

In [8]:
def get_criterion():
    """Return the loss module used for training: a new VentilatorLoss instance."""
    return VentilatorLoss()

In [9]:
# Registry of custom optimizer classes keyed by name. Entries here take
# precedence over classes of the same name in torch.optim.
__OPTIMIZERS__ = {}


def get_optimizer(model: nn.Module):
    """Instantiate the optimizer named by ``CFG.optimizer_name`` for ``model``.

    The class is taken from ``__OPTIMIZERS__`` when registered there, otherwise
    from ``torch.optim``. ``CFG.optimizer_params`` are passed as keyword
    arguments. The special name ``"SAM"`` wraps a base optimizer class.

    NOTE(review): neither ``SAM`` nor ``CFG.base_optimizer`` is defined in the
    cells visible here, so the ``"SAM"`` branch needs both to be provided first.
    """
    name = CFG.optimizer_name

    if name == "SAM":
        base_name = CFG.base_optimizer
        base_cls = __OPTIMIZERS__.get(base_name)
        if base_cls is None:
            base_cls = getattr(optim, base_name)
        return SAM(model.parameters(), base_cls, **CFG.optimizer_params)

    optimizer_cls = __OPTIMIZERS__.get(name)
    if optimizer_cls is None:
        optimizer_cls = getattr(optim, name)
    return optimizer_cls(model.parameters(), **CFG.optimizer_params)


def get_scheduler(optimizer):
    """Instantiate the LR scheduler named by ``CFG.scheduler_name``.

    Returns ``None`` when no scheduler name is configured; otherwise the class
    of that name from ``torch.optim.lr_scheduler`` built with
    ``CFG.scheduler_params``.
    """
    name = CFG.scheduler_name
    if name is None:
        return None

    scheduler_cls = getattr(optim.lr_scheduler, name)
    return scheduler_cls(optimizer, **CFG.scheduler_params)

In [10]:
# validation
# Look up the splitter class named by CFG.split in sklearn.model_selection and
# instantiate it with CFG.split_params.
splitter = getattr(model_selection, CFG.split)(**CFG.split_params)

In [11]:
class VentilatorDataset(torchdata.Dataset):
    """Dataset that yields one item per ``breath_id`` group of a feature frame.

    Args:
        df: Frame with a ``breath_id`` column, a ``u_out`` column, the feature
            columns, and optionally ``pressure``. The caller's frame is never
            modified: when ``pressure`` is missing, a copy with an all-zero
            ``pressure`` column is used internally.
        train_value_col: Names of the continuous feature columns.
        train_category_col: Names of the categorical feature columns.
    """

    def __init__(self, df, train_value_col, train_category_col):
        if "pressure" not in df.columns:
            # assign() returns a new frame, so the caller's df stays untouched.
            df = df.assign(pressure=0)
        self.df = df
        # `.indices` holds *positional* row numbers per group, which is what
        # `.iloc` in __getitem__ needs. (`.groups` holds index labels and is
        # only equivalent when the frame has a default RangeIndex.)
        self.groups = df.groupby('breath_id').indices
        self.keys = list(self.groups.keys())
        self.train_value_col = train_value_col
        self.train_category_col = train_category_col

    def __len__(self):
        """Number of distinct ``breath_id`` groups."""
        return len(self.groups)

    def __getitem__(self, idx):
        """Return the rows of the ``idx``-th group as a dict of NumPy arrays.

        Keys: ``input_value`` (float32), ``input_category`` (int),
        ``u_out`` (float32) and ``p`` (float32, the ``pressure`` column).
        """
        positions = self.groups[self.keys[idx]]
        df_ = self.df.iloc[positions]

        input_value = df_[self.train_value_col].values
        input_category = df_[self.train_category_col].values

        u_out_ = df_['u_out'].values
        p_ = df_['pressure'].values

        data = {
            "input_value": input_value.astype(np.float32),
            "input_category": input_category.astype(int),
            "u_out": u_out_.astype(np.float32),
            "p": p_.astype(np.float32),
        }

        return data

In [12]:
class RNNModel(nn.Module):
    """Sequence regressor: embedding + MLP -> two Conv1d layers -> 2-layer BiLSTM -> MLP head.

    Args:
        input_dim: Number of continuous features per time step.
        lstm_dim: Hidden size of each LSTM direction.
        dense_dim: Output width of the feature MLP.
        logit_dim: Hidden width of the output head.
        num_classes: Number of outputs per time step.
        seq_len: Number of time steps per sequence. Defaults to 80, the value
            that was previously hard-coded in the LayerNorm layers and ``forward``.
    """

    def __init__(
        self,
        input_dim=4,
        lstm_dim=256,
        dense_dim=256,
        logit_dim=256,
        num_classes=1,
        seq_len=80,
    ):
        super().__init__()

        self.seq_len = seq_len

        # 6 category codes, 4-dim embedding.
        # NOTE(review): padding_idx=0 keeps the embedding of code 0 fixed at
        # zeros, while get_category_features maps '5_10' to 0 — confirm that
        # treating that class as padding is intended.
        self.rc_emb = nn.Embedding(6, 4, padding_idx=0)

        self.mlp = nn.Sequential(
            nn.Linear(input_dim, dense_dim // 2),
            nn.LayerNorm(dense_dim // 2),
            nn.ReLU(),
            nn.Linear(dense_dim // 2, dense_dim),
            nn.LayerNorm(dense_dim),
            nn.ReLU(),
        )

        # Conv1d runs on (batch, channels, time); LayerNorm normalises the time axis.
        # "+4" is the embedding width concatenated to the MLP features.
        self.conv_basic = nn.Sequential(
            nn.Conv1d(in_channels=dense_dim+4, out_channels=dense_dim+4, kernel_size=3, padding=1),
            nn.LayerNorm(seq_len),
            nn.ReLU(),
            nn.Conv1d(in_channels=dense_dim+4, out_channels=dense_dim+4, kernel_size=3, padding=1),
            nn.LayerNorm(seq_len),
            nn.ReLU(),
        )

        self.lstm = nn.LSTM(dense_dim+4, lstm_dim, num_layers=2, batch_first=True, bidirectional=True)

        self.logits = nn.Sequential(
            nn.Linear(lstm_dim * 2, logit_dim),
            nn.ReLU(),
            nn.Linear(logit_dim, num_classes),
        )

        # Weight initialisation following nakama's approach: orthogonal for
        # matrices, normal for vectors, applied to every recurrent layer.
        # (The former GRU branch referenced an undefined name `init`.)
        for n, m in self.named_modules():
            if isinstance(m, (nn.LSTM, nn.GRU)):
                print(f'init {m}')
                for param in m.parameters():
                    if len(param.shape) >= 2:
                        nn.init.orthogonal_(param.data)
                    else:
                        nn.init.normal_(param.data)

    def forward(self, cont_seq_x, cate_seq_x):
        """Predict one value set per time step.

        Args:
            cont_seq_x: Continuous features, ``(batch, seq_len, input_dim)``.
            cate_seq_x: Integer category codes, reshaped to
                ``(batch, seq_len, emb)`` after the embedding lookup.

        Returns:
            Tensor of shape ``(batch, seq_len, num_classes)``.
        """
        bs = cont_seq_x.size(0)
        rc_emb = self.rc_emb(cate_seq_x).view(bs, self.seq_len, -1)

        features = self.mlp(cont_seq_x)
        features = torch.cat((rc_emb, features), 2)

        # (batch, time, ch) -> (batch, ch, time) for the convolutions, then back.
        features = self.conv_basic(features.permute([0, 2, 1]))
        features, _ = self.lstm(features.permute([0, 2, 1]))

        pred = self.logits(features)
        return pred

In [13]:
# Learner class (pytorch-lightning)
class Learner(pl.LightningModule):
    """LightningModule wrapping ``model`` with the notebook's loss, metric and optimizer setup."""

    def __init__(self, model):
        super().__init__()
        self.model = model
        self.criterion = get_criterion()

    def _shared_step(self, batch):
        """Run the model on a batch and return ``(output, loss)``.

        Predictions, targets and mask are flattened, so the loss is one masked
        MAE over the whole batch.
        """
        output = self.model(batch['input_value'], batch['input_category'])
        loss = self.criterion(output.view(-1), batch['p'].view(-1), batch['u_out'].view(-1))
        return output, loss

    def training_step(self, batch, batch_idx):
        """Return the training loss for one batch."""
        _, loss = self._shared_step(batch)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log the epoch-level validation loss and return the pieces needed for the epoch metric."""
        output, loss = self._shared_step(batch)

        self.log('Loss/val', loss, on_step=False, on_epoch=True, prog_bar=False, logger=True)

        return OrderedDict({
            "targets": batch['p'].detach(),
            "preds": output.detach(),
            "u_outs": batch['u_out'].detach(),
            "loss": loss.detach(),
        })

    def validation_epoch_end(self, outputs):
        """Compute the masked MAE over all validation batches, then log and print it."""
        targets = torch.cat([o["targets"].view(-1) for o in outputs]).cpu().numpy()
        preds = torch.cat([o["preds"].view(-1) for o in outputs]).cpu().numpy()
        u_outs = torch.cat([o["u_outs"].view(-1) for o in outputs]).cpu().numpy()

        # get_score is defined in a later cell; it must exist before training starts.
        score = get_score(preds, targets, u_outs)
        self.log('custom_mae/val', score, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        print(f'epoch = {self.current_epoch}, custom_mae = {score}')

    def configure_optimizers(self):
        """Return the optimizer, plus the LR scheduler when one is configured.

        get_scheduler returns ``None`` when ``CFG.scheduler_name`` is ``None``;
        in that case only the optimizer is returned rather than a dict with a
        ``None`` scheduler entry.
        """
        optimizer = get_optimizer(self.model)
        scheduler = get_scheduler(optimizer)
        if scheduler is None:
            return optimizer
        return {"optimizer": optimizer, "lr_scheduler": scheduler, "monitor": "Loss/val"}

In [14]:
# Device used by evaluate() and for moving the model after training:
# CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [15]:
def get_score(y_pred, y_true, u_outs):
    """Score predictions with compute_metric (MAE weighted by ``1 - u_outs``)."""
    return compute_metric(y_pred, y_true, u_outs)


def to_np(input):
    """Convert a tensor to a NumPy array after detaching it and moving it to the CPU."""
    detached = input.detach()
    on_host = detached.cpu()
    return on_host.numpy()

# oof
def evaluate(model, loaders, phase):
    """Run ``model`` over ``loaders[phase]`` without gradients.

    Returns:
        ``(preds, targets)`` as flat NumPy arrays, concatenated in loader order.
        Targets come from each batch's ``'p'`` entry.

    The model is put into eval mode for the pass and into train mode afterwards.
    """
    model.eval()
    batch_preds, batch_targets = [], []
    with torch.no_grad():
        for d_ in loaders[phase]:
            # Only the model inputs are moved to the module-level `device`.
            for key in ('input_value', 'input_category'):
                d_[key] = d_[key].to(device)
            output = model(d_['input_value'], d_['input_category'])
            batch_preds.append(to_np(output))
            batch_targets.append(to_np(d_['p']))

    flat_preds = np.concatenate(batch_preds).reshape(-1)
    flat_targets = np.concatenate(batch_targets).reshape(-1)
    model.train()
    return flat_preds, flat_targets

In [16]:
# Load the raw train / test tables from DATA_DIR.
train = pd.read_csv(DATA_DIR / 'train.csv')
test = pd.read_csv(DATA_DIR / 'test.csv')

In [17]:
# Drop every row with R == 50 from both frames and renumber the index from 0.
# NOTE: this also removes those rows from `test`, so the later inference step
# produces no predictions for them. The cell overwrites `train` / `test`.
train = train[train['R'] != 50].reset_index(drop=True)
test = test[test['R'] != 50].reset_index(drop=True)

In [18]:
# Show both frames. One display() call avoids the `(None, None)` tuple that the
# former `display(a), display(b)` expression echoed as the cell result.
display(train, test)

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure
0,1,1,20,50,0.000000,0.083334,0,5.837492
1,2,1,20,50,0.033652,18.383041,0,5.907794
2,3,1,20,50,0.067514,22.509278,0,7.876254
3,4,1,20,50,0.101542,22.808822,0,11.742872
4,5,1,20,50,0.135756,25.355850,0,12.234987
...,...,...,...,...,...,...,...,...
3625915,6035836,125743,20,10,2.531233,4.972427,1,6.329607
3625916,6035837,125743,20,10,2.565202,4.976734,1,6.540513
3625917,6035838,125743,20,10,2.599030,4.980354,1,6.189002
3625918,6035839,125743,20,10,2.632680,4.983397,1,6.329607


Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out
0,1,0,5,20,0.000000,0.000000,0
1,2,0,5,20,0.031904,7.515046,0
2,3,0,5,20,0.063827,14.651675,0
3,4,0,5,20,0.095751,21.230610,0
4,5,0,5,20,0.127644,26.320956,0
...,...,...,...,...,...,...,...
2417195,4023996,125748,20,10,2.530117,4.971245,1
2417196,4023997,125748,20,10,2.563853,4.975709,1
2417197,4023998,125748,20,10,2.597475,4.979468,1
2417198,4023999,125748,20,10,2.631134,4.982648,1


(None, None)

In [19]:
def get_raw_features(input_df, dataType = 'train'):
    """Return the unmodified base columns ``time_step``, ``u_in``, ``R`` and ``C``.

    ``dataType`` is accepted for a uniform processor signature and is not used.
    """
    raw_columns = ['time_step', 'u_in', 'R', 'C']
    return input_df.loc[:, raw_columns]

In [20]:
def get_category_features(input_df, dataType = 'train'):
    """Encode each (R, C) pair as a single integer code in column ``R_C``.

    The key ``"<R>_<C>"`` is built with vectorised string operations on the two
    source columns only, instead of deep-copying the whole frame and formatting
    keys row by row. Pairs missing from the mapping become NaN.

    ``dataType`` is accepted for a uniform processor signature and is not used.

    Returns:
        A one-column DataFrame (``R_C``) sharing ``input_df``'s index.
    """
    rc_map = {'5_10': 0, '5_20': 1, '5_50': 2, '20_10': 3, '20_20': 4, '20_50': 5}

    rc_key = input_df['R'].astype(str) + '_' + input_df['C'].astype(str)
    return rc_key.map(rc_map).to_frame('R_C')

In [21]:
def get_diff_shift_features(input_df, dataType = 'train'):
    """Build per-breath shifted copies and differences of ``u_in`` and ``time_step``.

    For each offset in ``shift_idx`` four columns are added:
    ``u_in_shift_<i>``, ``u_in_diff_<i>``, ``time_step_shift_<i>`` and
    ``time_step_diff_<i>``. Positions with no partner row inside the same breath
    are left as NaN. ``dataType`` is not used.

    Returns:
        Only the newly added columns (everything after the original columns).
    """
    
    output_df = copy.deepcopy(input_df)
    c_num = input_df.shape[1]  # number of original columns, used to slice off the new ones
    
    b_id_gby = input_df.groupby(['breath_id'])
    
    def g_by_diff(c_, i):
        # Pair every row with the row `i` positions away in the *whole frame*
        # (a plain row shift, not a per-group shift), keep only pairs whose
        # breath_id matches, and return column minus shifted column.
        # reset_index() on the shifted copy gives it a fresh 0..n-1 index, so the
        # axis=1 concat lines up row-for-row only when output_df also has that
        # default index — NOTE(review): assumes a RangeIndex, confirm for other callers.
        temp_df=pd.concat([output_df.loc[:, ['breath_id', c_]], output_df.loc[:, ['breath_id', c_]].reset_index().shift(i).rename(columns=lambda s:s+'_shift')], axis=1)
        df_with_diff=temp_df.loc[(temp_df['breath_id']==temp_df['breath_id_shift']), slice(None)]
        # The result covers only the kept rows; assigning it to output_df aligns
        # on the index and leaves the other rows as NaN.
        return(df_with_diff[c_]-df_with_diff[f'{c_}_shift'])
    
    # Subtracting directly is faster than using diff
    shift_idx = [-2, -1, 1, 2, 3, 4]
    for i in shift_idx:
        output_df[f'u_in_shift_{i}'] = b_id_gby['u_in'].shift(i)
        output_df[f'u_in_diff_{i}'] = g_by_diff('u_in', i)

        output_df[f'time_step_shift_{i}'] = b_id_gby['time_step'].shift(i)
        output_df[f'time_step_diff_{i}'] = g_by_diff('time_step', i)
    
    return output_df.iloc[:, c_num:]

In [22]:
def get_cum_features(input_df, dataType = 'train'):
    """Per-breath running totals of ``u_in`` and ``time_step``.

    Builds just the two result columns rather than deep-copying the whole input
    frame and slicing the new columns off afterwards. ``dataType`` is not used.

    Returns:
        DataFrame with columns ``u_in_cumsum`` and ``time_step_cumsum`` that
        shares ``input_df``'s index.
    """
    by_breath = input_df.groupby('breath_id')

    return pd.DataFrame({
        'u_in_cumsum': by_breath['u_in'].cumsum(),
        'time_step_cumsum': by_breath['time_step'].cumsum(),
    })

In [23]:
def get_simple_calc_features(input_df, dataType = 'train'):
    """Per-breath running total of ``time_step * u_in``, returned as column ``area``.

    Computed directly from the three source columns, without deep-copying the
    whole input frame. ``dataType`` is not used.

    Returns:
        One-column DataFrame (``area``) sharing ``input_df``'s index.
    """
    product = input_df['time_step'] * input_df['u_in']
    area = product.groupby(input_df['breath_id']).cumsum()

    return area.to_frame('area')

In [24]:
def get_agg_features(input_df, dataType = 'train'):
    """Per-breath summary statistics of ``u_in``, broadcast back to every row.

    Output columns, in order: ``u_in_amax``, ``u_in_std``, ``u_in_mean``,
    ``u_in_first``, ``u_in_last``, ``u_in_diffmax`` (max minus the row's
    ``u_in``) and ``u_in_diffmean`` (mean minus the row's ``u_in``).

    The aggregations are requested by name and the columns labelled explicitly,
    so the output no longer depends on how the installed pandas/NumPy versions
    name ``np.max`` (``amax`` vs ``max``). ``std`` is the pandas sample standard
    deviation. Only the needed columns are copied, and the result keeps
    ``input_df``'s index. ``dataType`` is not used.
    """
    agg_funcs = ['max', 'std', 'mean', 'first', 'last']
    agg_names = ['u_in_amax', 'u_in_std', 'u_in_mean', 'u_in_first', 'u_in_last']

    def get_agg_window(start_time=0, end_time=3.0, add_suffix=False):
        """Aggregate ``u_in`` per breath over rows with start_time <= time_step <= end_time.

        With ``add_suffix`` the column names get ``_<start_time>_<end_time>``
        appended, so several windows can be joined side by side.
        """
        in_window = (input_df['time_step'] >= start_time) & (input_df['time_step'] <= end_time)
        df_feature = input_df.loc[in_window].groupby('breath_id')['u_in'].agg(agg_funcs)
        df_feature.columns = agg_names

        if add_suffix:
            df_feature = df_feature.add_suffix('_' + str(start_time) + '_' + str(end_time))

        return df_feature

    # Default window only; further windows can be joined the same way with add_suffix=True.
    df_agg_feature = get_agg_window()

    # join(on=...) matches the breath_id column against the aggregate's index
    # and keeps the row order and index of the left frame.
    output_df = input_df[['breath_id', 'u_in']].join(df_agg_feature, on='breath_id')

    output_df['u_in_diffmax'] = output_df['u_in_amax'] - output_df['u_in']
    output_df['u_in_diffmean'] = output_df['u_in_mean'] - output_df['u_in']

    return output_df.drop(columns=['breath_id', 'u_in'])

In [25]:
def to_feature(input_df, dataType = 'train'):
    """Return a new DataFrame holding the feature matrix built from ``input_df``.

    Every processor is called as ``func(input_df, dataType)`` and must return a
    frame with one row per input row. The results are collected and
    concatenated column-wise once at the end, rather than re-concatenating the
    growing frame on every iteration.
    """

    processors = [
        get_raw_features,
        get_category_features,
        get_simple_calc_features,
        get_diff_shift_features,
        get_cum_features,
        get_agg_features
    ]

    feature_frames = []

    for func in tqdm(processors, total=len(processors)):
        with Timer(prefix='' + func.__name__ + ' '):
            _df = func(input_df, dataType)

        # Check that the lengths match (a mismatch means func is implemented incorrectly).
        assert len(_df) == len(input_df), func.__name__
        feature_frames.append(_df)

    # Column-wise concat aligns on the index of each processor's output.
    return pd.concat(feature_frames, axis=1)

In [26]:
# Build the feature matrices for both splits with the same processor pipeline.
train_df = to_feature(train, dataType = 'train')
test_df = to_feature(test, dataType = 'test')

  0%|          | 0/6 [00:00<?, ?it/s]

get_raw_features  0.017[s]


 50%|█████     | 3/6 [00:01<00:01,  2.07it/s]

get_category_features  1.390[s]
get_simple_calc_features  0.099[s]
get_diff_shift_features  6.535[s]


 67%|██████▋   | 4/6 [00:08<00:05,  2.87s/it]

get_cum_features  0.107[s]


 83%|████████▎ | 5/6 [00:08<00:02,  2.03s/it]

get_agg_features  0.706[s]


100%|██████████| 6/6 [00:10<00:00,  1.68s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

get_raw_features  0.009[s]


 33%|███▎      | 2/6 [00:00<00:01,  2.15it/s]

get_category_features  0.905[s]
get_simple_calc_features  0.057[s]
get_diff_shift_features  3.561[s]


 67%|██████▋   | 4/6 [00:04<00:02,  1.33s/it]

get_cum_features  0.057[s]


 83%|████████▎ | 5/6 [00:05<00:01,  1.02s/it]

get_agg_features  0.439[s]


100%|██████████| 6/6 [00:05<00:00,  1.03it/s]


In [27]:
# Continuous inputs: every generated feature column except the categorical R_C code.
train_value_col = [i for i in train_df.columns.to_list() if i not in ['R_C']]
# Categorical inputs: passed to the model separately and looked up in its embedding layer.
train_category_col = ['R_C']

In [28]:
# Standardise the continuous features with statistics fitted on train only,
# then fill the remaining NaNs (e.g. from shift/diff features) with the
# per-column mean of the *scaled train* data for both splits.
# NOTE: this cell overwrites train_df / test_df, so it cannot be re-run without
# first re-running the feature-building cell (R_C is no longer present afterwards).
ss = StandardScaler()

train_category = train_df[train_category_col]
train_df = pd.DataFrame(ss.fit_transform(train_df[train_value_col]), columns=train_value_col)
train_mean = train_df.mean()
# Reuse train_mean instead of computing the column means a second time.
train_df = train_df.fillna(train_mean)

test_category = test_df[train_category_col]
test_df = pd.DataFrame(ss.transform(test_df[train_value_col]), columns=train_value_col)
test_df = test_df.fillna(train_mean)

In [29]:
# Show both scaled frames. One display() call avoids the `(None, None)` tuple
# that the former `display(a), display(b)` expression echoed as the cell result.
display(train_df, test_df)

Unnamed: 0,time_step,u_in,R,C,area,u_in_shift_-2,u_in_diff_-2,time_step_shift_-2,time_step_diff_-2,u_in_shift_-1,...,time_step_diff_4,u_in_cumsum,time_step_cumsum,u_in_amax,u_in_std,u_in_mean,u_in_first,u_in_last,u_in_diffmax,u_in_diffmean
0,-1.707097,-0.561606,1.102187,1.288664,-0.963494,9.605299e-01,-2.791837e+00,-1.705213e+00,-3.845508e-01,6.341705e-01,...,3.144948e-15,-1.051244,-1.116736,-0.392092,-0.102115,0.210553,-0.619271,0.588408,-0.119158,0.709177
1,-1.663494,0.615908,1.102187,1.288664,-0.960880,9.809278e-01,-6.003843e-01,-1.660000e+00,-5.671887e-01,9.043521e-01,...,3.144948e-15,-1.013036,-1.115680,-0.392092,-0.102115,0.210553,-0.619271,0.588408,-0.708865,-0.580513
2,-1.619619,0.881415,1.102187,1.288664,-0.954459,1.154371e+00,-4.081215e-01,-1.614540e+00,-7.379985e-01,9.239659e-01,...,3.144948e-15,-0.966251,-1.113560,-0.392092,-0.102115,0.210553,-0.619271,0.588408,-0.841833,-0.871314
3,-1.575529,0.900690,1.102187,1.288664,-0.944674,1.284027e+00,-6.034599e-01,-1.569440e+00,-6.963687e-01,1.090743e+00,...,3.144948e-15,-0.918844,-1.110372,-0.392092,-0.102115,0.210553,-0.619271,0.588408,-0.851486,-0.892425
4,-1.531199,1.064581,1.102187,1.288664,-0.930131,1.275012e+00,-2.772519e-01,-1.524250e+00,-5.975703e-01,1.215416e+00,...,5.680972e-01,-0.866143,-1.106110,-0.392092,-0.102115,0.210553,-0.619271,0.588408,-0.933564,-1.071929
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3625915,1.572620,-0.247012,1.102187,-1.024890,0.210740,-2.331220e-01,-6.252708e-02,1.658460e+00,-5.220800e-01,-2.436603e-01,...,4.163168e-01,-0.700626,1.901836,-1.118206,-0.978052,-1.017838,-0.622190,0.437882,-1.018234,-0.184200
3625916,1.616633,-0.246735,1.102187,-1.024890,0.264677,-2.329148e-01,-6.237310e-02,1.703172e+00,-3.670408e-01,-2.434232e-01,...,5.064107e-01,-0.690283,1.982375,-1.118206,-0.978052,-1.017838,-0.622190,0.437882,-1.018373,-0.184504
3625917,1.660465,-0.246502,1.102187,-1.024890,0.319364,-2.327396e-01,-6.224554e-02,1.747912e+00,-2.909707e-01,-2.432240e-01,...,5.521327e-01,-0.679931,2.063975,-1.118206,-0.978052,-1.017838,-0.622190,0.437882,-1.018490,-0.184759
3625918,1.704065,-0.246306,1.102187,-1.024890,0.374794,2.703191e-16,7.225473e-18,-3.394666e-18,1.749340e-15,-2.430556e-01,...,4.316944e-01,-0.669573,2.146632,-1.118206,-0.978052,-1.017838,-0.622190,0.437882,-1.018588,-0.184973


Unnamed: 0,time_step,u_in,R,C,area,u_in_shift_-2,u_in_diff_-2,time_step_shift_-2,time_step_diff_-2,u_in_shift_-1,...,time_step_diff_4,u_in_cumsum,time_step_cumsum,u_in_amax,u_in_std,u_in_mean,u_in_first,u_in_last,u_in_diffmax,u_in_diffmean
0,-1.707097,-0.566968,-0.907287,-0.446501,-0.963494,4.254576e-01,-1.845349e+00,-1.710112e+00,1.408779e+00,-7.745434e-02,...,3.144948e-15,-1.051417,-1.116736,-0.100863,0.125166,0.081413,-0.622190,-1.270563,0.180937,0.657354
1,-1.665760,-0.083404,-0.907287,-0.446501,-0.962481,8.734576e-01,-1.731381e+00,-1.667695e+00,1.399038e+00,3.898446e-01,...,3.144948e-15,-1.035797,-1.115735,-0.100863,0.125166,0.081413,-0.622190,-1.270563,-0.061235,0.127723
2,-1.624396,0.375809,-0.907287,-0.446501,-0.958530,1.220091e+00,-1.482253e+00,-1.625318e+00,1.414113e+00,8.206263e-01,...,3.144948e-15,-1.005345,-1.113731,-0.100863,0.125166,0.081413,-0.622190,-1.270563,-0.291212,-0.375238
3,-1.583033,0.799138,-0.907287,-0.446501,-0.949941,1.503778e+00,-1.188485e+00,-1.582914e+00,1.419099e+00,1.153937e+00,...,3.144948e-15,-0.961218,-1.110725,-0.100863,0.125166,0.081413,-0.622190,-1.270563,-0.503218,-0.838895
4,-1.541710,1.126682,-0.907287,-0.446501,-0.935747,1.712085e+00,-9.411780e-01,-1.540509e+00,1.408547e+00,1.426721e+00,...,-1.428878e+00,-0.906511,-1.106717,-0.100863,0.125166,0.081413,-0.622190,-1.270563,-0.667254,-1.197643
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2417195,1.571173,-0.247088,1.102187,-1.024890,0.177634,-2.331824e-01,-6.256295e-02,1.656394e+00,-3.085967e-01,-2.437274e-01,...,4.964916e-01,-0.646712,1.899129,-0.176306,-0.716990,-0.966697,-0.272385,0.356971,-0.056305,-0.161268
2417196,1.614885,-0.246801,1.102187,-1.024890,0.231531,-2.329658e-01,-6.240676e-02,1.701117e+00,-2.709095e-01,-2.434813e-01,...,5.437983e-01,-0.636370,1.979625,-0.176306,-0.716990,-0.966697,-0.272385,0.356971,-0.056449,-0.161583
2417197,1.658449,-0.246559,1.102187,-1.024890,0.286176,-2.327803e-01,-6.228089e-02,1.746515e+00,-5.361112e-01,-2.432731e-01,...,4.078650e-01,-0.626021,2.061177,-0.176306,-0.716990,-0.966697,-0.272385,0.356971,-0.056570,-0.161848
2417198,1.702061,-0.246354,1.102187,-1.024890,0.341565,2.703191e-16,7.225473e-18,-3.394666e-18,1.749340e-15,-2.430946e-01,...,3.255772e-01,-0.615665,2.143785,-0.176306,-0.716990,-0.966697,-0.272385,0.356971,-0.056673,-0.162072


(None, None)

In [30]:
# Re-attach the unscaled R_C code and the id / grouping / target / mask columns
# to the scaled features. The column-wise concat aligns the pieces on their index.
train_df = pd.concat([train_df, train_category, train[['id', 'breath_id', 'pressure', 'u_out']]], axis=1)
test_df = pd.concat([test_df, test_category, test[['id', 'breath_id', 'u_out']]], axis=1)

In [31]:
# Shrink both frames with the project helper (the recorded output reports a
# ~74% reduction). NOTE(review): presumably this downcasts numeric dtypes —
# confirm the reduced precision is acceptable for the scaled features.
train_df = utils.reduce_mem_usage(train_df)
test_df = utils.reduce_mem_usage(test_df)

Mem. usage decreased from 1189.53 Mb to 304.30 Mb (74.4% reduction)
Mem. usage decreased from 774.55 Mb to 198.25 Mb (74.4% reduction)


In [32]:
# ---------------------------------------------------------------------------
# Cross-validated training: for every fold listed in CFG.folds, train the model,
# reload the best checkpoint, score the validation split (OOF) and predict test.
# ---------------------------------------------------------------------------
oof_total = np.zeros((len(train), CFG.num_classes))
# One column per *trained* fold, in the order the folds appear in CFG.folds.
sub_preds = np.zeros((test.shape[0], len(CFG.folds)))
val_idxes = []
models = []
y = train['pressure']
groups = train['breath_id']
gkfold = model_selection.GroupKFold(n_splits=CFG.n_folds)
scores = []
input_dim = len(train_value_col)

for i, (trn_idx, val_idx) in enumerate(splitter.split(train_df, y, groups)):
    if i not in CFG.folds:
        continue

    trn_df = train_df.loc[trn_idx, :].reset_index(drop=True)
    val_df = train_df.loc[val_idx, :].reset_index(drop=True)
    trn_y = y.values[trn_idx]
    val_y = y.values[val_idx]

    loaders = {
        phase: torchdata.DataLoader(
            VentilatorDataset(
                df_, train_value_col, train_category_col
            ),
            **CFG.loader_params[phase])  # type: ignore
        for phase, df_ in zip(["train", "valid", "test"], [trn_df, val_df, test_df])
    }

    model = RNNModel(
        input_dim=input_dim,
        lstm_dim=CFG.lstm_dim,
        dense_dim=CFG.dense_dim,
        logit_dim=CFG.logit_dim,
        num_classes=CFG.num_classes,
    )
    model_name = model.__class__.__name__

    learner = Learner(model)

    # loggers
    RUN_NAME = f'exp{str(CFG.exp_num)}'
    wandb.init(project='Ventilator-Pressure-Prediction', entity='sqrt4kaido', group=RUN_NAME, job_type=RUN_NAME + f'-fold-{i}')
    wandb.run.name = RUN_NAME + f'-fold-{i}'
    wandb_config = wandb.config
    wandb_config.model_name = model_name
    wandb.watch(model)

    # callbacks
    callbacks = []
    # NOTE(review): learner.current_epoch is evaluated once, here, so the file
    # name contains the epoch at construction time rather than the best epoch.
    checkpoint_callback = ModelCheckpoint(
        monitor='Loss/val',
        mode='min',
        dirpath=OUTPUT_DIR,
        verbose=False,
        save_weights_only=True,
        filename=f'{model_name}-{learner.current_epoch}-{i}')
    callbacks.append(checkpoint_callback)

#     early_stop_callback = EarlyStopping(
#         monitor='Loss/val',
#         min_delta=0.00,
#         patience=10,
#         verbose=True,
#         mode='min')
#     callbacks.append(early_stop_callback)

    loggers = []
    loggers.append(WandbLogger())

    trainer = pl.Trainer(
        logger=loggers,
        callbacks=callbacks,
        max_epochs=CFG.epochs,
        default_root_dir=OUTPUT_DIR,
        gpus=1,
        deterministic=True,
        benchmark=False,
        )

    trainer.fit(learner, train_dataloader=loaders['train'], val_dataloaders=loaders['valid'])
    print('train done.')

    #############
    # validation (to make oof)
    #############
    # Restore the weights of the checkpoint with the lowest 'Loss/val'.
    checkpoint = torch.load(checkpoint_callback.best_model_path)
    learner.load_state_dict(checkpoint['state_dict'])

    model = model.to(device)
    oof_pred, oof_target = evaluate(model, loaders, phase="valid")
    models.append(model)

    oof_score = get_score(oof_pred, oof_target, val_df['u_out'].values)
    scores.append(oof_score)
    # NOTE(review): the OOF predictions are divided by CFG.bias, but nothing
    # visible in this notebook multiplies the target by CFG.bias, and the test
    # predictions below are stored undivided — confirm the intended scale.
    oof_total[val_idx] = oof_pred.reshape(-1, 1) / CFG.bias
    val_idxes.append(val_idx)

    print('validate done.')
    # The score is the masked MAE from get_score (it was mislabelled "auc").
    print(f'fold = {i}, mae = {oof_score}')
    wandb.log({'CV_score': oof_score})

    #############
    # inference
    #############
    test_pred, _ = evaluate(model, loaders, phase="test")
    # Use the fold's position in CFG.folds, not the fold number itself:
    # sub_preds has len(CFG.folds) columns, so e.g. CFG.folds = [3] would
    # otherwise index past the last column.
    sub_preds[:, CFG.folds.index(i)] = test_pred

    print('inference done.')

# test_preds_total = np.array(test_preds_total)


init LSTM(516, 512, num_layers=2, batch_first=True, bidirectional=True)


[34m[1mwandb[0m: Currently logged in as: [33msqrt4kaido[0m (use `wandb login --relogin` to force relogin)


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type           | Params
---------------------------------------------
0 | model     | RNNModel       | 12.8 M
1 | criterion | VentilatorLoss | 0     
---------------------------------------------
12.8 M    Trainable params
0         Non-trainable params
12.8 M    Total params
51.143    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

epoch = 0, custom_mae = 17.251941680908203


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

epoch = 0, custom_mae = 0.7591480612754822


Validating: 0it [00:00, ?it/s]

epoch = 1, custom_mae = 0.6320192813873291


Validating: 0it [00:00, ?it/s]

epoch = 2, custom_mae = 0.5893327593803406


Validating: 0it [00:00, ?it/s]

epoch = 3, custom_mae = 0.6077409982681274


Validating: 0it [00:00, ?it/s]

epoch = 4, custom_mae = 0.5578079223632812


Validating: 0it [00:00, ?it/s]

epoch = 5, custom_mae = 0.5610255002975464


Validating: 0it [00:00, ?it/s]

epoch = 6, custom_mae = 0.5139026045799255


Validating: 0it [00:00, ?it/s]

epoch = 7, custom_mae = 0.4537058472633362


Validating: 0it [00:00, ?it/s]

epoch = 8, custom_mae = 0.44888532161712646


Validating: 0it [00:00, ?it/s]

epoch = 9, custom_mae = 0.4302043616771698


Validating: 0it [00:00, ?it/s]

epoch = 10, custom_mae = 0.4830106496810913


Validating: 0it [00:00, ?it/s]

epoch = 11, custom_mae = 0.4069691598415375


Validating: 0it [00:00, ?it/s]

epoch = 12, custom_mae = 0.362924188375473


Validating: 0it [00:00, ?it/s]

epoch = 13, custom_mae = 0.3708060085773468


Validating: 0it [00:00, ?it/s]

epoch = 14, custom_mae = 0.3502226769924164


Validating: 0it [00:00, ?it/s]

epoch = 15, custom_mae = 0.34948286414146423


Validating: 0it [00:00, ?it/s]

epoch = 16, custom_mae = 0.3338329792022705


Validating: 0it [00:00, ?it/s]

epoch = 17, custom_mae = 0.31016936898231506


Validating: 0it [00:00, ?it/s]

epoch = 18, custom_mae = 0.30969318747520447


Validating: 0it [00:00, ?it/s]

epoch = 19, custom_mae = 0.30154719948768616


Validating: 0it [00:00, ?it/s]

epoch = 20, custom_mae = 0.2894487977027893


Validating: 0it [00:00, ?it/s]

epoch = 21, custom_mae = 0.28000006079673767


Validating: 0it [00:00, ?it/s]

epoch = 22, custom_mae = 0.2761635184288025


Validating: 0it [00:00, ?it/s]

epoch = 23, custom_mae = 0.27215269207954407


Validating: 0it [00:00, ?it/s]

epoch = 24, custom_mae = 0.2715866267681122


Validating: 0it [00:00, ?it/s]

epoch = 25, custom_mae = 0.27100256085395813


Validating: 0it [00:00, ?it/s]

epoch = 26, custom_mae = 0.27201539278030396


Validating: 0it [00:00, ?it/s]

epoch = 27, custom_mae = 0.27236613631248474


Validating: 0it [00:00, ?it/s]

epoch = 28, custom_mae = 0.2735004127025604


Validating: 0it [00:00, ?it/s]

epoch = 29, custom_mae = 0.2826072871685028


Validating: 0it [00:00, ?it/s]

epoch = 30, custom_mae = 0.2785834074020386


Validating: 0it [00:00, ?it/s]

epoch = 31, custom_mae = 0.28620561957359314


Validating: 0it [00:00, ?it/s]

epoch = 32, custom_mae = 0.30338606238365173


Validating: 0it [00:00, ?it/s]

epoch = 33, custom_mae = 0.3022228181362152


Validating: 0it [00:00, ?it/s]

epoch = 34, custom_mae = 0.31525012850761414


Validating: 0it [00:00, ?it/s]

epoch = 35, custom_mae = 0.34386610984802246


Validating: 0it [00:00, ?it/s]

epoch = 36, custom_mae = 0.3171495795249939


Validating: 0it [00:00, ?it/s]

epoch = 37, custom_mae = 0.3044433891773224


Validating: 0it [00:00, ?it/s]

epoch = 38, custom_mae = 0.368510365486145


Validating: 0it [00:00, ?it/s]

epoch = 39, custom_mae = 0.3108868896961212


Validating: 0it [00:00, ?it/s]

epoch = 40, custom_mae = 0.3284807801246643


Validating: 0it [00:00, ?it/s]

epoch = 41, custom_mae = 0.36033111810684204


Validating: 0it [00:00, ?it/s]

epoch = 42, custom_mae = 0.36365076899528503


Validating: 0it [00:00, ?it/s]

epoch = 43, custom_mae = 0.38167840242385864


Validating: 0it [00:00, ?it/s]

epoch = 44, custom_mae = 0.3739611506462097


Validating: 0it [00:00, ?it/s]

epoch = 45, custom_mae = 0.3461375832557678


Validating: 0it [00:00, ?it/s]

epoch = 46, custom_mae = 0.32232213020324707


Validating: 0it [00:00, ?it/s]

epoch = 47, custom_mae = 0.42227867245674133


Validating: 0it [00:00, ?it/s]

epoch = 48, custom_mae = 0.3856123089790344


Validating: 0it [00:00, ?it/s]

epoch = 49, custom_mae = 0.35373061895370483


Validating: 0it [00:00, ?it/s]

epoch = 50, custom_mae = 0.3653511703014374


Validating: 0it [00:00, ?it/s]

epoch = 51, custom_mae = 0.36320754885673523


Validating: 0it [00:00, ?it/s]

epoch = 52, custom_mae = 0.35475456714630127


Validating: 0it [00:00, ?it/s]

epoch = 53, custom_mae = 0.31206923723220825


Validating: 0it [00:00, ?it/s]

epoch = 54, custom_mae = 0.31231316924095154


Validating: 0it [00:00, ?it/s]

epoch = 55, custom_mae = 0.2880263030529022


Validating: 0it [00:00, ?it/s]

epoch = 56, custom_mae = 0.2816425859928131


Validating: 0it [00:00, ?it/s]

epoch = 57, custom_mae = 0.26734817028045654


Validating: 0it [00:00, ?it/s]

epoch = 58, custom_mae = 0.29584863781929016


Validating: 0it [00:00, ?it/s]

epoch = 59, custom_mae = 0.24945347011089325


Validating: 0it [00:00, ?it/s]

epoch = 60, custom_mae = 0.24571387469768524


Validating: 0it [00:00, ?it/s]

epoch = 61, custom_mae = 0.25020143389701843


Validating: 0it [00:00, ?it/s]

epoch = 62, custom_mae = 0.27783653140068054


Validating: 0it [00:00, ?it/s]

epoch = 63, custom_mae = 0.2400028258562088


Validating: 0it [00:00, ?it/s]

epoch = 64, custom_mae = 0.2470596432685852


Validating: 0it [00:00, ?it/s]

epoch = 65, custom_mae = 0.21345701813697815


Validating: 0it [00:00, ?it/s]

epoch = 66, custom_mae = 0.2103177160024643


Validating: 0it [00:00, ?it/s]

epoch = 67, custom_mae = 0.20008105039596558


Validating: 0it [00:00, ?it/s]

epoch = 68, custom_mae = 0.19387325644493103


Validating: 0it [00:00, ?it/s]

epoch = 69, custom_mae = 0.19036799669265747


Validating: 0it [00:00, ?it/s]

epoch = 70, custom_mae = 0.19140057265758514


Validating: 0it [00:00, ?it/s]

epoch = 71, custom_mae = 0.1899551898241043


Validating: 0it [00:00, ?it/s]

epoch = 72, custom_mae = 0.18335726857185364


Validating: 0it [00:00, ?it/s]

epoch = 73, custom_mae = 0.1829586625099182


Validating: 0it [00:00, ?it/s]

epoch = 74, custom_mae = 0.1823744922876358


Validating: 0it [00:00, ?it/s]

epoch = 75, custom_mae = 0.1823531836271286


Validating: 0it [00:00, ?it/s]

epoch = 76, custom_mae = 0.18217569589614868


Validating: 0it [00:00, ?it/s]

epoch = 77, custom_mae = 0.18267452716827393


Validating: 0it [00:00, ?it/s]

epoch = 78, custom_mae = 0.18493716418743134


Validating: 0it [00:00, ?it/s]

epoch = 79, custom_mae = 0.18656757473945618


Validating: 0it [00:00, ?it/s]

epoch = 80, custom_mae = 0.1863320767879486


Validating: 0it [00:00, ?it/s]

epoch = 81, custom_mae = 0.20729926228523254


Validating: 0it [00:00, ?it/s]

epoch = 82, custom_mae = 0.19515737891197205


Validating: 0it [00:00, ?it/s]

epoch = 83, custom_mae = 0.20019446313381195


Validating: 0it [00:00, ?it/s]

epoch = 84, custom_mae = 0.21118180453777313


Validating: 0it [00:00, ?it/s]

epoch = 85, custom_mae = 0.20109738409519196


Validating: 0it [00:00, ?it/s]

epoch = 86, custom_mae = 0.22794553637504578


Validating: 0it [00:00, ?it/s]

epoch = 87, custom_mae = 0.21821044385433197


Validating: 0it [00:00, ?it/s]

epoch = 88, custom_mae = 0.2185036540031433


Validating: 0it [00:00, ?it/s]

epoch = 89, custom_mae = 0.338981568813324


Validating: 0it [00:00, ?it/s]

epoch = 90, custom_mae = 0.2547873854637146


Validating: 0it [00:00, ?it/s]

epoch = 91, custom_mae = 0.24046552181243896


Validating: 0it [00:00, ?it/s]

epoch = 92, custom_mae = 0.23794886469841003


Validating: 0it [00:00, ?it/s]

epoch = 93, custom_mae = 0.2584267258644104


Validating: 0it [00:00, ?it/s]

epoch = 94, custom_mae = 0.2734359800815582


Validating: 0it [00:00, ?it/s]

epoch = 95, custom_mae = 0.3339919149875641


Validating: 0it [00:00, ?it/s]

epoch = 96, custom_mae = 0.24158723652362823


Validating: 0it [00:00, ?it/s]

epoch = 97, custom_mae = 0.33024370670318604


Validating: 0it [00:00, ?it/s]

epoch = 98, custom_mae = 0.7323112487792969


Validating: 0it [00:00, ?it/s]

epoch = 99, custom_mae = 0.3152826726436615


Validating: 0it [00:00, ?it/s]

epoch = 100, custom_mae = 0.254889577627182


Validating: 0it [00:00, ?it/s]

epoch = 101, custom_mae = 0.2709949314594269


Validating: 0it [00:00, ?it/s]

epoch = 102, custom_mae = 0.27102965116500854


Validating: 0it [00:00, ?it/s]

epoch = 103, custom_mae = 0.22835196554660797


Validating: 0it [00:00, ?it/s]

epoch = 104, custom_mae = 0.2591596841812134


Validating: 0it [00:00, ?it/s]

epoch = 105, custom_mae = 0.29240307211875916


Validating: 0it [00:00, ?it/s]

epoch = 106, custom_mae = 0.22149884700775146


Validating: 0it [00:00, ?it/s]

epoch = 107, custom_mae = 0.2298567295074463


Validating: 0it [00:00, ?it/s]

epoch = 108, custom_mae = 0.22532673180103302


Validating: 0it [00:00, ?it/s]

epoch = 109, custom_mae = 0.20040304958820343


Validating: 0it [00:00, ?it/s]

epoch = 110, custom_mae = 0.2012825608253479


Validating: 0it [00:00, ?it/s]

epoch = 111, custom_mae = 0.19523285329341888


Validating: 0it [00:00, ?it/s]

epoch = 112, custom_mae = 0.21290867030620575


Validating: 0it [00:00, ?it/s]

epoch = 113, custom_mae = 0.24096931517124176


Validating: 0it [00:00, ?it/s]

epoch = 114, custom_mae = 0.17976228892803192


Validating: 0it [00:00, ?it/s]

epoch = 115, custom_mae = 0.18512268364429474


Validating: 0it [00:00, ?it/s]

epoch = 116, custom_mae = 0.17814916372299194


Validating: 0it [00:00, ?it/s]

epoch = 117, custom_mae = 0.17444412410259247


Validating: 0it [00:00, ?it/s]

epoch = 118, custom_mae = 0.18235161900520325


Validating: 0it [00:00, ?it/s]

epoch = 119, custom_mae = 0.16926242411136627


Validating: 0it [00:00, ?it/s]

epoch = 120, custom_mae = 0.16732114553451538


Validating: 0it [00:00, ?it/s]

epoch = 121, custom_mae = 0.16454768180847168


Validating: 0it [00:00, ?it/s]

epoch = 122, custom_mae = 0.16445322334766388


Validating: 0it [00:00, ?it/s]

epoch = 123, custom_mae = 0.1641499102115631


Validating: 0it [00:00, ?it/s]

epoch = 124, custom_mae = 0.1638048142194748


Validating: 0it [00:00, ?it/s]

epoch = 125, custom_mae = 0.1638024002313614


Validating: 0it [00:00, ?it/s]

epoch = 126, custom_mae = 0.1641004979610443


Validating: 0it [00:00, ?it/s]

epoch = 127, custom_mae = 0.16398583352565765


Validating: 0it [00:00, ?it/s]

epoch = 128, custom_mae = 0.16443480551242828


Validating: 0it [00:00, ?it/s]

epoch = 129, custom_mae = 0.1663661152124405


Validating: 0it [00:00, ?it/s]

epoch = 130, custom_mae = 0.16600050032138824


Validating: 0it [00:00, ?it/s]

epoch = 131, custom_mae = 0.16822746396064758


Validating: 0it [00:00, ?it/s]

epoch = 132, custom_mae = 0.17409539222717285


Validating: 0it [00:00, ?it/s]

epoch = 133, custom_mae = 0.17205369472503662


Validating: 0it [00:00, ?it/s]

epoch = 134, custom_mae = 0.17970001697540283


Validating: 0it [00:00, ?it/s]

epoch = 135, custom_mae = 0.1860228031873703


Validating: 0it [00:00, ?it/s]

epoch = 136, custom_mae = 0.18921653926372528


Validating: 0it [00:00, ?it/s]

epoch = 137, custom_mae = 0.18701931834220886


Validating: 0it [00:00, ?it/s]

epoch = 138, custom_mae = 0.21698953211307526


Validating: 0it [00:00, ?it/s]

epoch = 139, custom_mae = 0.21302370727062225


Validating: 0it [00:00, ?it/s]

epoch = 140, custom_mae = 0.1936485469341278


Validating: 0it [00:00, ?it/s]

epoch = 141, custom_mae = 0.25872838497161865


Validating: 0it [00:00, ?it/s]

epoch = 142, custom_mae = 0.2131541520357132


Validating: 0it [00:00, ?it/s]

epoch = 143, custom_mae = 0.2951551377773285


Validating: 0it [00:00, ?it/s]

epoch = 144, custom_mae = 0.21852396428585052


Validating: 0it [00:00, ?it/s]

epoch = 145, custom_mae = 0.20160719752311707


Validating: 0it [00:00, ?it/s]

epoch = 146, custom_mae = 0.20994044840335846


Validating: 0it [00:00, ?it/s]

epoch = 147, custom_mae = 0.21201901137828827


Validating: 0it [00:00, ?it/s]

epoch = 148, custom_mae = 0.2004326581954956


Validating: 0it [00:00, ?it/s]

epoch = 149, custom_mae = 0.22230614721775055


Validating: 0it [00:00, ?it/s]

epoch = 150, custom_mae = 0.22557274997234344


Validating: 0it [00:00, ?it/s]

epoch = 151, custom_mae = 0.22476112842559814


Validating: 0it [00:00, ?it/s]

epoch = 152, custom_mae = 0.25680142641067505


Validating: 0it [00:00, ?it/s]

epoch = 153, custom_mae = 0.2028188854455948


Validating: 0it [00:00, ?it/s]

epoch = 154, custom_mae = 0.2680703401565552


Validating: 0it [00:00, ?it/s]

epoch = 155, custom_mae = 0.20500649511814117


Validating: 0it [00:00, ?it/s]

epoch = 156, custom_mae = 0.19553588330745697


Validating: 0it [00:00, ?it/s]

epoch = 157, custom_mae = 0.18741470575332642


Validating: 0it [00:00, ?it/s]

epoch = 158, custom_mae = 0.18904313445091248


Validating: 0it [00:00, ?it/s]

epoch = 159, custom_mae = 0.1814422905445099


Validating: 0it [00:00, ?it/s]

epoch = 160, custom_mae = 0.178655743598938


Validating: 0it [00:00, ?it/s]

epoch = 161, custom_mae = 0.18973927199840546


Validating: 0it [00:00, ?it/s]

epoch = 162, custom_mae = 0.18407757580280304


Validating: 0it [00:00, ?it/s]

epoch = 163, custom_mae = 0.17347857356071472


Validating: 0it [00:00, ?it/s]

epoch = 164, custom_mae = 0.17200905084609985


Validating: 0it [00:00, ?it/s]

epoch = 165, custom_mae = 0.16682974994182587


Validating: 0it [00:00, ?it/s]

epoch = 166, custom_mae = 0.16619789600372314


Validating: 0it [00:00, ?it/s]

epoch = 167, custom_mae = 0.16427570581436157


Validating: 0it [00:00, ?it/s]

epoch = 168, custom_mae = 0.16025467216968536


Validating: 0it [00:00, ?it/s]

epoch = 169, custom_mae = 0.15904080867767334


Validating: 0it [00:00, ?it/s]

epoch = 170, custom_mae = 0.15882158279418945


Validating: 0it [00:00, ?it/s]

epoch = 171, custom_mae = 0.15837602317333221


Validating: 0it [00:00, ?it/s]

epoch = 172, custom_mae = 0.15812665224075317


Validating: 0it [00:00, ?it/s]

epoch = 173, custom_mae = 0.15739673376083374


Validating: 0it [00:00, ?it/s]

epoch = 174, custom_mae = 0.15739281475543976


Validating: 0it [00:00, ?it/s]

epoch = 175, custom_mae = 0.15738369524478912


Validating: 0it [00:00, ?it/s]

epoch = 176, custom_mae = 0.15730123221874237


Validating: 0it [00:00, ?it/s]

epoch = 177, custom_mae = 0.15781812369823456


Validating: 0it [00:00, ?it/s]

epoch = 178, custom_mae = 0.15806816518306732


Validating: 0it [00:00, ?it/s]

epoch = 179, custom_mae = 0.1585826575756073


Validating: 0it [00:00, ?it/s]

epoch = 180, custom_mae = 0.15887582302093506


Validating: 0it [00:00, ?it/s]

epoch = 181, custom_mae = 0.1596585363149643


Validating: 0it [00:00, ?it/s]

epoch = 182, custom_mae = 0.16185761988162994


Validating: 0it [00:00, ?it/s]

epoch = 183, custom_mae = 0.16204887628555298


Validating: 0it [00:00, ?it/s]

epoch = 184, custom_mae = 0.1694241315126419


Validating: 0it [00:00, ?it/s]

epoch = 185, custom_mae = 0.1756506860256195


Validating: 0it [00:00, ?it/s]

epoch = 186, custom_mae = 0.1692158281803131


Validating: 0it [00:00, ?it/s]

epoch = 187, custom_mae = 0.18105943500995636


Validating: 0it [00:00, ?it/s]

epoch = 188, custom_mae = 0.19137829542160034


Validating: 0it [00:00, ?it/s]

epoch = 189, custom_mae = 0.19625665247440338


Validating: 0it [00:00, ?it/s]

epoch = 190, custom_mae = 0.19798928499221802


Validating: 0it [00:00, ?it/s]

epoch = 191, custom_mae = 0.21930566430091858


Validating: 0it [00:00, ?it/s]

epoch = 192, custom_mae = 0.18370577692985535


Validating: 0it [00:00, ?it/s]

epoch = 193, custom_mae = 0.19743283092975616


Validating: 0it [00:00, ?it/s]

epoch = 194, custom_mae = 0.20658382773399353


Validating: 0it [00:00, ?it/s]

epoch = 195, custom_mae = 0.19800418615341187


Validating: 0it [00:00, ?it/s]

epoch = 196, custom_mae = 0.20116156339645386


Validating: 0it [00:00, ?it/s]

epoch = 197, custom_mae = 0.2437773495912552


Validating: 0it [00:00, ?it/s]

epoch = 198, custom_mae = 0.2372736781835556


Validating: 0it [00:00, ?it/s]

epoch = 199, custom_mae = 0.23727788031101227
train done.
validate done.
fold = 0, auc = 0.15730121505182193
inference done.


In [None]:
# Score the out-of-fold (OOF) predictions and write them to
# OUTPUT_DIR / oof{exp_num}.csv. The score is always stored in `oof_score`
# so that later cells can read it no matter which branch ran.
oof_path = OUTPUT_DIR / f'oof{CFG.exp_num}.csv'
if len(CFG.folds) != CFG.n_folds:
    # Only a subset of the folds was trained: score the validation predictions alone.
    oof_score = get_score(oof_pred, oof_target, val_df['u_out'].values)
    print(f'MAE {oof_score}')

    # Rows come from val_idxes[0] and only the first column of `train` is kept
    # (presumably the id column -- confirm). `.copy()` makes the frame independent
    # of `train`, so adding a column does not raise SettingWithCopyWarning.
    # NOTE(review): only val_idxes[0] is used; this looks like it assumes a
    # single-fold run -- confirm before running with several (but not all) folds.
    oof_df = train.iloc[val_idxes[0], :1].copy()
    oof_df['pressure'] = oof_pred
else:
    # Every fold was trained: score the full OOF array against the targets.
    # NOTE(review): the argument order here (y first) differs from the branch
    # above (prediction first); verify against get_score's signature.
    score = get_score(y, oof_total, train['u_out'].values)
    oof_score = score  # same value under the name the summary cell logs
    print(f'MAE {score}: folds: {scores}')

    oof_df = pd.DataFrame({'id': train['id'].values, 'pressure': oof_total.reshape(-1)})

# Both branches write to the same file, so the export is done once here.
oof_df.to_csv(oof_path, index=False)
oof_df

In [None]:
# Build the submission file: start from the sample submission, overwrite its
# 'pressure' column with the row-wise mean of `sub_preds` (axis=1), and save it
# as OUTPUT_DIR / sub{exp_num}.csv.
submission_path = OUTPUT_DIR / f'sub{CFG.exp_num}.csv'
sub = pd.read_csv(DATA_DIR / 'sample_submission.csv').assign(
    pressure=np.mean(sub_preds, axis=1)
)
sub.to_csv(submission_path, index=False)
sub

In [None]:
# Log the overall CV score to Weights & Biases as a separate 'summary' run
# inside the RUN_NAME group.
# The OOF cell stores the metric in `oof_score` when only some folds were
# trained and in `score` when all folds were trained; select it with the same
# fold condition so this cell does not fail with a NameError (or log a stale
# value) in the all-folds configuration.
cv_score = oof_score if len(CFG.folds) != CFG.n_folds else score
wandb.init(
    project='Ventilator-Pressure-Prediction',
    entity='sqrt4kaido',
    group=RUN_NAME,
    job_type='summary',
    name='summary',  # set the run name at init instead of mutating wandb.run afterwards
)
wandb.log({'CV_score': cv_score})
# wandb.save(utils.get_notebook_path())
wandb.finish()

Exception in thread Thread-8:
Traceback (most recent call last):
  File "/usr/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/home/user/.local/lib/python3.8/site-packages/wandb/sdk/wandb_run.py", line 197, in check_network_status
    status_response = self._interface.communicate_network_status()
  File "/home/user/.local/lib/python3.8/site-packages/wandb/sdk/interface/interface.py", line 749, in communicate_network_status
    resp = self._communicate(req, timeout=timeout, local=True)
  File "/home/user/.local/lib/python3.8/site-packages/wandb/sdk/interface/interface.py", line 539, in _communicate
    return self._communicate_async(rec, local=local).get(timeout=timeout)
  File "/home/user/.local/lib/python3.8/site-packages/wandb/sdk/interface/interface.py", line 544, in _communicate_async
    raise Exception("The wandb backend process has shutdown")