In [1]:
import sys
from time import time
import numpy as np
import pandas as pd
from pathlib import Path
import lightgbm as lgb
import matplotlib.pyplot as plt 
import seaborn as sns
from tqdm import tqdm
import copy
import wandb
from collections import OrderedDict

from sklearn.metrics import mean_absolute_error
from sklearn import model_selection
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as torchdata

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.loggers import WandbLogger


In [2]:
sys.path.append('../../src/')
import utils as utils
from utils import Timer

In [3]:
class CFG:
    """Experiment configuration, read as class attributes (``CFG.<name>``) by the cells below."""
    seed = 42  # passed to utils.set_seed
    exp_num = 16  # used to build the wandb run/group name (f'exp{exp_num}')
    local = True  # selects which DATA_DIR / OUTPUT_DIR pair is used
    n_folds = 5  # used for the `gkfold` object; the splitter actually iterated is built from split_params
    folds = [0]  # fold indices that are actually trained; others are skipped
    debug = False  # not referenced by the visible cells
    bias = 1000  # OOF predictions are divided by this value in the training cell
    epochs = 200  # max_epochs for the Lightning Trainer

    
    ######################
    # Dataset #
    ######################
    # Per-phase transform specs consumed by get_transforms(): each entry is
    # {"name": <class name looked up in globals()>, "params": <optional kwargs>}.
    # An empty name resolves to nothing, so get_transforms() returns None for it.
    transforms = {
        "train": [{"name": ""}],
        "valid": [{"name": ""}],
        "test": [{"name": ""}]
    }

    ######################
    # Loaders #
    ######################
    # Keyword arguments forwarded verbatim to torch DataLoader for each phase.
    loader_params = {
        "train": {
            'batch_size': 128,
            'shuffle': True,
            'num_workers': 8,
            'pin_memory': True,
            'drop_last': True,
        },
        "valid": {
            'batch_size': 32,
            'shuffle': False,
            'num_workers': 8,
            'pin_memory': True,
            'drop_last': False,
        },
        "test": {
            'batch_size': 32,
            'shuffle': False,
            'num_workers': 8,
            'pin_memory': True,
            'drop_last': False,
        }
    }

    ######################
    # Split #
    ######################
    # Name of a splitter class in sklearn.model_selection and its constructor kwargs.
    split = "GroupKFold"
    split_params = {
        "n_splits": 5,
    }

    ######################
    # Model #
    ######################
    # NOTE(review): the training cell passes len(train_value_col) as input_dim, not this value.
    input_dim = 5

    # Layer widths forwarded to RNNModel.
    dense_dim = 512
    lstm_dim = 512
    logit_dim = 512
    num_classes = 1

    ######################
    # Criterion #
    ######################
#     loss_name = "rmspe_loss"
#     loss_params: dict = {}

    ######################
    # Optimizer #
    ######################
    # Resolved by get_optimizer(): custom registry first, then torch.optim.
    optimizer_name = "AdamW"
    optimizer_params = {
        "lr": 0.001,
        'weight_decay': 1e-6
    }

    ######################
    # Scheduler #
    ######################
    # Resolved by get_scheduler() from torch.optim.lr_scheduler; None disables scheduling.
    scheduler_name = "CosineAnnealingLR"
    scheduler_params = {
        'T_max': 25, 
        'eta_min': 1e-6
    }

In [4]:
# Seed the run from the config; utils.set_seed is project-local
# (presumably seeds random / numpy / torch — verify in src/utils).
utils.set_seed(CFG.seed)

In [5]:
# Choose the input/output directories depending on where the notebook runs.
# NOTE(review): the local DATA_DIR is an absolute, user-specific path; the notebook will not
# run on another machine without editing it. The else-branch layout looks like a hosted
# kernel ("../input/...") — confirm.
if CFG.local:
    DATA_DIR = Path("/home/knikaido/work/Ventilator-Pressure-Prediction/data/ventilator-pressure-prediction")
    OUTPUT_DIR = Path('./output/')
else:
    DATA_DIR = Path("../input/ventilator-pressure-prediction")
    OUTPUT_DIR = Path('')   

In [6]:
def get_transforms(phase: str):
    """Build the transform pipeline configured for ``phase`` in ``CFG.transforms``.

    Each config entry names a class that is looked up in this module's globals and
    instantiated with the entry's optional ``"params"`` dict. Entries whose name does
    not resolve are skipped silently.

    Returns:
        A ``Compose`` over the instantiated transforms, or ``None`` when no transforms
        are configured (or none of the names resolve).
    """
    phase_confs = None if CFG.transforms is None else CFG.transforms[phase]
    if phase_confs is None:
        return None

    built = []
    for conf in phase_confs:
        factory = globals().get(conf["name"])
        if factory is None:
            # Unknown (or empty) transform name: nothing to build.
            continue
        kwargs = conf.get("params")
        built.append(factory(**({} if kwargs is None else kwargs)))

    return Compose(built) if len(built) > 0 else None
        
        
class Normalize:
    """Scale an array so that its largest absolute value becomes 1."""

    def __call__(self, y: np.ndarray):
        """Return ``y`` divided by ``max(|y|)`` as a Fortran-ordered array.

        An empty array is returned as-is, and an all-zero array is returned as zeros
        instead of the NaNs a 0/0 division would produce.
        """
        if y.size == 0:
            # .max() of an empty array raises; there is nothing to scale anyway.
            return np.asfortranarray(y)
        max_vol = np.abs(y).max()
        # Guard against division by zero for an all-zero input.
        scale = max_vol if max_vol != 0 else 1
        y_vol = y * 1 / scale
        return np.asfortranarray(y_vol)


class Compose:
    """Chain several callables, feeding each one's output into the next."""

    def __init__(self, transforms: list):
        # Callables are applied in list order.
        self.transforms = transforms

    def __call__(self, y: np.ndarray):
        """Apply every transform in order and return the final result."""
        result = y
        for step in self.transforms:
            result = step(result)
        return result

In [7]:
def compute_metric(preds, trues, u_outs):
    """Weighted mean absolute error over the steps where ``u_out`` is 0.

    Each step is weighted by ``1 - u_outs``, so steps with ``u_out == 1`` do not
    contribute. All three arrays must have the same shape.
    """
    weights = 1 - u_outs

    assert trues.shape == preds.shape and weights.shape == trues.shape, (trues.shape, preds.shape, weights.shape)

    weighted_abs_err = weights * np.abs(trues - preds)
    return weighted_abs_err.sum() / weights.sum()


class VentilatorLoss(nn.Module):
    """Masked MAE that directly optimizes the competition metric.

    Implemented in ``forward`` (not ``__call__``) so that ``nn.Module.__call__`` keeps
    running its hook machinery around the computation.
    """

    def forward(self, preds, y, u_out):
        """Return the mean absolute error over positions where ``u_out`` is 0.

        Args:
            preds: predicted values.
            y: target values, same shape as ``preds``.
            u_out: mask with 1 where the step must be ignored and 0 where it counts.

        Returns:
            The weighted MAE reduced over the last dimension. The result is NaN when
            every position along that dimension is masked out (0 / 0).
        """
        w = 1 - u_out
        mae = w * (y - preds).abs()
        mae = mae.sum(-1) / w.sum(-1)

        return mae

In [8]:
def get_criterion():
    """Create the training/validation loss (a fresh ``VentilatorLoss`` instance)."""
    criterion = VentilatorLoss()
    return criterion

In [9]:
# Registry of custom optimizer classes, consulted before torch.optim.
__OPTIMIZERS__ = {}


def get_optimizer(model: nn.Module):
    """Instantiate the optimizer named by ``CFG.optimizer_name`` for ``model``.

    Names are resolved from ``__OPTIMIZERS__`` first and from ``torch.optim`` otherwise.
    For ``"SAM"`` the class named by ``CFG.base_optimizer`` is resolved the same way and
    passed to ``SAM`` as the base optimizer.
    NOTE(review): neither ``SAM`` nor ``CFG.base_optimizer`` is defined in the visible
    cells, so the SAM branch cannot run as-is.
    """
    def resolve(name):
        # Custom registry wins; fall back to the attribute of torch.optim.
        custom = __OPTIMIZERS__.get(name)
        return custom if custom is not None else getattr(optim, name)

    if CFG.optimizer_name == "SAM":
        base_optimizer = resolve(CFG.base_optimizer)
        return SAM(model.parameters(), base_optimizer, **CFG.optimizer_params)

    optimizer_cls = resolve(CFG.optimizer_name)
    return optimizer_cls(model.parameters(), **CFG.optimizer_params)


def get_scheduler(optimizer):
    """Build the LR scheduler named by ``CFG.scheduler_name`` around ``optimizer``.

    The class is taken from ``torch.optim.lr_scheduler`` and constructed with
    ``CFG.scheduler_params``. Returns ``None`` when no scheduler name is configured.
    """
    name = CFG.scheduler_name
    if name is None:
        return None

    scheduler_cls = getattr(optim.lr_scheduler, name)
    return scheduler_cls(optimizer, **CFG.scheduler_params)

In [10]:
# validation
# Instantiate the cross-validation splitter class named by CFG.split from
# sklearn.model_selection, using CFG.split_params as constructor arguments.
splitter = getattr(model_selection, CFG.split)(**CFG.split_params)

In [11]:
class VentilatorDataset(torchdata.Dataset):
    """Dataset that yields one sample per ``breath_id`` (all rows of that breath).

    Args:
        df: frame with at least ``breath_id``, ``u_out`` and the feature columns.
            A missing ``pressure`` column is filled with 0 on an internal copy,
            so the caller's frame is left untouched.
        train_value_col: names of the continuous feature columns.
        train_category_col: names of the categorical feature columns.
    """

    def __init__(self, df, train_value_col, train_category_col):
        if "pressure" not in df.columns:
            # assign() returns a new frame, so the caller's DataFrame is not mutated.
            df = df.assign(pressure=0)
        self.df = df
        # GroupBy.indices maps each breath to *positional* row numbers, which is what
        # .iloc needs; GroupBy.groups holds index labels and only matches positions
        # when the frame has a default RangeIndex.
        self.groups = df.groupby('breath_id').indices
        self.keys = list(self.groups.keys())
        self.train_value_col = train_value_col
        self.train_category_col = train_category_col

    def __len__(self):
        """Number of breaths."""
        return len(self.groups)

    def __getitem__(self, idx):
        """Return the feature/target arrays of the ``idx``-th breath as a dict."""
        positions = self.groups[self.keys[idx]]
        df_ = self.df.iloc[positions]

        input_value = df_[self.train_value_col].values
        input_category = df_[self.train_category_col].values

        u_out_ = df_['u_out'].values
        p_ = df_['pressure'].values

        data = {
            "input_value": input_value.astype(np.float32),
            "input_category": input_category.astype(int),
            "u_out": u_out_.astype(np.float32),
            "p": p_.astype(np.float32),
        }

        return data

In [12]:
class RNNModel(nn.Module):
    """MLP + category embedding feeding a 2-layer bidirectional LSTM and a per-step head.

    Args:
        input_dim: number of continuous features per time step.
        lstm_dim: hidden size of each LSTM direction.
        dense_dim: output width of the feature MLP.
        logit_dim: hidden width of the output head.
        num_classes: number of outputs per time step.
    """

    def __init__(
        self,
        input_dim=4,
        lstm_dim=256,
        dense_dim=256,
        logit_dim=256,
        num_classes=1,
    ):
        super().__init__()

        # NOTE(review): padding_idx=0 pins embedding row 0 to zeros, yet the notebook's
        # R_C mapping assigns index 0 to a real category ('5_10') — confirm this is intended.
        self.rc_emb = nn.Embedding(9, 4, padding_idx=0)

        self.mlp = nn.Sequential(
            nn.Linear(input_dim, dense_dim // 2),
            nn.ReLU(),
            nn.Linear(dense_dim // 2, dense_dim),
            nn.ReLU(),
        )

        # "+ 4" is the embedding width concatenated to the MLP features.
        self.lstm = nn.LSTM(dense_dim + 4, lstm_dim, num_layers=2, batch_first=True, bidirectional=True)

        self.logits = nn.Sequential(
            nn.Linear(lstm_dim * 2, logit_dim),
            nn.ReLU(),
            nn.Linear(logit_dim, num_classes),
        )

        # Recurrent layers: orthogonal init for weight matrices, normal init for biases.
        # (The GRU case previously referenced an undefined `init` name.)
        for n, m in self.named_modules():
            if isinstance(m, (nn.LSTM, nn.GRU)):
                print(f'init {m}')
                for param in m.parameters():
                    if len(param.shape) >= 2:
                        nn.init.orthogonal_(param.data)
                    else:
                        nn.init.normal_(param.data)

    def forward(self, cont_seq_x, cate_seq_x):
        """Predict one value (or ``num_classes`` values) per time step.

        Args:
            cont_seq_x: continuous features, indexed as (batch, seq_len, input_dim).
            cate_seq_x: integer category ids with batch and sequence as the two leading
                dims — presumably (batch, seq_len, 1) as produced by the dataset; verify.

        Returns:
            Tensor of shape (batch, seq_len, num_classes).
        """
        bs = cont_seq_x.size(0)
        # Take the sequence length from the input instead of assuming 80 steps.
        seq_len = cont_seq_x.size(1)

        features = self.mlp(cont_seq_x)
        rc_emb = self.rc_emb(cate_seq_x).view(bs, seq_len, -1)
        features = torch.cat((rc_emb, features), 2)

        features, _ = self.lstm(features)
        pred = self.logits(features)
        return pred

In [13]:
# Learner class (pytorch-lightning)
class Learner(pl.LightningModule):
    """LightningModule wrapping ``model`` with the criterion from ``get_criterion()``."""

    def __init__(self, model):
        super().__init__()
        self.model = model
        self.criterion = get_criterion()

    def _predict_and_score(self, batch):
        """Run the model on one batch and compute the loss on flattened tensors."""
        preds = self.model(batch['input_value'], batch['input_category'])
        loss = self.criterion(preds.view(-1), batch['p'].view(-1), batch['u_out'].view(-1))
        return preds, loss

    def training_step(self, batch, batch_idx):
        """Return the training loss for ``batch``."""
        _, loss = self._predict_and_score(batch)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log the validation loss and return detached tensors for epoch-level scoring."""
        preds, loss = self._predict_and_score(batch)

        self.log('Loss/val', loss, on_step=False, on_epoch=True, prog_bar=False, logger=True)

        return OrderedDict(
            targets=batch['p'].detach(),
            preds=preds.detach(),
            u_outs=batch['u_out'].detach(),
            loss=loss.detach(),
        )

    def validation_epoch_end(self, outputs):
        """Score the whole validation epoch with ``get_score`` and log/print the result."""
        def gather(key):
            # Flatten every step's tensor and concatenate into one numpy vector.
            return torch.cat([step[key].view(-1) for step in outputs]).cpu().numpy()

        targets = gather("targets")
        preds = gather("preds")
        u_outs = gather("u_outs")

        score = get_score(preds, targets, u_outs)
        self.log('custom_mae/val', score, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        print(f'epoch = {self.current_epoch}, custom_mae = {score}')

    def configure_optimizers(self):
        """Return the optimizer/scheduler pair built from the config helpers."""
        optimizer = get_optimizer(self.model)
        return {
            "optimizer": optimizer,
            "lr_scheduler": get_scheduler(optimizer),
            "monitor": "Loss/val",
        }

In [14]:
# Device used by evaluate() and for the post-training model: CUDA when available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [15]:
def get_score(y_pred, y_true, u_outs):
    """Score predictions with ``compute_metric`` (weighted MAE over ``u_out == 0`` steps)."""
    score = compute_metric(y_pred, y_true, u_outs)
    return score


def to_np(input):
    """Detach a tensor from the graph, move it to the CPU and return it as a numpy array."""
    detached = input.detach()
    on_cpu = detached.cpu()
    return on_cpu.numpy()

# oof
def evaluate(model, loaders, phase):
    """Run ``model`` over ``loaders[phase]`` without gradients.

    Inputs are moved to the global ``device``; the model is switched to eval mode for
    the pass and back to train mode afterwards.

    Returns:
        ``(predictions, targets)`` as flat 1-D numpy arrays covering all batches.
    """
    model.eval()
    batch_preds, batch_targets = [], []
    with torch.no_grad():
        for batch in loaders[phase]:
            batch['input_value'] = batch['input_value'].to(device)
            batch['input_category'] = batch['input_category'].to(device)
            output = model(batch['input_value'], batch['input_category'])
            batch_preds.append(to_np(output))
            batch_targets.append(to_np(batch['p']))

    flat_preds = np.concatenate(batch_preds).reshape(-1)
    flat_targets = np.concatenate(batch_targets).reshape(-1)
    model.train()
    return flat_preds, flat_targets

In [16]:
# Load the raw train/test tables from DATA_DIR.
train = pd.read_csv(DATA_DIR / 'train.csv')
test = pd.read_csv(DATA_DIR / 'test.csv')
# One display() call renders both frames; `display(a), display(b)` builds a tuple of
# return values and makes the cell echo a stray "(None, None)".
display(train, test)

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure
0,1,1,20,50,0.000000,0.083334,0,5.837492
1,2,1,20,50,0.033652,18.383041,0,5.907794
2,3,1,20,50,0.067514,22.509278,0,7.876254
3,4,1,20,50,0.101542,22.808822,0,11.742872
4,5,1,20,50,0.135756,25.355850,0,12.234987
...,...,...,...,...,...,...,...,...
6035995,6035996,125749,50,10,2.504603,1.489714,1,3.869032
6035996,6035997,125749,50,10,2.537961,1.488497,1,3.869032
6035997,6035998,125749,50,10,2.571408,1.558978,1,3.798729
6035998,6035999,125749,50,10,2.604744,1.272663,1,4.079938


Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out
0,1,0,5,20,0.000000,0.000000,0
1,2,0,5,20,0.031904,7.515046,0
2,3,0,5,20,0.063827,14.651675,0
3,4,0,5,20,0.095751,21.230610,0
4,5,0,5,20,0.127644,26.320956,0
...,...,...,...,...,...,...,...
4023995,4023996,125748,20,10,2.530117,4.971245,1
4023996,4023997,125748,20,10,2.563853,4.975709,1
4023997,4023998,125748,20,10,2.597475,4.979468,1
4023998,4023999,125748,20,10,2.631134,4.982648,1


(None, None)

In [17]:
def get_raw_features(input_df, dataType = 'train'):
    """Return the raw feature columns ``time_step``, ``u_in``, ``R`` and ``C`` unchanged.

    ``dataType`` is accepted for signature parity with the other processors and is unused.
    """
    raw_columns = ['time_step', 'u_in', 'R', 'C']
    return input_df.loc[:, raw_columns]

In [18]:
def get_category_features(input_df, dataType = 'train'):
    """Encode each row's (R, C) pair as a single integer category ``R_C``.

    The pair is formatted as ``"<R>_<C>"`` and mapped through a fixed table of the nine
    known combinations; pairs not in the table become NaN. ``dataType`` is unused.

    Returns:
        A one-column DataFrame (``R_C``) sharing ``input_df``'s index.
    """
    rc_map = {'5_10': 0, '5_20': 1, '5_50': 2, '20_10': 3, '20_20': 4, '20_50': 5, '50_10': 6, '50_20': 7, '50_50': 8}

    rc_keys = pd.Series(
        [f'{r}_{c}' for r, c in zip(input_df['R'], input_df['C'])],
        index=input_df.index,
    )

    features = pd.DataFrame(index=input_df.index)
    features['R_C'] = rc_keys.map(rc_map)
    return features

In [19]:
def get_diff_shift_features(input_df, dataType = 'train'):
    """Per-breath lag/lead values of ``u_in`` and ``time_step`` and their differences.

    For every offset ``i`` in ``[-2, -1, 1, 2, 3, 4]`` this produces, in this order,
    ``u_in_shift_i``, ``u_in_diff_i``, ``time_step_shift_i`` and ``time_step_diff_i``,
    where ``shift`` is the value ``i`` rows earlier inside the same ``breath_id`` and
    ``diff`` is the current value minus that shifted value. Rows whose shifted position
    falls outside the breath get NaN. ``dataType`` is unused.

    The difference is taken directly from the groupby shift, so the result does not
    depend on the frame having a default RangeIndex or on breaths being stored
    contiguously, and no copy of the whole input frame is made.

    Returns:
        DataFrame with the 24 feature columns, sharing ``input_df``'s index.
    """
    shift_idx = [-2, -1, 1, 2, 3, 4]
    b_id_gby = input_df.groupby('breath_id')

    output_df = pd.DataFrame(index=input_df.index)
    # Subtracting the shifted column directly is faster than calling diff().
    for i in shift_idx:
        for c_ in ('u_in', 'time_step'):
            shifted = b_id_gby[c_].shift(i)
            output_df[f'{c_}_shift_{i}'] = shifted
            output_df[f'{c_}_diff_{i}'] = input_df[c_] - shifted

    return output_df

In [20]:
def get_cum_features(input_df, dataType = 'train'):
    """Running sums of ``u_in`` and ``time_step`` inside each ``breath_id``.

    ``dataType`` is unused.

    Returns:
        DataFrame with ``u_in_cumsum`` and ``time_step_cumsum``, sharing ``input_df``'s index.
    """
    per_breath = input_df.groupby(['breath_id'])

    features = pd.DataFrame(index=input_df.index)
    for source_col in ('u_in', 'time_step'):
        features[f'{source_col}_cumsum'] = per_breath[source_col].cumsum()

    return features

In [21]:
def get_agg_features(input_df, dataType = 'train'):
    """Per-breath summary statistics of ``u_in``, broadcast back to every row.

    For each ``breath_id`` the max, std, mean, first and last ``u_in`` are computed over
    rows with ``0 <= time_step <= 3.0`` and left-joined onto the input rows.
    ``dataType`` is unused.

    NOTE(review): the generated column names come from the aggregation callables (the
    recorded output shows ``u_in_amax``), so they may vary across library versions.

    Returns:
        DataFrame holding only the aggregate columns, one row per input row.
    """
    n_input_cols = input_df.shape[1]

    # Dict for aggregations
    create_feature_dict = {
        'u_in': [np.max, np.std, np.mean, 'first', 'last'],
    }

    def get_agg_window(start_time=0, end_time=3.0, add_suffix = False):
        """Aggregate over rows whose time_step lies in [start_time, end_time]."""
        after_start = input_df['time_step'] >= start_time
        before_end = input_df['time_step'] <= end_time
        window_stats = input_df[after_start & before_end].groupby(['breath_id']).agg(create_feature_dict)
        # Flatten the (column, statistic) MultiIndex into "column_statistic".
        window_stats.columns = ['_'.join(col) for col in window_stats.columns]

        if not add_suffix:
            return window_stats
        return window_stats.add_suffix('_' + str(start_time) + '_' + str(end_time))

    # Only the full 0-3.0 window is used; further windows (with add_suffix=True) could be
    # merged onto breath_stats in the same way.
    breath_stats = get_agg_window().reset_index()

    merged = pd.merge(input_df, breath_stats, how='left', on='breath_id')
    return merged.iloc[:, n_input_cols:]

In [22]:
def to_feature(input_df, dataType = 'train'):
    """Return a new DataFrame holding the feature matrix built from ``input_df``.

    Every processor is called as ``func(input_df, dataType)`` and timed; their outputs
    are joined column-wise in processor order. The per-processor frames are collected
    in a list and concatenated once, instead of re-copying a growing frame on every
    iteration.

    Raises:
        AssertionError: if a processor returns a frame whose length differs from the input.
    """

    processors = [
        get_raw_features,
        get_category_features,
        get_diff_shift_features,
        get_cum_features,
        get_agg_features
    ]

    feature_frames = []

    for func in tqdm(processors, total=len(processors)):
        with Timer(prefix='' + func.__name__ + ' '):
            _df = func(input_df, dataType)

        # Check that the lengths match (a mismatch means func is implemented incorrectly).
        assert len(_df) == len(input_df), func.__name__
        feature_frames.append(_df)

    out_df = pd.concat(feature_frames, axis=1)
#     out_df = utils.reduce_mem_usage(out_df)

    return out_df

In [23]:
# Build the feature matrices for train and test with the same processor pipeline.
train_df = to_feature(train, dataType = 'train')
test_df = to_feature(test, dataType = 'test')

  0%|          | 0/5 [00:00<?, ?it/s]

get_raw_features  0.028[s]


 40%|████      | 2/5 [00:02<00:03,  1.22s/it]

get_category_features  2.355[s]
get_diff_shift_features  11.146[s]


 60%|██████    | 3/5 [00:14<00:11,  5.60s/it]

get_cum_features  0.201[s]


 80%|████████  | 4/5 [00:14<00:03,  3.81s/it]

get_agg_features  1.158[s]


100%|██████████| 5/5 [00:16<00:00,  3.36s/it]
  0%|          | 0/5 [00:00<?, ?it/s]

get_raw_features  0.018[s]


 40%|████      | 2/5 [00:01<00:02,  1.24it/s]

get_category_features  1.546[s]
get_diff_shift_features  6.983[s]


 60%|██████    | 3/5 [00:08<00:07,  3.53s/it]

get_cum_features  0.115[s]


 80%|████████  | 4/5 [00:09<00:02,  2.40s/it]

get_agg_features  0.700[s]


100%|██████████| 5/5 [00:10<00:00,  2.12s/it]


In [24]:
# Split the feature columns: everything except 'R_C' is treated as continuous
# (and standardised below); 'R_C' is the only categorical input.
train_value_col = [i for i in train_df.columns.to_list() if i not in ['R_C']]
train_category_col = ['R_C']

In [25]:
# Standardise the continuous features with statistics fitted on train only, then fill
# missing values in both sets with the (post-scaling) train column means.
# NOTE: this cell overwrites train_df / test_df, so it cannot be re-run on its own.
ss = StandardScaler()

train_category = train_df[train_category_col]
train_df = pd.DataFrame(ss.fit_transform(train_df[train_value_col]), columns=train_value_col)
train_mean = train_df.mean()
train_df = train_df.fillna(train_mean)

test_category = test_df[train_category_col]
test_df = pd.DataFrame(ss.transform(test_df[train_value_col]), columns=train_value_col).fillna(train_mean)

In [26]:
# One display() call renders both frames without the cell echoing a "(None, None)" tuple.
display(train_df, test_df)

Unnamed: 0,time_step,u_in,R,C,u_in_shift_-2,u_in_diff_-2,time_step_shift_-2,time_step_diff_-2,u_in_shift_-1,u_in_diff_-1,...,u_in_diff_4,time_step_shift_4,time_step_diff_4,u_in_cumsum,time_step_cumsum,u_in_amax,u_in_std,u_in_mean,u_in_first,u_in_last
0,-1.706609,-0.538775,-0.359072,1.394522,1.239960e+00,-2.503819e+00,-1.703981e+00,-5.670750e-01,8.443515e-01,-2.512338e+00,...,-4.852934e-17,4.273702e-17,-1.790277e-15,-0.980690,-1.116536,-0.245401,0.119327,0.513555,-0.550080,0.281162
1,-1.662676,0.823348,-0.359072,1.394522,1.263865e+00,-5.308005e-01,-1.658426e+00,-7.288073e-01,1.156962e+00,-5.787926e-01,...,-4.852934e-17,4.273702e-17,-1.790277e-15,-0.936302,-1.115471,-0.245401,0.119327,0.513555,-0.550080,0.281162
2,-1.618468,1.130480,-0.359072,1.394522,1.467130e+00,-3.577016e-01,-1.612622e+00,-8.800655e-01,1.179656e+00,-5.675528e-02,...,-4.852934e-17,4.273702e-17,-1.790277e-15,-0.881950,-1.113335,-0.245401,0.119327,0.513555,-0.550080,0.281162
3,-1.574044,1.152777,-0.359072,1.394522,1.619080e+00,-5.335696e-01,-1.567182e+00,-8.432008e-01,1.372623e+00,-3.633570e-01,...,-4.852934e-17,4.273702e-17,-1.790277e-15,-0.826876,-1.110123,-0.245401,0.119327,0.513555,-0.550080,0.281162
4,-1.529378,1.342362,-0.359072,1.394522,1.608516e+00,-2.398765e-01,-1.521650e+00,-7.557113e-01,1.516875e+00,-2.756372e-01,...,3.505107e+00,-1.705506e+00,7.451631e-01,-0.765651,-1.105828,-0.245401,0.119327,0.513555,-0.550080,0.281162
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6035995,1.563202,-0.434092,1.171893,-0.937525,-4.319748e-01,-5.327759e-02,1.648132e+00,-2.612726e-01,-4.356080e-01,-1.572543e-02,...,1.286697e-01,1.553074e+00,2.412574e-01,-0.046169,1.893579,-0.334837,-0.430115,-0.438246,-0.313487,-3.249996
6035996,1.606751,-0.434183,1.171893,-0.937525,-4.548240e-01,-2.202768e-02,1.692762e+00,-2.523388e-01,-4.302682e-01,-2.550644e-02,...,1.291340e-01,1.598890e+00,2.455403e-01,-0.042575,1.973871,-0.334837,-0.430115,-0.438246,-0.313487,-3.249996
6035997,1.650417,-0.428937,1.171893,-0.937525,-4.380590e-01,-3.732884e-02,1.737306e+00,-1.774798e-01,-4.519599e-01,2.316763e-02,...,1.391864e-01,1.644692e+00,2.719253e-01,-0.038811,2.055221,-0.334837,-0.430115,-0.438246,-0.313487,-3.249996
6035998,1.693939,-0.450248,1.171893,-0.937525,2.584907e-16,7.782393e-17,-1.094350e-16,2.408285e-15,-4.360442e-01,-4.454998e-02,...,9.167592e-02,1.690600e+00,2.566442e-01,-0.035738,2.137626,-0.334837,-0.430115,-0.438246,-0.313487,-3.249996


Unnamed: 0,time_step,u_in,R,C,u_in_shift_-2,u_in_diff_-2,time_step_shift_-2,time_step_diff_-2,u_in_shift_-1,u_in_diff_-1,...,u_in_diff_4,time_step_shift_4,time_step_diff_4,u_in_cumsum,time_step_cumsum,u_in_amax,u_in_std,u_in_mean,u_in_first,u_in_last
0,-1.706609,-0.544978,-1.124554,-0.354513,6.128857e-01,-1.651673e+00,-1.708917e+00,1.020982e+00,2.097362e-02,-1.041094e+00,...,-4.852934e-17,4.273702e-17,-1.790277e-15,-0.980891,-1.116536,0.048467,0.367378,0.364698,-0.553396,0.267353
1,-1.664958,0.014398,-1.124554,-0.354513,1.137916e+00,-1.549065e+00,-1.666179e+00,1.012356e+00,5.616568e-01,-9.894706e-01,...,-4.852934e-17,4.273702e-17,-1.790277e-15,-0.962745,-1.115526,0.048467,0.367378,0.364698,-0.553396,0.267353
2,-1.623282,0.545607,-1.124554,-0.354513,1.544151e+00,-1.324769e+00,-1.623482e+00,1.025706e+00,1.060088e+00,-9.133898e-01,...,-4.852934e-17,4.273702e-17,-1.790277e-15,-0.927367,-1.113507,0.048467,0.367378,0.364698,-0.553396,0.267353
3,-1.581604,1.035304,-1.124554,-0.354513,1.876616e+00,-1.060282e+00,-1.580758e+00,1.030121e+00,1.445741e+00,-7.103165e-01,...,-4.852934e-17,4.273702e-17,-1.790277e-15,-0.876103,-1.110478,0.048467,0.367378,0.364698,-0.553396,0.267353
4,-1.539968,1.414199,-1.124554,-0.354513,2.120740e+00,-8.376255e-01,-1.538032e+00,1.020777e+00,1.761363e+00,-5.842147e-01,...,3.645890e+00,-1.705506e+00,-1.053881e+00,-0.812548,-1.106440,0.048467,0.367378,0.364698,-0.553396,0.267353
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4023995,1.596511,-0.174948,-0.359072,-0.937525,-1.590032e-01,-4.658669e-02,1.683030e+00,-4.998149e-01,-1.714109e-01,-1.650030e-02,...,1.152955e-01,1.585413e+00,6.806547e-01,-0.510729,1.922381,-0.027660,-0.551736,-0.843438,-0.156138,0.279443
4023996,1.640554,-0.174615,-0.359072,-0.937525,-1.587494e-01,-4.644606e-02,1.728091e+00,-4.664415e-01,-1.711261e-01,-1.640419e-02,...,1.147211e-01,1.631512e+00,7.232725e-01,-0.498714,2.003492,-0.027660,-0.551736,-0.843438,-0.156138,0.279443
4023997,1.684448,-0.174336,-0.359072,-0.937525,-1.585319e-01,-4.633274e-02,1.773832e+00,-7.012871e-01,-1.708851e-01,-1.632528e-02,...,1.142163e-01,1.678477e+00,6.008123e-01,-0.486691,2.085667,-0.027660,-0.551736,-0.843438,-0.156138,0.279443
4023998,1.728391,-0.174099,-0.359072,-0.937525,2.584907e-16,7.782393e-17,-1.094350e-16,2.408285e-15,-1.706787e-01,-1.626316e-02,...,1.137965e-01,1.725193e+00,5.266806e-01,-0.474660,2.168907,-0.027660,-0.551736,-0.843438,-0.156138,0.279443


(None, None)

In [27]:
# Re-attach the categorical column and the id / grouping / target / mask columns from the
# raw frames. The concat is column-wise and aligns on the index, so all pieces are
# expected to share the same row index.
train_df = pd.concat([train_df, train_category, train[['id', 'breath_id', 'pressure', 'u_out']]], axis=1)
test_df = pd.concat([test_df, test_category, test[['id', 'breath_id', 'u_out']]], axis=1)

In [28]:
# Shrink the frames with the project helper; the recorded output reports a ~74% memory
# reduction (presumably by downcasting dtypes — verify in src/utils).
train_df = utils.reduce_mem_usage(train_df)
test_df = utils.reduce_mem_usage(test_df)

Mem. usage decreased from 1842.04 Mb to 472.02 Mb (74.4% reduction)
Mem. usage decreased from 1197.33 Mb to 307.01 Mb (74.4% reduction)


In [29]:
# Per-fold training, out-of-fold (OOF) evaluation and test inference.
oof_total = np.zeros((len(train), CFG.num_classes))
sub_preds = np.zeros((test.shape[0], len(CFG.folds)))
val_idxes = []
models = []
y = train['pressure']
groups = train['breath_id']
gkfold = model_selection.GroupKFold(n_splits=CFG.n_folds)
scores = []
input_dim = len(train_value_col)

for i, (trn_idx, val_idx) in enumerate(splitter.split(train_df, y, groups)):
    if i not in CFG.folds:
        continue

    # sub_preds has one column per *selected* fold, so address it by the fold's position
    # in CFG.folds rather than by the fold number (which overflows for e.g. folds=[1]).
    fold_pos = CFG.folds.index(i)

    trn_df = train_df.loc[trn_idx, :].reset_index(drop=True)
    val_df = train_df.loc[val_idx, :].reset_index(drop=True)
    trn_y = y.values[trn_idx]
    val_y = y.values[val_idx]


    loaders = {
        phase: torchdata.DataLoader(
            VentilatorDataset(
                df_, train_value_col, train_category_col
            ),
            **CFG.loader_params[phase])  # type: ignore
        for phase, df_ in zip(["train", "valid", "test"], [trn_df, val_df, test_df])
    }


    model = RNNModel(
        input_dim=input_dim,
        lstm_dim=CFG.lstm_dim,
        dense_dim=CFG.dense_dim,
        logit_dim=CFG.logit_dim,
        num_classes=CFG.num_classes,
    )
    model_name = model.__class__.__name__
#     break

    learner = Learner(model)

    # loggers
    RUN_NAME = f'exp{str(CFG.exp_num)}'
    wandb.init(project='Ventilator-Pressure-Prediction', entity='sqrt4kaido', group=RUN_NAME, job_type=RUN_NAME + f'-fold-{i}')
    wandb.run.name = RUN_NAME + f'-fold-{i}'
    wandb_config = wandb.config
    wandb_config.model_name = model_name
    wandb.watch(model)

    # callbacks
    callbacks = []
    # NOTE(review): learner.current_epoch is evaluated once here, before training starts,
    # so the epoch part of the file name is fixed; use ModelCheckpoint's '{epoch}'
    # template if per-epoch names are wanted.
    checkpoint_callback = ModelCheckpoint(
        monitor=f'Loss/val',
        mode='min',
        dirpath=OUTPUT_DIR,
        verbose=False,
        save_weights_only=True,
        filename=f'{model_name}-{learner.current_epoch}-{i}')
    callbacks.append(checkpoint_callback)

#     early_stop_callback = EarlyStopping(
#         monitor='Loss/val',
#         min_delta=0.00,
#         patience=10,
#         verbose=True,
#         mode='min')
#     callbacks.append(early_stop_callback)

    loggers = []
    loggers.append(WandbLogger())

    trainer = pl.Trainer(
        logger=loggers,
        callbacks=callbacks,
        max_epochs=CFG.epochs,
        default_root_dir=OUTPUT_DIR,
        gpus=1,
#         fast_dev_run=DEBUG,
        deterministic=True,
        benchmark=False,
        )

    # Dataloaders are passed positionally: the keyword for the training loader differs
    # between Lightning releases (train_dataloader vs. train_dataloaders).
    trainer.fit(learner, loaders['train'], loaders['valid'])
#     trainer.save_checkpoint(OUTPUT_DIR / "last.ckpt")
    print('train done.')

    #############
    # validation (to make oof)
    #############
    # Restore the best checkpoint (lowest 'Loss/val') before scoring.
    checkpoint = torch.load(checkpoint_callback.best_model_path)
    learner.load_state_dict(checkpoint['state_dict'])

    model = model.to(device)
    oof_pred, oof_target = evaluate(model, loaders, phase="valid")
    models.append(model)

    oof_score = get_score(oof_pred, oof_target, val_df['u_out'].values)
    scores.append(oof_score)
    # NOTE(review): predictions are divided by CFG.bias here although no visible cell
    # scales the target by it — confirm that the stored OOF values are on the intended scale.
    oof_total[val_idx] = oof_pred.reshape(1, -1).T / CFG.bias
    val_idxes.append(val_idx)

    print('validate done.')
    # The score is the masked MAE from get_score, not an AUC.
    print(f'fold = {i}, mae = {oof_score}')
    wandb.log({'CV_score': oof_score})

    #############
    # inference
    #############
    test_pred, _ = evaluate(model, loaders, phase="test")
    sub_preds[:, fold_pos] = test_pred

    print('inference done.')

# test_preds_total = np.array(test_preds_total)


init LSTM(516, 512, num_layers=2, batch_first=True, bidirectional=True)


[34m[1mwandb[0m: Currently logged in as: [33msqrt4kaido[0m (use `wandb login --relogin` to force relogin)


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type           | Params
---------------------------------------------
0 | model     | RNNModel       | 11.2 M
1 | criterion | VentilatorLoss | 0     
---------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.739    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

epoch = 0, custom_mae = 17.437217712402344


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

epoch = 0, custom_mae = 1.0020461082458496


Validating: 0it [00:00, ?it/s]

epoch = 1, custom_mae = 0.8108624219894409


Validating: 0it [00:00, ?it/s]

epoch = 2, custom_mae = 0.7873766422271729


Validating: 0it [00:00, ?it/s]

epoch = 3, custom_mae = 0.7594568133354187


Validating: 0it [00:00, ?it/s]

epoch = 4, custom_mae = 0.672055721282959


Validating: 0it [00:00, ?it/s]

epoch = 5, custom_mae = 0.6533063054084778


Validating: 0it [00:00, ?it/s]

epoch = 6, custom_mae = 0.620347261428833


Validating: 0it [00:00, ?it/s]

epoch = 7, custom_mae = 0.6062431335449219


Validating: 0it [00:00, ?it/s]

epoch = 8, custom_mae = 0.5629440546035767


Validating: 0it [00:00, ?it/s]

epoch = 9, custom_mae = 0.5312570333480835


Validating: 0it [00:00, ?it/s]

epoch = 10, custom_mae = 0.5140487551689148


Validating: 0it [00:00, ?it/s]

epoch = 11, custom_mae = 0.5542536973953247


Validating: 0it [00:00, ?it/s]

epoch = 12, custom_mae = 0.5529924631118774


Validating: 0it [00:00, ?it/s]

epoch = 13, custom_mae = 0.4627933204174042


Validating: 0it [00:00, ?it/s]

epoch = 14, custom_mae = 0.45239976048469543


Validating: 0it [00:00, ?it/s]

epoch = 15, custom_mae = 0.4299108684062958


Validating: 0it [00:00, ?it/s]

epoch = 16, custom_mae = 0.4133901596069336


Validating: 0it [00:00, ?it/s]

epoch = 17, custom_mae = 0.39524418115615845


Validating: 0it [00:00, ?it/s]

epoch = 18, custom_mae = 0.41577714681625366


Validating: 0it [00:00, ?it/s]

epoch = 19, custom_mae = 0.3882715702056885


Validating: 0it [00:00, ?it/s]

epoch = 20, custom_mae = 0.3777891993522644


Validating: 0it [00:00, ?it/s]

epoch = 21, custom_mae = 0.36217689514160156


Validating: 0it [00:00, ?it/s]

epoch = 22, custom_mae = 0.3606014549732208


Validating: 0it [00:00, ?it/s]

epoch = 23, custom_mae = 0.3575487732887268


Validating: 0it [00:00, ?it/s]

epoch = 24, custom_mae = 0.35576778650283813


Validating: 0it [00:00, ?it/s]

epoch = 25, custom_mae = 0.35549017786979675


Validating: 0it [00:00, ?it/s]

epoch = 26, custom_mae = 0.3560328483581543


Validating: 0it [00:00, ?it/s]

epoch = 27, custom_mae = 0.35611817240715027


Validating: 0it [00:00, ?it/s]

epoch = 28, custom_mae = 0.3571445643901825


Validating: 0it [00:00, ?it/s]

epoch = 29, custom_mae = 0.3573533892631531


Validating: 0it [00:00, ?it/s]

epoch = 30, custom_mae = 0.35728350281715393


Validating: 0it [00:00, ?it/s]

epoch = 31, custom_mae = 0.35853302478790283


Validating: 0it [00:00, ?it/s]

epoch = 32, custom_mae = 0.38338011503219604


Validating: 0it [00:00, ?it/s]

epoch = 33, custom_mae = 0.38606420159339905


Validating: 0it [00:00, ?it/s]

epoch = 34, custom_mae = 0.37230876088142395


Validating: 0it [00:00, ?it/s]

epoch = 35, custom_mae = 0.3778439462184906


Validating: 0it [00:00, ?it/s]

epoch = 36, custom_mae = 0.4229448735713959


Validating: 0it [00:00, ?it/s]

epoch = 37, custom_mae = 0.40463101863861084


Validating: 0it [00:00, ?it/s]

epoch = 38, custom_mae = 0.3900514543056488


Validating: 0it [00:00, ?it/s]

epoch = 39, custom_mae = 0.46451881527900696


Validating: 0it [00:00, ?it/s]

epoch = 40, custom_mae = 0.44533029198646545


Validating: 0it [00:00, ?it/s]

epoch = 41, custom_mae = 0.4302144944667816


Validating: 0it [00:00, ?it/s]

epoch = 42, custom_mae = 0.3995261490345001


Validating: 0it [00:00, ?it/s]

epoch = 43, custom_mae = 0.47178754210472107


Validating: 0it [00:00, ?it/s]

epoch = 44, custom_mae = 0.41016530990600586


Validating: 0it [00:00, ?it/s]

epoch = 45, custom_mae = 0.5117592215538025


Validating: 0it [00:00, ?it/s]

epoch = 46, custom_mae = 0.4126783013343811


Validating: 0it [00:00, ?it/s]

epoch = 47, custom_mae = 0.43102580308914185


Validating: 0it [00:00, ?it/s]

epoch = 48, custom_mae = 0.4856990575790405


Validating: 0it [00:00, ?it/s]

epoch = 49, custom_mae = 0.40894246101379395


Validating: 0it [00:00, ?it/s]

epoch = 50, custom_mae = 0.4288565218448639


Validating: 0it [00:00, ?it/s]

epoch = 51, custom_mae = 0.43523961305618286


Validating: 0it [00:00, ?it/s]

epoch = 52, custom_mae = 0.45325928926467896


Validating: 0it [00:00, ?it/s]

epoch = 53, custom_mae = 0.3662160038948059


Validating: 0it [00:00, ?it/s]

epoch = 54, custom_mae = 0.5674684047698975


Validating: 0it [00:00, ?it/s]

epoch = 55, custom_mae = 0.41392964124679565


Validating: 0it [00:00, ?it/s]

epoch = 56, custom_mae = 0.3600611686706543


Validating: 0it [00:00, ?it/s]

epoch = 57, custom_mae = 0.408227801322937


Validating: 0it [00:00, ?it/s]

epoch = 58, custom_mae = 0.3968241214752197


Validating: 0it [00:00, ?it/s]

epoch = 59, custom_mae = 0.3345932364463806


Validating: 0it [00:00, ?it/s]

epoch = 60, custom_mae = 0.32580655813217163


Validating: 0it [00:00, ?it/s]

epoch = 61, custom_mae = 0.3141980469226837


Validating: 0it [00:00, ?it/s]

epoch = 62, custom_mae = 0.3451896607875824


Validating: 0it [00:00, ?it/s]

epoch = 63, custom_mae = 0.3016674816608429


Validating: 0it [00:00, ?it/s]

epoch = 64, custom_mae = 0.2991287112236023


Validating: 0it [00:00, ?it/s]

epoch = 65, custom_mae = 0.3052911162376404


Validating: 0it [00:00, ?it/s]

epoch = 66, custom_mae = 0.2863280773162842


Validating: 0it [00:00, ?it/s]

epoch = 67, custom_mae = 0.2829227149486542


Validating: 0it [00:00, ?it/s]

epoch = 68, custom_mae = 0.2682833671569824


Validating: 0it [00:00, ?it/s]

epoch = 69, custom_mae = 0.2676975429058075


Validating: 0it [00:00, ?it/s]

epoch = 70, custom_mae = 0.26166167855262756


Validating: 0it [00:00, ?it/s]

epoch = 71, custom_mae = 0.2594155967235565


Validating: 0it [00:00, ?it/s]

epoch = 72, custom_mae = 0.2590080797672272


Validating: 0it [00:00, ?it/s]

epoch = 73, custom_mae = 0.25713875889778137


Validating: 0it [00:00, ?it/s]

epoch = 74, custom_mae = 0.2567891478538513


Validating: 0it [00:00, ?it/s]

epoch = 75, custom_mae = 0.25663578510284424


Validating: 0it [00:00, ?it/s]

epoch = 76, custom_mae = 0.25661501288414


Validating: 0it [00:00, ?it/s]

epoch = 77, custom_mae = 0.2567797601222992


Validating: 0it [00:00, ?it/s]

epoch = 78, custom_mae = 0.25782695412635803


Validating: 0it [00:00, ?it/s]

epoch = 79, custom_mae = 0.2580093443393707


Validating: 0it [00:00, ?it/s]

epoch = 80, custom_mae = 0.25973179936408997


Validating: 0it [00:00, ?it/s]

epoch = 81, custom_mae = 0.2618767321109772


Validating: 0it [00:00, ?it/s]

epoch = 82, custom_mae = 0.26157939434051514


Validating: 0it [00:00, ?it/s]

epoch = 83, custom_mae = 0.2713295817375183


Validating: 0it [00:00, ?it/s]

epoch = 84, custom_mae = 0.27847930788993835


Validating: 0it [00:00, ?it/s]

epoch = 85, custom_mae = 0.2897033989429474


Validating: 0it [00:00, ?it/s]

epoch = 86, custom_mae = 0.28014296293258667


Validating: 0it [00:00, ?it/s]

epoch = 87, custom_mae = 0.2901665270328522


Validating: 0it [00:00, ?it/s]

epoch = 88, custom_mae = 0.2897951304912567


Validating: 0it [00:00, ?it/s]

epoch = 89, custom_mae = 0.34511080384254456


Validating: 0it [00:00, ?it/s]

epoch = 90, custom_mae = 0.41196906566619873


Validating: 0it [00:00, ?it/s]

epoch = 91, custom_mae = 0.3068455457687378


Validating: 0it [00:00, ?it/s]

epoch = 92, custom_mae = 0.35059598088264465


Validating: 0it [00:00, ?it/s]

epoch = 93, custom_mae = 0.32462039589881897


Validating: 0it [00:00, ?it/s]

epoch = 94, custom_mae = 0.30217236280441284


Validating: 0it [00:00, ?it/s]

epoch = 95, custom_mae = 0.4288979768753052


Validating: 0it [00:00, ?it/s]

epoch = 96, custom_mae = 0.3441360592842102


Validating: 0it [00:00, ?it/s]

epoch = 97, custom_mae = 0.37188848853111267


Validating: 0it [00:00, ?it/s]

epoch = 98, custom_mae = 0.44201356172561646


Validating: 0it [00:00, ?it/s]

epoch = 99, custom_mae = 0.3499276340007782


Validating: 0it [00:00, ?it/s]

epoch = 100, custom_mae = 0.34677237272262573


Validating: 0it [00:00, ?it/s]

epoch = 101, custom_mae = 0.3411085307598114


Validating: 0it [00:00, ?it/s]

epoch = 102, custom_mae = 0.4696846306324005


Validating: 0it [00:00, ?it/s]

epoch = 103, custom_mae = 0.3331145644187927


Validating: 0it [00:00, ?it/s]

epoch = 104, custom_mae = 0.41172781586647034


Validating: 0it [00:00, ?it/s]

epoch = 105, custom_mae = 0.3118434548377991


Validating: 0it [00:00, ?it/s]

epoch = 106, custom_mae = 0.30227670073509216


Validating: 0it [00:00, ?it/s]

epoch = 107, custom_mae = 0.29616057872772217


Validating: 0it [00:00, ?it/s]

epoch = 108, custom_mae = 0.3106217086315155


Validating: 0it [00:00, ?it/s]

epoch = 109, custom_mae = 0.28029781579971313


Validating: 0it [00:00, ?it/s]

epoch = 110, custom_mae = 0.27709516882896423


Validating: 0it [00:00, ?it/s]

epoch = 111, custom_mae = 0.28543105721473694


Validating: 0it [00:00, ?it/s]

epoch = 112, custom_mae = 0.2765474021434784


Validating: 0it [00:00, ?it/s]

epoch = 113, custom_mae = 0.27011266350746155


Validating: 0it [00:00, ?it/s]

epoch = 114, custom_mae = 0.27995535731315613


Validating: 0it [00:00, ?it/s]

epoch = 115, custom_mae = 0.25836095213890076


Validating: 0it [00:00, ?it/s]

epoch = 116, custom_mae = 0.2621538043022156


Validating: 0it [00:00, ?it/s]

epoch = 117, custom_mae = 0.2455420047044754


Validating: 0it [00:00, ?it/s]

epoch = 118, custom_mae = 0.24577902257442474


Validating: 0it [00:00, ?it/s]

epoch = 119, custom_mae = 0.2470245361328125


Validating: 0it [00:00, ?it/s]

epoch = 120, custom_mae = 0.239319309592247


Validating: 0it [00:00, ?it/s]

epoch = 121, custom_mae = 0.23857273161411285


Validating: 0it [00:00, ?it/s]

epoch = 122, custom_mae = 0.23762118816375732


Validating: 0it [00:00, ?it/s]

epoch = 123, custom_mae = 0.2370583415031433


Validating: 0it [00:00, ?it/s]

epoch = 124, custom_mae = 0.2366999387741089


Validating: 0it [00:00, ?it/s]

epoch = 125, custom_mae = 0.2366713285446167


Validating: 0it [00:00, ?it/s]

epoch = 126, custom_mae = 0.2367272526025772


Validating: 0it [00:00, ?it/s]

epoch = 127, custom_mae = 0.23661468923091888


Validating: 0it [00:00, ?it/s]

epoch = 128, custom_mae = 0.23655933141708374


Validating: 0it [00:00, ?it/s]

epoch = 129, custom_mae = 0.2372959554195404


Validating: 0it [00:00, ?it/s]

epoch = 130, custom_mae = 0.23785004019737244


Validating: 0it [00:00, ?it/s]

epoch = 131, custom_mae = 0.23737575113773346


Validating: 0it [00:00, ?it/s]

epoch = 132, custom_mae = 0.23976580798625946


Validating: 0it [00:00, ?it/s]

epoch = 133, custom_mae = 0.24018901586532593


Validating: 0it [00:00, ?it/s]

epoch = 134, custom_mae = 0.24084293842315674


Validating: 0it [00:00, ?it/s]

epoch = 135, custom_mae = 0.24601800739765167


Validating: 0it [00:00, ?it/s]

epoch = 136, custom_mae = 0.2529968023300171


Validating: 0it [00:00, ?it/s]

epoch = 137, custom_mae = 0.2958715260028839


Validating: 0it [00:00, ?it/s]

epoch = 138, custom_mae = 0.2965468168258667


Validating: 0it [00:00, ?it/s]

epoch = 139, custom_mae = 0.25971487164497375


Validating: 0it [00:00, ?it/s]

epoch = 140, custom_mae = 0.27705562114715576


Validating: 0it [00:00, ?it/s]

epoch = 141, custom_mae = 0.29377540946006775


Validating: 0it [00:00, ?it/s]

epoch = 142, custom_mae = 0.32423245906829834


Validating: 0it [00:00, ?it/s]

epoch = 143, custom_mae = 0.2953219711780548


Validating: 0it [00:00, ?it/s]

epoch = 144, custom_mae = 0.2822711765766144


Validating: 0it [00:00, ?it/s]

epoch = 145, custom_mae = 0.3635061979293823


Validating: 0it [00:00, ?it/s]

epoch = 146, custom_mae = 0.2918395400047302


Validating: 0it [00:00, ?it/s]

epoch = 147, custom_mae = 0.5078725218772888


Validating: 0it [00:00, ?it/s]

epoch = 148, custom_mae = 0.36015817523002625


Validating: 0it [00:00, ?it/s]

epoch = 149, custom_mae = 0.3262135982513428


Validating: 0it [00:00, ?it/s]

epoch = 150, custom_mae = 0.311964750289917


Validating: 0it [00:00, ?it/s]

epoch = 151, custom_mae = 0.31984376907348633


Validating: 0it [00:00, ?it/s]

epoch = 152, custom_mae = 0.3044431507587433


Validating: 0it [00:00, ?it/s]

epoch = 153, custom_mae = 0.34031251072883606


Validating: 0it [00:00, ?it/s]

epoch = 154, custom_mae = 0.2894577085971832


Validating: 0it [00:00, ?it/s]

epoch = 155, custom_mae = 0.30551162362098694


Validating: 0it [00:00, ?it/s]

epoch = 156, custom_mae = 0.2839062213897705


Validating: 0it [00:00, ?it/s]

epoch = 157, custom_mae = 0.31850260496139526


Validating: 0it [00:00, ?it/s]

epoch = 158, custom_mae = 0.26893308758735657


Validating: 0it [00:00, ?it/s]

epoch = 159, custom_mae = 0.281325101852417


Validating: 0it [00:00, ?it/s]

epoch = 160, custom_mae = 0.2551605999469757


Validating: 0it [00:00, ?it/s]

epoch = 161, custom_mae = 0.2603538930416107


Validating: 0it [00:00, ?it/s]

epoch = 162, custom_mae = 0.26324039697647095


Validating: 0it [00:00, ?it/s]

epoch = 163, custom_mae = 0.24416831135749817


Validating: 0it [00:00, ?it/s]

epoch = 164, custom_mae = 0.24754224717617035


Validating: 0it [00:00, ?it/s]

epoch = 165, custom_mae = 0.23822352290153503


Validating: 0it [00:00, ?it/s]

epoch = 166, custom_mae = 0.23587961494922638


Validating: 0it [00:00, ?it/s]

epoch = 167, custom_mae = 0.23554636538028717


Validating: 0it [00:00, ?it/s]

epoch = 168, custom_mae = 0.23067131638526917


Validating: 0it [00:00, ?it/s]

epoch = 169, custom_mae = 0.2286965548992157


Validating: 0it [00:00, ?it/s]

epoch = 170, custom_mae = 0.2270730435848236


Validating: 0it [00:00, ?it/s]

epoch = 171, custom_mae = 0.2260543256998062


Validating: 0it [00:00, ?it/s]

epoch = 172, custom_mae = 0.22540496289730072


Validating: 0it [00:00, ?it/s]

epoch = 173, custom_mae = 0.22521322965621948


Validating: 0it [00:00, ?it/s]

epoch = 174, custom_mae = 0.22476616501808167


Validating: 0it [00:00, ?it/s]

epoch = 175, custom_mae = 0.22474046051502228


Validating: 0it [00:00, ?it/s]

epoch = 176, custom_mae = 0.2248213291168213


Validating: 0it [00:00, ?it/s]

epoch = 177, custom_mae = 0.22478902339935303


Validating: 0it [00:00, ?it/s]

epoch = 178, custom_mae = 0.22539077699184418


Validating: 0it [00:00, ?it/s]

epoch = 179, custom_mae = 0.22541384398937225


Validating: 0it [00:00, ?it/s]

epoch = 180, custom_mae = 0.2269209623336792


Validating: 0it [00:00, ?it/s]

epoch = 181, custom_mae = 0.22797785699367523


Validating: 0it [00:00, ?it/s]

epoch = 182, custom_mae = 0.23070164024829865


Validating: 0it [00:00, ?it/s]

epoch = 183, custom_mae = 0.23557360470294952


Validating: 0it [00:00, ?it/s]

epoch = 184, custom_mae = 0.23166903853416443


Validating: 0it [00:00, ?it/s]

epoch = 185, custom_mae = 0.2357710301876068


Validating: 0it [00:00, ?it/s]

epoch = 186, custom_mae = 0.2334492802619934


Validating: 0it [00:00, ?it/s]

epoch = 187, custom_mae = 0.2400142401456833


Validating: 0it [00:00, ?it/s]

epoch = 188, custom_mae = 0.24836516380310059


Validating: 0it [00:00, ?it/s]

epoch = 189, custom_mae = 0.24651657044887543


Validating: 0it [00:00, ?it/s]

epoch = 190, custom_mae = 0.30636852979660034


Validating: 0it [00:00, ?it/s]

epoch = 191, custom_mae = 0.32787567377090454


Validating: 0it [00:00, ?it/s]

epoch = 192, custom_mae = 0.3762907385826111


Validating: 0it [00:00, ?it/s]

epoch = 193, custom_mae = 0.3003760278224945


Validating: 0it [00:00, ?it/s]

epoch = 194, custom_mae = 0.2681061625480652


Validating: 0it [00:00, ?it/s]

epoch = 195, custom_mae = 0.2785860300064087


Validating: 0it [00:00, ?it/s]

epoch = 196, custom_mae = 0.3151203691959381


Validating: 0it [00:00, ?it/s]

epoch = 197, custom_mae = 0.3108154833316803


Validating: 0it [00:00, ?it/s]

epoch = 198, custom_mae = 0.280575692653656


Validating: 0it [00:00, ?it/s]

epoch = 199, custom_mae = 0.2818984091281891
train done.
validate done.
fold = 0, auc = 0.22474045990447625
inference done.


In [30]:
# Score the out-of-fold (OOF) predictions and write them to OUTPUT_DIR.
# `oof_score` is assigned on BOTH paths because the wandb summary cell logs it
# as `CV_score`; previously it was only defined when a subset of folds was run.
if len(CFG.folds) != CFG.n_folds:
    # Not every fold was trained: score only the held-out predictions at hand.
    oof_score = get_score(oof_pred, oof_target, val_df['u_out'].values)
    print(f'MAE {oof_score}')

    # .copy() gives an independent frame, so adding a column below does not
    # write into a slice of `train` (avoids SettingWithCopyWarning).
    # NOTE(review): only val_idxes[0] is used — presumably oof_pred lines up with
    # that first entry; confirm when CFG.folds holds more than one fold.
    oof_df = train.iloc[val_idxes[0], :1].copy()
    oof_df['pressure'] = oof_pred
else:
    # NOTE(review): get_score is called as (y, oof_total) here but as
    # (oof_pred, oof_target) above — the argument order looks swapped between
    # branches; harmless only if the metric is symmetric. Confirm.
    score = get_score(y, oof_total, train['u_out'].values)
    oof_score = score  # keep `score` for existing users, expose the common name too
    print(f'MAE {score}: folds: {scores}')

    oof_df = pd.DataFrame({'id': train['id'].values, 'pressure': oof_total.reshape(-1)})

# Both branches persist the frame to the same per-experiment file.
oof_df.to_csv(OUTPUT_DIR / f'oof{CFG.exp_num}.csv', index=False)
oof_df

MAE 0.22474045990447625


Unnamed: 0,id,pressure
240,241,6.417948
241,242,5.667470
242,243,6.806201
243,244,9.437632
244,245,11.526283
...,...,...
6035995,6035996,34.482624
6035996,6035997,34.248466
6035997,6035998,33.460377
6035998,6035999,31.141582


In [31]:
# Build the submission frame: start from the sample file and overwrite its
# `pressure` column with the mean of `sub_preds` taken along axis 1
# (presumably one column of predictions per trained fold — confirm upstream).
sub = pd.read_csv(DATA_DIR / 'sample_submission.csv').assign(
    pressure=np.mean(sub_preds, axis=1)
)

# Persist without the index so the CSV keeps the columns of the sample file.
submission_file = OUTPUT_DIR / f'sub{CFG.exp_num}.csv'
sub.to_csv(submission_file, index=False)
sub

Unnamed: 0,id,pressure
0,1,5.921979
1,2,5.839980
2,3,7.023990
3,4,7.638119
4,5,9.091606
...,...,...
4023995,4023996,12.817714
4023996,4023997,12.970569
4023997,4023998,12.900802
4023998,4023999,12.156372


In [32]:
# Log the final cross-validation score to a dedicated W&B run named "summary".
# The run is used as a context manager so it is always finished — even when
# logging raises (e.g. `oof_score` is undefined) — instead of being left open
# in the kernel and swallowing the metrics of whatever is logged next.
with wandb.init(
    project='Ventilator-Pressure-Prediction',
    entity='sqrt4kaido',
    group=RUN_NAME,
    job_type='summary',
    name='summary',
) as run:
    run.log({'CV_score': oof_score})
    # Notebook upload is currently disabled:
    # wandb.save(utils.get_notebook_path())

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
CV_score,▁
Loss/val,█▆▅▃▃▃▃▃▃▄▄▃▂▂▁▁▁▂▂▃▂▂▂▁▁▁▁▂▂▄▂▂▁▁▁▁▁▁▂▂
custom_mae/val,█▆▅▃▃▃▃▃▃▄▄▃▂▂▁▁▁▂▂▃▂▂▂▁▁▁▁▂▂▄▂▂▁▁▁▁▁▁▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
CV_score,0.22474
Loss/val,0.28184
custom_mae/val,0.2819
epoch,199.0
trainer/global_step,94199.0


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
CV_score,▁

0,1
CV_score,0.22474
