In [1]:
import sys
from time import time
import numpy as np
import pandas as pd
from pathlib import Path
import lightgbm as lgb
import matplotlib.pyplot as plt 
import seaborn as sns
from tqdm import tqdm
import copy
import wandb
from collections import OrderedDict

from sklearn.metrics import mean_absolute_error
from sklearn import model_selection
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as torchdata

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.loggers import WandbLogger


In [2]:
sys.path.append('../../src/')
import utils as utils
from utils import Timer

In [3]:
class CFG:
    """Experiment configuration, read throughout the notebook as ``CFG.<name>``."""

    # ---- run control ----
    seed = 42
    exp_num = 32          # used to build the wandb run/group name
    local = True          # selects the local vs. hosted data/output directories
    n_folds = 5
    folds = [0]           # fold indices that are actually trained
    debug = False
    bias = 1000
    epochs = 200

    # ---- dataset: per-phase transform specs resolved by get_transforms ----
    transforms = dict(
        train=[dict(name="")],
        valid=[dict(name="")],
        test=[dict(name="")],
    )

    # ---- loaders: keyword arguments forwarded to torch DataLoader per phase ----
    loader_params = dict(
        train=dict(
            batch_size=128,
            shuffle=True,
            num_workers=8,
            pin_memory=True,
            drop_last=True,
        ),
        valid=dict(
            batch_size=32,
            shuffle=False,
            num_workers=8,
            pin_memory=True,
            drop_last=False,
        ),
        test=dict(
            batch_size=32,
            shuffle=False,
            num_workers=8,
            pin_memory=True,
            drop_last=False,
        ),
    )

    # ---- split: class name looked up in sklearn.model_selection ----
    split = "GroupKFold"
    split_params = dict(n_splits=5)

    # ---- model sizes ----
    input_dim = 5
    dense_dim = 512
    lstm_dim = 512
    logit_dim = 512
    num_classes = 1

    # ---- criterion: no configurable loss; get_criterion() returns VentilatorLoss ----

    # ---- optimizer: class name looked up in torch.optim ----
    optimizer_name = "AdamW"
    optimizer_params = dict(lr=0.001, weight_decay=1e-6)

    # ---- scheduler: class name looked up in torch.optim.lr_scheduler ----
    scheduler_name = "CosineAnnealingWarmRestarts"
    scheduler_params = dict(T_0=40, T_mult=1)

In [4]:
# Seed random number generators through the project helper, using the configured seed.
# NOTE(review): which libraries set_seed covers (random/numpy/torch/CUDA) is defined in utils — confirm there.
utils.set_seed(CFG.seed)

In [5]:
# Pick the input and output directories for the current environment.
if not CFG.local:
    # Hosted run: relative input mount, outputs written to the working directory.
    DATA_DIR = Path("../input/ventilator-pressure-prediction")
    OUTPUT_DIR = Path('')
else:
    # NOTE(review): absolute, user-specific path — the notebook is not portable when CFG.local is True.
    DATA_DIR = Path("/home/knikaido/work/Ventilator-Pressure-Prediction/data/ventilator-pressure-prediction")
    OUTPUT_DIR = Path('./output/')

In [6]:
def get_transforms(phase: str):
    """Build the transform pipeline configured for ``phase`` in ``CFG.transforms``.

    Each entry is a dict with a ``"name"`` and optional ``"params"``. The name is
    looked up among this module's globals; entries whose name is not found are
    skipped silently.

    Returns:
        A ``Compose`` of the instantiated transforms, or ``None`` when nothing is
        configured for the phase or no entry resolved to a known name.
    """
    phase_confs = None if CFG.transforms is None else CFG.transforms[phase]
    if phase_confs is None:
        return None

    built = []
    for conf in phase_confs:
        kwargs = conf.get("params")
        if kwargs is None:
            kwargs = {}
        factory = globals().get(conf["name"])
        if factory is None:
            # Unknown transform name (including the empty-string placeholder): ignore it.
            continue
        built.append(factory(**kwargs))

    return Compose(built) if built else None
        
        
class Normalize:
    """Scale an array by its largest absolute value."""

    def __call__(self, y: np.ndarray):
        """Return ``y`` divided by ``max(|y|)`` as a Fortran-ordered array.

        No guard exists for an all-zero input, where the divisor is zero.
        """
        peak = np.abs(y).max()
        return np.asfortranarray(y * 1 / peak)


class Compose:
    """Chain callables: the output of each transform feeds the next one."""

    def __init__(self, transforms: list):
        # Transforms are applied in list order.
        self.transforms = transforms

    def __call__(self, y: np.ndarray):
        """Apply every stored transform to ``y`` in sequence and return the result."""
        result = y
        for step in self.transforms:
            result = step(result)
        return result

In [7]:
def compute_metric(preds, trues, u_outs):
    """Mean absolute error weighted by ``1 - u_outs``.

    Positions where ``u_outs`` is 1 get weight 0 and therefore do not contribute.

    Args:
        preds: array of predictions.
        trues: array of targets, same shape as ``preds``.
        u_outs: array of mask values, same shape as ``preds``.

    Returns:
        ``sum(w * |trues - preds|) / sum(w)`` with ``w = 1 - u_outs``.

    Raises:
        AssertionError: if the three shapes differ.
    """
    weights = 1 - u_outs

    assert trues.shape == preds.shape and weights.shape == trues.shape, \
        (trues.shape, preds.shape, weights.shape)

    weighted_abs_err = weights * np.abs(trues - preds)
    return weighted_abs_err.sum() / weights.sum()


class VentilatorLoss(nn.Module):
    """Masked mean absolute error with weights ``w = 1 - u_out``.

    Mirrors ``compute_metric``: positions with ``u_out == 1`` get weight 0.
    The reduction runs over the last dimension only, so 1-D inputs give a scalar
    and higher-rank inputs give one value per leading index.
    """

    def forward(self, preds, y, u_out):
        """Return ``sum(w * |y - preds|, -1) / sum(w, -1)``.

        Implemented as ``forward`` (not ``__call__``) so that ``nn.Module``'s call
        machinery, including registered hooks, keeps working.
        """
        w = 1 - u_out
        abs_err = w * (y - preds).abs()
        denom = w.sum(-1)
        # If every position is masked the weight sum is 0; divide by 1 instead so the
        # result is a finite value rather than NaN. Non-zero sums are untouched.
        denom = torch.where(denom == 0, torch.ones_like(denom), denom)
        return abs_err.sum(-1) / denom

In [8]:
def get_criterion():
    """Create the loss module used for training and validation."""
    criterion = VentilatorLoss()
    return criterion

In [9]:
# Registry of custom optimizer classes, keyed by name; consulted before torch.optim.
__OPTIMIZERS__ = {}


def get_optimizer(model: nn.Module):
    """Instantiate the optimizer named by ``CFG.optimizer_name`` for ``model``.

    Names are resolved in ``__OPTIMIZERS__`` first and in ``torch.optim`` otherwise;
    ``CFG.optimizer_params`` is forwarded as keyword arguments.

    NOTE(review): the "SAM" path reads ``CFG.base_optimizer`` and calls ``SAM``,
    neither of which is defined in the visible part of this notebook.
    """

    def _resolve(name):
        # Prefer a registered custom optimizer; fall back to the torch.optim attribute.
        registered = __OPTIMIZERS__.get(name)
        return registered if registered is not None else getattr(optim, name)

    params = CFG.optimizer_params
    if CFG.optimizer_name == "SAM":
        base_optimizer = _resolve(CFG.base_optimizer)
        return SAM(model.parameters(), base_optimizer, **params)

    optimizer_cls = _resolve(CFG.optimizer_name)
    return optimizer_cls(model.parameters(), **params)


def get_scheduler(optimizer):
    """Instantiate the LR scheduler named by ``CFG.scheduler_name``.

    Returns ``None`` when no scheduler name is configured; otherwise the class is
    looked up in ``torch.optim.lr_scheduler`` and built with ``CFG.scheduler_params``.
    """
    if CFG.scheduler_name is None:
        return None

    scheduler_cls = getattr(optim.lr_scheduler, CFG.scheduler_name)
    return scheduler_cls(optimizer, **CFG.scheduler_params)

In [10]:
# validation
# Build the cross-validation splitter: CFG.split names a class in
# sklearn.model_selection, and CFG.split_params supplies its constructor arguments.
splitter = getattr(model_selection, CFG.split)(**CFG.split_params)

In [11]:
class VentilatorDataset(torchdata.Dataset):
    """Dataset yielding one item per ``breath_id`` group of a feature DataFrame.

    Args:
        df: frame containing ``breath_id``, ``u_out``, the feature columns and,
            optionally, ``pressure``.
        train_value_col: list of continuous feature column names.
        train_category_col: list of categorical feature column names.
    """

    def __init__(self, df, train_value_col, train_category_col):
        if "pressure" not in df.columns:
            # Frames without a target (e.g. test data) get a dummy zero target.
            # `assign` returns a new frame, so the caller's DataFrame is not modified
            # (this costs one copy of the frame).
            df = df.assign(pressure=0)
        self.df = df
        # Map breath_id -> *positional* row indices, so the `.iloc` lookup below is
        # correct for any DataFrame index, not only a default RangeIndex.
        self.groups = df.groupby('breath_id').indices
        self.keys = list(self.groups.keys())
        self.train_value_col = train_value_col
        self.train_category_col = train_category_col

    def __len__(self):
        """Number of distinct ``breath_id`` groups."""
        return len(self.groups)

    def __getitem__(self, idx):
        """Return the arrays for the ``idx``-th breath.

        Returns a dict with ``input_value`` (float32), ``input_category`` (int),
        ``u_out`` (float32) and ``p`` (float32, the ``pressure`` column).
        """
        positions = self.groups[self.keys[idx]]
        breath = self.df.iloc[positions]

        return {
            "input_value": breath[self.train_value_col].values.astype(np.float32),
            "input_category": breath[self.train_category_col].values.astype(int),
            "u_out": breath['u_out'].values.astype(np.float32),
            "p": breath['pressure'].values.astype(np.float32),
        }

In [12]:
class RNNModel(nn.Module):
    """Embedding + MLP -> Conv1d stack -> bidirectional LSTM -> per-step head.

    Args:
        input_dim: number of continuous features per time step.
        lstm_dim: hidden size of each LSTM direction.
        dense_dim: output width of the feature MLP.
        logit_dim: hidden width of the output head.
        num_classes: number of outputs per time step.
        seq_len: fixed sequence length. The LayerNorm layers inside the conv stack
            normalise over the time axis, so inputs must have exactly this length.
            Defaults to 80, the value that was previously hard-coded.
    """

    def __init__(
        self,
        input_dim=4,
        lstm_dim=256,
        dense_dim=256,
        logit_dim=256,
        num_classes=1,
        seq_len=80,
    ):
        super().__init__()
        self.seq_len = seq_len

        # 9 category ids -> 4-dim embedding.
        # NOTE(review): padding_idx=0 pins the embedding of id 0 to zeros with no
        # gradient; confirm id 0 is not meant to be a learnable category.
        self.rc_emb = nn.Embedding(9, 4, padding_idx=0)

        self.mlp = nn.Sequential(
            nn.Linear(input_dim, dense_dim // 2),
            nn.LayerNorm(dense_dim // 2),
            nn.ReLU(),
            nn.Linear(dense_dim // 2, dense_dim),
            nn.LayerNorm(dense_dim),
            nn.ReLU(),
        )

        conv_channels = dense_dim + 4  # MLP features concatenated with the 4-dim embedding
        self.conv_basic = nn.Sequential(
            nn.Conv1d(in_channels=conv_channels, out_channels=conv_channels, kernel_size=3, padding=1),
            nn.LayerNorm(seq_len),
            nn.ReLU(),
            nn.Conv1d(in_channels=conv_channels, out_channels=conv_channels, kernel_size=3, padding=1),
            nn.LayerNorm(seq_len),
            nn.ReLU(),
        )

        self.lstm = nn.LSTM(conv_channels, lstm_dim, batch_first=True, bidirectional=True, num_layers=2)

        self.logits = nn.Sequential(
            nn.Linear(lstm_dim * 2, logit_dim),
            nn.LayerNorm(logit_dim),
            nn.ReLU(),
            nn.Linear(logit_dim, num_classes),
        )

        # Recurrent-layer initialisation (credited to "nakama" in the original comment):
        # orthogonal for weight matrices, standard normal for 1-D parameters.
        # The GRU case previously referenced an undefined name `init`; both recurrent
        # types now share the same `nn.init` calls.
        for module in self.modules():
            if isinstance(module, (nn.LSTM, nn.GRU)):
                print(f'init {module}')
                for param in module.parameters():
                    if len(param.shape) >= 2:
                        nn.init.orthogonal_(param.data)
                    else:
                        nn.init.normal_(param.data)

    def forward(self, cont_seq_x, cate_seq_x):
        """Predict ``num_classes`` values for every time step.

        Args:
            cont_seq_x: float tensor ``(batch, seq_len, input_dim)``.
            cate_seq_x: integer tensor of category ids, reshaped here to
                ``(batch, seq_len, -1)`` after embedding.

        Returns:
            Tensor of shape ``(batch, seq_len, num_classes)``.
        """
        bs, steps = cont_seq_x.size(0), cont_seq_x.size(1)
        rc_emb = self.rc_emb(cate_seq_x).view(bs, steps, -1)

        features = self.mlp(cont_seq_x)
        features = torch.cat((rc_emb, features), 2)

        # Conv1d expects (batch, channels, time); the LSTM expects (batch, time, channels).
        features = self.conv_basic(features.permute([0, 2, 1]))
        features, _ = self.lstm(features.permute([0, 2, 1]))

        return self.logits(features)

In [13]:
# Learner class(pytorch-lighting)
class Learner(pl.LightningModule):
    """LightningModule wrapping ``model`` with the masked-MAE criterion.

    Logs ``Loss/val`` (epoch-aggregated batch loss) and ``custom_mae/val`` (metric
    computed once over all validation predictions of the epoch).
    """

    def __init__(self, model):
        super().__init__()
        self.model = model
        self.criterion = get_criterion()

    def _forward_and_loss(self, batch):
        """Run the model on a batch dict and return ``(output, loss)``.

        Output, target and mask are flattened before the criterion, so the loss is a
        single value over every time step in the batch.
        """
        output = self.model(batch['input_value'], batch['input_category'])
        loss = self.criterion(output.view(-1), batch['p'].view(-1), batch['u_out'].view(-1))
        return output, loss

    def training_step(self, batch, batch_idx):
        """Return the training loss for one batch."""
        _, loss = self._forward_and_loss(batch)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log the validation loss and return tensors needed for the epoch metric."""
        output, loss = self._forward_and_loss(batch)

        self.log('Loss/val', loss, on_step=False, on_epoch=True, prog_bar=False, logger=True)

        return OrderedDict({
            "targets": batch['p'].detach(),
            "preds": output.detach(),
            "u_outs": batch['u_out'].detach(),
            "loss": loss.detach(),
        })

    def validation_epoch_end(self, outputs):
        """Compute the masked MAE over all validation batches and log/print it."""

        def _gather(key):
            # Flatten every batch's tensor and join them into one numpy vector.
            return torch.cat([o[key].view(-1) for o in outputs]).cpu().numpy()

        score = get_score(_gather("preds"), _gather("targets"), _gather("u_outs"))
        self.log('custom_mae/val', score, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        print(f'epoch = {self.current_epoch}, custom_mae = {score}')

    def configure_optimizers(self):
        """Return the optimizer, plus the LR scheduler when one is configured.

        ``get_scheduler`` returns ``None`` when ``CFG.scheduler_name`` is ``None``;
        in that case only the optimizer is returned, because Lightning does not
        accept ``None`` as an ``lr_scheduler`` entry.
        """
        optimizer = get_optimizer(self.model)
        scheduler = get_scheduler(optimizer)
        if scheduler is None:
            return optimizer
        return {"optimizer": optimizer, "lr_scheduler": scheduler, "monitor": "Loss/val"}

In [14]:
# Device used by evaluate() and the post-training inference code: CUDA when available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [15]:
def get_score(y_pred, y_true, u_outs):
    """Score predictions with the masked MAE implemented by ``compute_metric``."""
    score = compute_metric(preds=y_pred, trues=y_true, u_outs=u_outs)
    return score


def to_np(input):
    """Convert a tensor to a numpy array, detaching it and moving it to CPU first."""
    cpu_tensor = input.detach().cpu()
    return cpu_tensor.numpy()

# oof
def evaluate(model, loaders, phase):
    """Run ``model`` over ``loaders[phase]`` without gradients.

    Args:
        model: module called as ``model(input_value, input_category)``.
        loaders: dict of phase name -> DataLoader yielding the dataset's batch dicts.
        phase: key of the loader to iterate.

    Returns:
        ``(pred_list, target_list)``: two flat numpy arrays with the model outputs
        and the ``p`` targets, concatenated in loader order.

    The model's train/eval mode is restored to whatever it was on entry, even if an
    exception occurs, instead of always being switched to train mode.
    """
    was_training = model.training
    model.eval()
    pred_list = []
    target_list = []
    try:
        with torch.no_grad():
            for batch in loaders[phase]:
                # Move only the model inputs to the device; the batch dict itself is left untouched.
                input_value = batch['input_value'].to(device)
                input_category = batch['input_category'].to(device)
                output = model(input_value, input_category)
                pred_list.append(to_np(output))
                target_list.append(to_np(batch['p']))
    finally:
        model.train(was_training)

    pred_list = np.concatenate(pred_list).reshape(-1)
    target_list = np.concatenate(target_list).reshape(-1)
    return pred_list, target_list

In [16]:
# Load the raw competition tables.
train = pd.read_csv(DATA_DIR / 'train.csv')
test = pd.read_csv(DATA_DIR / 'test.csv')
# One display call with both frames: `display(a), display(b)` builds a tuple of
# return values and makes the cell echo a stray `(None, None)`.
display(train, test)

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure
0,1,1,20,50,0.000000,0.083334,0,5.837492
1,2,1,20,50,0.033652,18.383041,0,5.907794
2,3,1,20,50,0.067514,22.509278,0,7.876254
3,4,1,20,50,0.101542,22.808822,0,11.742872
4,5,1,20,50,0.135756,25.355850,0,12.234987
...,...,...,...,...,...,...,...,...
6035995,6035996,125749,50,10,2.504603,1.489714,1,3.869032
6035996,6035997,125749,50,10,2.537961,1.488497,1,3.869032
6035997,6035998,125749,50,10,2.571408,1.558978,1,3.798729
6035998,6035999,125749,50,10,2.604744,1.272663,1,4.079938


Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out
0,1,0,5,20,0.000000,0.000000,0
1,2,0,5,20,0.031904,7.515046,0
2,3,0,5,20,0.063827,14.651675,0
3,4,0,5,20,0.095751,21.230610,0
4,5,0,5,20,0.127644,26.320956,0
...,...,...,...,...,...,...,...
4023995,4023996,125748,20,10,2.530117,4.971245,1
4023996,4023997,125748,20,10,2.563853,4.975709,1
4023997,4023998,125748,20,10,2.597475,4.979468,1
4023998,4023999,125748,20,10,2.631134,4.982648,1


(None, None)

In [17]:
def get_raw_features(input_df, dataType = 'train'):
    """Return the raw columns used as features: time_step, u_in, R and C.

    ``dataType`` is accepted for signature parity with the other feature builders
    and is not used.
    """
    return input_df.loc[:, ['time_step', 'u_in', 'R', 'C']]

In [18]:
def get_category_features(input_df, dataType = 'train'):
    """Encode each (R, C) pair as a single integer category column ``R_C``.

    The pair is formatted as ``"<R>_<C>"`` and mapped to 0..8; pairs missing from
    the mapping become NaN. ``dataType`` is not used.
    """
    rc_map = {'5_10': 0, '5_20': 1, '5_50': 2, '20_10': 3, '20_20': 4, '20_50': 5, '50_10': 6, '50_20': 7, '50_50': 8}

    rc_keys = pd.Series(
        [f'{r}_{c}' for r, c in zip(input_df['R'], input_df['C'])],
        index=input_df.index,
    )
    return rc_keys.map(rc_map).to_frame('R_C')

In [19]:
def get_diff_shift_features(input_df, dataType = 'train'):
    """Per-breath shifted copies of ``u_in``/``time_step`` and their differences.

    For each offset in (-2, -1, 1, 2, 3, 4) four columns are produced, in this
    order: ``u_in_shift_<k>``, ``u_in_diff_<k>``, ``time_step_shift_<k>``,
    ``time_step_diff_<k>``. Shifts never cross a ``breath_id`` boundary, so the
    edges of each breath are NaN. ``dataType`` is not used.
    """
    per_breath = input_df.groupby(['breath_id'])

    new_cols = {}
    for offset in (-2, -1, 1, 2, 3, 4):
        for base in ('u_in', 'time_step'):
            shifted = per_breath[base].shift(offset)
            new_cols[f'{base}_shift_{offset}'] = shifted
            # Subtracting the shifted series directly is faster than calling diff().
            new_cols[f'{base}_diff_{offset}'] = input_df[base] - shifted

    return pd.DataFrame(new_cols)

In [20]:
def get_cum_features(input_df, dataType = 'train'):
    """Running sums of ``u_in`` and ``time_step`` within each ``breath_id``.

    Returns a frame with columns ``u_in_cumsum`` and ``time_step_cumsum`` aligned
    to ``input_df``. ``dataType`` is not used.
    """
    per_breath = input_df.groupby(['breath_id'])
    return pd.DataFrame({
        'u_in_cumsum': per_breath['u_in'].cumsum(),
        'time_step_cumsum': per_breath['time_step'].cumsum(),
    })

In [21]:
def get_simple_calc_features(input_df, dataType = 'train'):
    """Cumulative ``time_step * u_in`` within each breath, returned as column ``area``.

    ``dataType`` is not used.
    """
    step_area = input_df['time_step'] * input_df['u_in']
    running_area = step_area.groupby(input_df['breath_id']).cumsum()
    return running_area.to_frame('area')

In [22]:
def get_agg_features(input_df, dataType = 'train'):
    """Per-breath ``u_in`` statistics broadcast back to every row.

    Output columns, in order: ``u_in_amax``, ``u_in_std``, ``u_in_mean``,
    ``u_in_first``, ``u_in_last``, ``u_in_diffmax`` (max minus row value) and
    ``u_in_diffmean`` (mean minus row value). The result keeps ``input_df``'s index.
    ``dataType`` is not used.
    """
    # Named aggregation with string reducers: output column names are fixed here
    # instead of being derived from the NumPy function names (``np.max.__name__``
    # differs between NumPy versions), and pandas' own max/std/mean are used.
    agg_spec = {
        'u_in_amax': ('u_in', 'max'),
        'u_in_std': ('u_in', 'std'),
        'u_in_mean': ('u_in', 'mean'),
        'u_in_first': ('u_in', 'first'),
        'u_in_last': ('u_in', 'last'),
    }

    def get_agg_window(start_time=0, end_time=3.0, add_suffix = False):
        """Aggregate rows with ``start_time <= time_step <= end_time`` per breath.

        With ``add_suffix`` the column names get ``_<start_time>_<end_time>`` appended,
        so several windows can be merged side by side on ``breath_id``.
        """
        in_window = input_df['time_step'].between(start_time, end_time)
        df_feature = input_df[in_window].groupby('breath_id').agg(**agg_spec)

        if add_suffix:
            df_feature = df_feature.add_suffix('_' + str(start_time) + '_' + str(end_time))

        return df_feature

    df_agg_feature = get_agg_window().reset_index()

    merged = input_df[['breath_id', 'u_in']].merge(df_agg_feature, how='left', on='breath_id')
    # A left merge keeps row order but resets the index; restore it so the caller's
    # column-wise concat aligns with the other feature frames.
    merged.index = input_df.index

    merged['u_in_diffmax'] = merged['u_in_amax'] - merged['u_in']
    merged['u_in_diffmean'] = merged['u_in_mean'] - merged['u_in']

    return merged.drop(columns=['breath_id', 'u_in'])

In [23]:
def to_feature(input_df, dataType = 'train'):
    """Convert ``input_df`` into a new feature-matrix DataFrame.

    Every processor is called as ``func(input_df, dataType)`` and must return a frame
    with one row per input row; the results are concatenated column-wise in
    processor order.

    Raises:
        AssertionError: if a processor returns a frame whose length differs from
            ``input_df`` (the message is the processor's name).
    """

    processors = [
        get_raw_features,
        get_category_features,
        get_simple_calc_features,
        get_diff_shift_features,
        get_cum_features,
        get_agg_features
    ]

    feature_frames = []

    for func in tqdm(processors, total=len(processors)):
        with Timer(prefix='' + func.__name__ + ' '):
            _df = func(input_df, dataType)

        # Row counts must match; a mismatch means the processor is implemented incorrectly.
        assert len(_df) == len(input_df), func.__name__
        feature_frames.append(_df)

    # Concatenate once at the end rather than growing a frame inside the loop, which
    # would re-copy every previously added column on each iteration.
    return pd.concat(feature_frames, axis=1)

In [24]:
# Build the feature matrices for both splits with the same processor list.
# Each call runs six processors over the full table (timings are printed by Timer).
train_df = to_feature(train, dataType = 'train')
test_df = to_feature(test, dataType = 'test')

  0%|          | 0/6 [00:00<?, ?it/s]

get_raw_features  0.027[s]


 33%|███▎      | 2/6 [00:02<00:04,  1.20s/it]

get_category_features  2.309[s]


 50%|█████     | 3/6 [00:02<00:02,  1.24it/s]

get_simple_calc_features  0.172[s]
get_diff_shift_features  2.037[s]


 67%|██████▋   | 4/6 [00:05<00:02,  1.49s/it]

get_cum_features  0.199[s]


 83%|████████▎ | 5/6 [00:06<00:01,  1.26s/it]

get_agg_features  1.307[s]


100%|██████████| 6/6 [00:08<00:00,  1.37s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

get_raw_features  0.019[s]


 50%|█████     | 3/6 [00:01<00:01,  1.84it/s]

get_category_features  1.577[s]
get_simple_calc_features  0.104[s]
get_diff_shift_features  1.228[s]


 67%|██████▋   | 4/6 [00:03<00:01,  1.07it/s]

get_cum_features  0.130[s]


 83%|████████▎ | 5/6 [00:03<00:00,  1.26it/s]

get_agg_features  0.747[s]


100%|██████████| 6/6 [00:05<00:00,  1.16it/s]


In [25]:
# Split the feature columns into the categorical input (embedded by the model) and
# the continuous inputs (everything else, scaled below).
train_category_col = ['R_C']
train_value_col = [col for col in train_df.columns if col not in train_category_col]

In [26]:
# Scale the continuous features with statistics fitted on the training set only,
# then fill remaining NaNs (e.g. shift features at breath edges) in both sets with
# the training-set column means. The categorical column is set aside unscaled.
# NOTE: this cell rebinds train_df/test_df and therefore cannot be re-run on its own.
ss = RobustScaler()

train_category = train_df[train_category_col]
test_category = test_df[train_category_col]

train_df = pd.DataFrame(ss.fit_transform(train_df[train_value_col]), columns=train_value_col)
train_mean = train_df.mean()
train_df = train_df.fillna(train_mean)

test_df = pd.DataFrame(ss.transform(test_df[train_value_col]), columns=train_value_col)
test_df = test_df.fillna(train_mean)

In [27]:
# Show both scaled frames with one call; `display(a), display(b)` would make the
# cell echo a stray `(None, None)` tuple.
display(train_df, test_df)

Unnamed: 0,time_step,u_in,R,C,area,u_in_shift_-2,u_in_diff_-2,time_step_shift_-2,time_step_diff_-2,u_in_shift_-1,...,time_step_diff_4,u_in_cumsum,time_step_cumsum,u_in_amax,u_in_std,u_in_mean,u_in_first,u_in_last,u_in_diffmax,u_in_diffmean
0,-0.989052,-0.937384,0.000000,0.75,-0.516581,3.941607,-82.907393,-0.990103,-0.135069,3.079597,...,-0.209984,-0.725228,-0.496658,0.112208,0.443556,0.908069,-0.137993,0.631103,0.326941,1.273864
1,-0.963608,3.049278,0.000000,0.75,-0.514031,4.006709,-16.321179,-0.963649,-0.222686,3.988117,...,-0.209984,-0.676829,-0.496024,0.112208,0.443556,0.908069,-0.137993,0.631103,-0.178436,-1.450205
2,-0.938006,3.948195,0.000000,0.75,-0.507768,4.560278,-10.479371,-0.937051,-0.304628,4.054071,...,-0.209984,-0.617568,-0.494752,0.112208,0.443556,0.908069,-0.137993,0.631103,-0.292389,-2.064430
3,-0.912278,4.013452,0.000000,0.75,-0.498222,4.974095,-16.414632,-0.910663,-0.284657,4.614879,...,-0.209984,-0.557517,-0.492839,0.112208,0.443556,0.908069,-0.137993,0.631103,-0.300662,-2.109020
4,-0.886409,4.568332,0.000000,0.75,-0.484036,4.945324,-6.502963,-0.884223,-0.237261,5.034107,...,0.187352,-0.490761,-0.490282,0.112208,0.443556,0.908069,-0.137993,0.631103,-0.371002,-2.488167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6035995,0.904641,-0.630999,0.666667,-0.25,0.596562,-0.611712,-0.205551,0.956475,0.030596,-0.640263,...,-0.081341,0.293724,1.295715,0.044505,-0.010334,-0.106479,0.284129,-193.672880,0.210531,0.285296
6035996,0.929862,-0.631264,0.666667,-0.25,0.612131,-0.673940,0.849083,0.982391,0.035436,-0.624745,...,-0.079057,0.297643,1.343525,0.044505,-0.010334,-0.106479,0.284129,-193.672880,0.210564,0.285477
6035997,0.955151,-0.615910,0.666667,-0.25,0.628653,-0.628282,0.332694,1.008258,0.075990,-0.687786,...,-0.064988,0.301748,1.391965,0.044505,-0.010334,-0.106479,0.284129,-193.672880,0.208618,0.274986
6035998,0.980357,-0.678284,0.666667,-0.25,0.642315,0.564720,1.592482,-0.000598,0.172138,-0.641531,...,-0.073136,0.305098,1.441033,0.044505,-0.010334,-0.106479,0.284129,-193.672880,0.216525,0.317606


Unnamed: 0,time_step,u_in,R,C,area,u_in_shift_-2,u_in_diff_-2,time_step_shift_-2,time_step_diff_-2,u_in_shift_-1,...,time_step_diff_4,u_in_cumsum,time_step_cumsum,u_in_amax,u_in_std,u_in_mean,u_in_first,u_in_last,u_in_diffmax,u_in_diffmean
0,-0.989052,-0.955539,-0.333333,0.00,-0.516581,2.233843,-54.148815,-0.992969,0.725245,0.686669,...,-0.209984,-0.725447,-0.496658,0.334664,0.648469,0.749398,-0.143908,-0.128746,0.584122,1.164403
1,-0.964930,0.681643,-0.333333,0.00,-0.515592,3.663702,-50.685953,-0.968151,0.720572,2.258021,...,-0.209984,-0.705662,-0.496057,0.334664,0.648469,0.749398,-0.143908,-0.128746,0.376581,0.045724
2,-0.940793,2.236385,-0.333333,0.00,-0.511738,4.770033,-43.116339,-0.943357,0.727804,3.706578,...,-0.209984,-0.667087,-0.494854,0.334664,0.648469,0.749398,-0.143908,-0.128746,0.179491,-1.016625
3,-0.916656,3.669632,-0.333333,0.00,-0.503360,5.675464,-34.190341,-0.918547,0.730196,4.827377,...,-0.209984,-0.611192,-0.493051,0.334664,0.648469,0.749398,-0.143908,-0.128746,-0.002197,-1.995956
4,-0.892543,4.778584,-0.333333,0.00,-0.489513,6.340307,-26.676029,-0.893736,0.725134,5.744647,...,-0.771935,-0.541895,-0.490646,0.334664,0.648469,0.749398,-0.143908,-0.128746,-0.142776,-2.753697
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4023995,0.923932,0.127466,0.000000,-0.25,0.596586,0.131694,0.020256,0.976740,-0.098632,0.127555,...,0.152955,-0.212807,1.312865,0.277036,-0.110804,-0.538383,0.564867,0.536504,0.380806,-0.564680
4023996,0.949439,0.128438,0.000000,-0.25,0.649162,0.132385,0.025002,1.002907,-0.080552,0.128383,...,0.175679,-0.199707,1.361163,0.277036,-0.110804,-0.538383,0.564867,0.536504,0.380682,-0.565344
4023997,0.974860,0.129257,0.000000,-0.25,0.702468,0.132977,0.028826,1.029469,-0.207777,0.129083,...,0.110381,-0.186597,1.410094,0.277036,-0.110804,-0.538383,0.564867,0.536504,0.380579,-0.565904
4023998,1.000309,0.129950,0.000000,-0.25,0.756500,0.564720,1.592482,-0.000598,0.172138,0.129683,...,0.070853,-0.173479,1.459659,0.277036,-0.110804,-0.538383,0.564867,0.536504,0.380491,-0.566377


(None, None)

In [28]:
# Re-attach the unscaled categorical column and the id/target/mask columns.
# The concat is column-wise and aligns on the index, so all pieces must share the
# same row index.
train_meta = train[['id', 'breath_id', 'pressure', 'u_out']]
test_meta = test[['id', 'breath_id', 'u_out']]

train_df = pd.concat([train_df, train_category, train_meta], axis=1)
test_df = pd.concat([test_df, test_category, test_meta], axis=1)

In [29]:
# Shrink both frames with the project helper; it prints the before/after memory usage.
# NOTE(review): presumably it downcasts numeric dtypes — confirm in utils that the
# reduced precision is acceptable for the scaled features.
train_df = utils.reduce_mem_usage(train_df)
test_df = utils.reduce_mem_usage(test_df)

Mem. usage decreased from 1980.19 Mb to 506.56 Mb (74.4% reduction)
Mem. usage decreased from 1289.43 Mb to 330.03 Mb (74.4% reduction)


In [30]:
# ---- cross-validated training, out-of-fold prediction and test inference ----
oof_total = np.zeros((len(train), CFG.num_classes))    # OOF predictions, one row per training row
sub_preds = np.zeros((test.shape[0], len(CFG.folds)))  # test predictions, one column per trained fold
val_idxes = []
models = []
y = train['pressure']
groups = train['breath_id']
gkfold = model_selection.GroupKFold(n_splits=CFG.n_folds)  # NOTE(review): not used below; `splitter` drives the split
scores = []
input_dim = len(train_value_col)

for i, (trn_idx, val_idx) in enumerate(splitter.split(train_df, y, groups)):
    if i not in CFG.folds:
        continue

    # Column of sub_preds for this fold. sub_preds has len(CFG.folds) columns, so it
    # must be indexed by the fold's position in CFG.folds, not by the fold id.
    fold_col = CFG.folds.index(i)

    trn_df = train_df.loc[trn_idx, :].reset_index(drop=True)
    val_df = train_df.loc[val_idx, :].reset_index(drop=True)
    trn_y = y.values[trn_idx]
    val_y = y.values[val_idx]

    loaders = {
        phase: torchdata.DataLoader(
            VentilatorDataset(df_, train_value_col, train_category_col),
            **CFG.loader_params[phase])
        for phase, df_ in zip(["train", "valid", "test"], [trn_df, val_df, test_df])
    }

    model = RNNModel(
        input_dim=input_dim,
        lstm_dim=CFG.lstm_dim,
        dense_dim=CFG.dense_dim,
        logit_dim=CFG.logit_dim,
        num_classes=CFG.num_classes,
    )
    model_name = model.__class__.__name__

    learner = Learner(model)

    # loggers
    # NOTE(review): the wandb run is never finished inside the loop; with more than
    # one entry in CFG.folds, check that each fold really gets its own run.
    RUN_NAME = f'exp{str(CFG.exp_num)}'
    wandb.init(project='Ventilator-Pressure-Prediction', entity='sqrt4kaido', group=RUN_NAME, job_type=RUN_NAME + f'-fold-{i}')
    wandb.run.name = RUN_NAME + f'-fold-{i}'
    wandb_config = wandb.config
    wandb_config.model_name = model_name
    wandb.watch(model)

    # callbacks
    callbacks = []
    # Keep the checkpoint with the lowest epoch-level validation loss.
    # NOTE(review): learner.current_epoch is evaluated once, here, so the file name
    # always contains the epoch at construction time rather than the best epoch.
    checkpoint_callback = ModelCheckpoint(
        monitor='Loss/val',
        mode='min',
        dirpath=OUTPUT_DIR,
        verbose=False,
        save_weights_only=True,
        filename=f'{model_name}-{learner.current_epoch}-{i}')
    callbacks.append(checkpoint_callback)

#     early_stop_callback = EarlyStopping(
#         monitor='Loss/val',
#         min_delta=0.00,
#         patience=10,
#         verbose=True,
#         mode='min')
#     callbacks.append(early_stop_callback)

    loggers = []
    loggers.append(WandbLogger())

    trainer = pl.Trainer(
        logger=loggers,
        callbacks=callbacks,
        max_epochs=CFG.epochs,
        default_root_dir=OUTPUT_DIR,
        gpus=1,
        deterministic=True,
        benchmark=False,
        )

    # Dataloaders are passed positionally: the keyword for the training loader was
    # renamed between Lightning releases, the positional order was not.
    trainer.fit(learner, loaders['train'], loaders['valid'])
    print('train done.')

    #############
    # validation (to make oof)
    #############
    # Reload the best weights; `learner.model` is the same object as `model`.
    checkpoint = torch.load(checkpoint_callback.best_model_path)
    learner.load_state_dict(checkpoint['state_dict'])

    model = model.to(device)
    oof_pred, oof_target = evaluate(model, loaders, phase="valid")
    models.append(model)

    oof_score = get_score(oof_pred, oof_target, val_df['u_out'].values)
    scores.append(oof_score)
    # Store predictions in target units. They were previously divided by CFG.bias,
    # although the targets are never multiplied by it anywhere in the pipeline.
    oof_total[val_idx] = oof_pred.reshape(-1, CFG.num_classes)
    val_idxes.append(val_idx)

    print('validate done.')
    # The score is the masked MAE (it was mislabelled "auc").
    print(f'fold = {i}, mae = {oof_score}')
    wandb.log({'CV_score': oof_score})

    #############
    # inference
    #############
    test_pred, _ = evaluate(model, loaders, phase="test")
    sub_preds[:, fold_col] = test_pred

    print('inference done.')

# test_preds_total = np.array(test_preds_total)


init LSTM(516, 512, num_layers=2, batch_first=True, bidirectional=True)


[34m[1mwandb[0m: Currently logged in as: [33msqrt4kaido[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.4 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type           | Params
---------------------------------------------
0 | model     | RNNModel       | 12.8 M
1 | criterion | VentilatorLoss | 0     
---------------------------------------------
12.8 M    Trainable params
0         Non-trainable params
12.8 M    Total params
51.148    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

epoch = 0, custom_mae = 17.216259002685547


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

epoch = 0, custom_mae = 1.028338074684143


Validating: 0it [00:00, ?it/s]

epoch = 1, custom_mae = 0.9143775701522827


Validating: 0it [00:00, ?it/s]

epoch = 2, custom_mae = 0.9160099625587463


Validating: 0it [00:00, ?it/s]

epoch = 3, custom_mae = 0.7873607873916626


Validating: 0it [00:00, ?it/s]

epoch = 4, custom_mae = 0.7475968599319458


Validating: 0it [00:00, ?it/s]

epoch = 5, custom_mae = 0.6009249091148376


Validating: 0it [00:00, ?it/s]

epoch = 6, custom_mae = 0.6207842230796814


Validating: 0it [00:00, ?it/s]

epoch = 7, custom_mae = 0.6472277045249939


Validating: 0it [00:00, ?it/s]

epoch = 8, custom_mae = 0.5900236368179321


Validating: 0it [00:00, ?it/s]

epoch = 9, custom_mae = 0.4989662170410156


Validating: 0it [00:00, ?it/s]

epoch = 10, custom_mae = 0.5630743503570557


Validating: 0it [00:00, ?it/s]

epoch = 11, custom_mae = 0.5378192067146301


Validating: 0it [00:00, ?it/s]

epoch = 12, custom_mae = 0.47030529379844666


Validating: 0it [00:00, ?it/s]

epoch = 13, custom_mae = 0.47327175736427307


Validating: 0it [00:00, ?it/s]

epoch = 14, custom_mae = 0.4566386044025421


Validating: 0it [00:00, ?it/s]

epoch = 15, custom_mae = 0.49939343333244324


Validating: 0it [00:00, ?it/s]

epoch = 16, custom_mae = 0.5184411406517029


Validating: 0it [00:00, ?it/s]

epoch = 17, custom_mae = 0.39044296741485596


Validating: 0it [00:00, ?it/s]

epoch = 18, custom_mae = 0.39903366565704346


Validating: 0it [00:00, ?it/s]

epoch = 19, custom_mae = 0.4927005171775818


Validating: 0it [00:00, ?it/s]

epoch = 20, custom_mae = 0.36678168177604675


Validating: 0it [00:00, ?it/s]

epoch = 21, custom_mae = 0.4415319561958313


Validating: 0it [00:00, ?it/s]

epoch = 22, custom_mae = 0.3334161937236786


Validating: 0it [00:00, ?it/s]

epoch = 23, custom_mae = 0.5288329720497131


Validating: 0it [00:00, ?it/s]

epoch = 24, custom_mae = 0.4024595022201538


Validating: 0it [00:00, ?it/s]

epoch = 25, custom_mae = 0.3219490349292755


Validating: 0it [00:00, ?it/s]

epoch = 26, custom_mae = 0.3058910071849823


Validating: 0it [00:00, ?it/s]

epoch = 27, custom_mae = 0.29839906096458435


Validating: 0it [00:00, ?it/s]

epoch = 28, custom_mae = 0.28578636050224304


Validating: 0it [00:00, ?it/s]

epoch = 29, custom_mae = 0.2772689461708069


Validating: 0it [00:00, ?it/s]

epoch = 30, custom_mae = 0.2910822927951813


Validating: 0it [00:00, ?it/s]

epoch = 31, custom_mae = 0.2738257348537445


Validating: 0it [00:00, ?it/s]

epoch = 32, custom_mae = 0.265507310628891


Validating: 0it [00:00, ?it/s]

epoch = 33, custom_mae = 0.26313549280166626


Validating: 0it [00:00, ?it/s]

epoch = 34, custom_mae = 0.26085832715034485


Validating: 0it [00:00, ?it/s]

epoch = 35, custom_mae = 0.2607971429824829


Validating: 0it [00:00, ?it/s]

epoch = 36, custom_mae = 0.2575545310974121


Validating: 0it [00:00, ?it/s]

epoch = 37, custom_mae = 0.25552523136138916


Validating: 0it [00:00, ?it/s]

epoch = 38, custom_mae = 0.25502151250839233


Validating: 0it [00:00, ?it/s]

epoch = 39, custom_mae = 0.25492122769355774


Validating: 0it [00:00, ?it/s]

epoch = 40, custom_mae = 0.4733579754829407


Validating: 0it [00:00, ?it/s]

epoch = 41, custom_mae = 0.47727707028388977


Validating: 0it [00:00, ?it/s]

epoch = 42, custom_mae = 0.46877238154411316


Validating: 0it [00:00, ?it/s]

epoch = 43, custom_mae = 0.4073864817619324


Validating: 0it [00:00, ?it/s]

epoch = 44, custom_mae = 0.4034194350242615


Validating: 0it [00:00, ?it/s]

epoch = 45, custom_mae = 0.39921268820762634


Validating: 0it [00:00, ?it/s]

epoch = 46, custom_mae = 0.3743444085121155


Validating: 0it [00:00, ?it/s]

epoch = 47, custom_mae = 0.3544982671737671


Validating: 0it [00:00, ?it/s]

epoch = 48, custom_mae = 0.390868216753006


Validating: 0it [00:00, ?it/s]

epoch = 49, custom_mae = 0.38722851872444153


Validating: 0it [00:00, ?it/s]

epoch = 50, custom_mae = 0.31506767868995667


Validating: 0it [00:00, ?it/s]

epoch = 51, custom_mae = 0.3348071873188019


Validating: 0it [00:00, ?it/s]

epoch = 52, custom_mae = 0.449419766664505


Validating: 0it [00:00, ?it/s]

epoch = 53, custom_mae = 1.0196795463562012


Validating: 0it [00:00, ?it/s]

epoch = 54, custom_mae = 0.3185900151729584


Validating: 0it [00:00, ?it/s]

epoch = 55, custom_mae = 0.5459752082824707


Validating: 0it [00:00, ?it/s]

epoch = 56, custom_mae = 0.32770490646362305


Validating: 0it [00:00, ?it/s]

epoch = 57, custom_mae = 0.32056018710136414


Validating: 0it [00:00, ?it/s]

epoch = 58, custom_mae = 0.27127018570899963


Validating: 0it [00:00, ?it/s]

epoch = 59, custom_mae = 0.2876134216785431


Validating: 0it [00:00, ?it/s]

epoch = 60, custom_mae = 0.27988454699516296


Validating: 0it [00:00, ?it/s]

epoch = 61, custom_mae = 0.3360660970211029


Validating: 0it [00:00, ?it/s]

epoch = 62, custom_mae = 0.2908996641635895


Validating: 0it [00:00, ?it/s]

epoch = 63, custom_mae = 0.2599506378173828


Validating: 0it [00:00, ?it/s]

epoch = 64, custom_mae = 0.24802684783935547


Validating: 0it [00:00, ?it/s]

epoch = 65, custom_mae = 0.24360795319080353


Validating: 0it [00:00, ?it/s]

epoch = 66, custom_mae = 0.23930127918720245


Validating: 0it [00:00, ?it/s]

epoch = 67, custom_mae = 0.23409637808799744


Validating: 0it [00:00, ?it/s]

epoch = 68, custom_mae = 0.23303459584712982


Validating: 0it [00:00, ?it/s]

epoch = 69, custom_mae = 0.22909241914749146


Validating: 0it [00:00, ?it/s]

epoch = 70, custom_mae = 0.22557860612869263


Validating: 0it [00:00, ?it/s]

epoch = 71, custom_mae = 0.22163693606853485


Validating: 0it [00:00, ?it/s]

epoch = 72, custom_mae = 0.2212388962507248


Validating: 0it [00:00, ?it/s]

epoch = 73, custom_mae = 0.21893364191055298


Validating: 0it [00:00, ?it/s]

epoch = 74, custom_mae = 0.22953000664710999


Validating: 0it [00:00, ?it/s]

epoch = 75, custom_mae = 0.21656693518161774


Validating: 0it [00:00, ?it/s]

epoch = 76, custom_mae = 0.21578043699264526


Validating: 0it [00:00, ?it/s]

epoch = 77, custom_mae = 0.2150772660970688


Validating: 0it [00:00, ?it/s]

epoch = 78, custom_mae = 0.2150588482618332


Validating: 0it [00:00, ?it/s]

epoch = 79, custom_mae = 0.2150115817785263


Validating: 0it [00:00, ?it/s]

epoch = 80, custom_mae = 0.38815194368362427


Validating: 0it [00:00, ?it/s]

epoch = 81, custom_mae = 0.3974939286708832


Validating: 0it [00:00, ?it/s]

epoch = 82, custom_mae = 0.3291098177433014


Validating: 0it [00:00, ?it/s]

epoch = 83, custom_mae = 0.3655667304992676


Validating: 0it [00:00, ?it/s]

epoch = 84, custom_mae = 0.3411416709423065


Validating: 0it [00:00, ?it/s]

epoch = 85, custom_mae = 0.3039289116859436


Validating: 0it [00:00, ?it/s]

epoch = 86, custom_mae = 0.2840878367424011


Validating: 0it [00:00, ?it/s]

epoch = 87, custom_mae = 0.32107385993003845


Validating: 0it [00:00, ?it/s]

epoch = 88, custom_mae = 0.3381462097167969


Validating: 0it [00:00, ?it/s]

epoch = 89, custom_mae = 0.42691555619239807


Validating: 0it [00:00, ?it/s]

epoch = 90, custom_mae = 0.2655956447124481


Validating: 0it [00:00, ?it/s]

epoch = 91, custom_mae = 0.27401697635650635


Validating: 0it [00:00, ?it/s]

epoch = 92, custom_mae = 0.2840574085712433


Validating: 0it [00:00, ?it/s]

epoch = 93, custom_mae = 0.2616051733493805


Validating: 0it [00:00, ?it/s]

epoch = 94, custom_mae = 0.2762126922607422


Validating: 0it [00:00, ?it/s]

epoch = 95, custom_mae = 0.27267831563949585


Validating: 0it [00:00, ?it/s]

epoch = 96, custom_mae = 0.2725282609462738


Validating: 0it [00:00, ?it/s]

epoch = 97, custom_mae = 0.43421217799186707


Validating: 0it [00:00, ?it/s]

epoch = 98, custom_mae = 0.2554457187652588


Validating: 0it [00:00, ?it/s]

epoch = 99, custom_mae = 0.25205984711647034


Validating: 0it [00:00, ?it/s]

epoch = 100, custom_mae = 0.23413868248462677


Validating: 0it [00:00, ?it/s]

epoch = 101, custom_mae = 0.2362651526927948


Validating: 0it [00:00, ?it/s]

epoch = 102, custom_mae = 0.2189975082874298


Validating: 0it [00:00, ?it/s]

epoch = 103, custom_mae = 0.2184373140335083


Validating: 0it [00:00, ?it/s]

epoch = 104, custom_mae = 0.2208639681339264


Validating: 0it [00:00, ?it/s]

epoch = 105, custom_mae = 0.21457460522651672


Validating: 0it [00:00, ?it/s]

epoch = 106, custom_mae = 0.21133208274841309


Validating: 0it [00:00, ?it/s]

epoch = 107, custom_mae = 0.20908720791339874


Validating: 0it [00:00, ?it/s]

epoch = 108, custom_mae = 0.20967404544353485


Validating: 0it [00:00, ?it/s]

epoch = 109, custom_mae = 0.2084735482931137


Validating: 0it [00:00, ?it/s]

epoch = 110, custom_mae = 0.20568399131298065


Validating: 0it [00:00, ?it/s]

epoch = 111, custom_mae = 0.20485816895961761


Validating: 0it [00:00, ?it/s]

epoch = 112, custom_mae = 0.20372700691223145


Validating: 0it [00:00, ?it/s]

epoch = 113, custom_mae = 0.2030106484889984


Validating: 0it [00:00, ?it/s]

epoch = 114, custom_mae = 0.2018592655658722


Validating: 0it [00:00, ?it/s]

epoch = 115, custom_mae = 0.2019653171300888


Validating: 0it [00:00, ?it/s]

epoch = 116, custom_mae = 0.20166818797588348


Validating: 0it [00:00, ?it/s]

epoch = 117, custom_mae = 0.20138876140117645


Validating: 0it [00:00, ?it/s]

epoch = 118, custom_mae = 0.20110587775707245


Validating: 0it [00:00, ?it/s]

epoch = 119, custom_mae = 0.2011132538318634


Validating: 0it [00:00, ?it/s]

epoch = 120, custom_mae = 0.46689552068710327


Validating: 0it [00:00, ?it/s]

epoch = 121, custom_mae = 0.4393870234489441


Validating: 0it [00:00, ?it/s]

epoch = 122, custom_mae = 0.3745163083076477


Validating: 0it [00:00, ?it/s]

epoch = 123, custom_mae = 0.3012826144695282


Validating: 0it [00:00, ?it/s]

epoch = 124, custom_mae = 0.2698647081851959


Validating: 0it [00:00, ?it/s]

epoch = 125, custom_mae = 0.2915550768375397


Validating: 0it [00:00, ?it/s]

epoch = 126, custom_mae = 0.251067578792572


Validating: 0it [00:00, ?it/s]

epoch = 127, custom_mae = 0.25425758957862854


Validating: 0it [00:00, ?it/s]

epoch = 128, custom_mae = 0.296558141708374


Validating: 0it [00:00, ?it/s]

epoch = 129, custom_mae = 0.28452983498573303


Validating: 0it [00:00, ?it/s]

epoch = 130, custom_mae = 0.2502935230731964


Validating: 0it [00:00, ?it/s]

epoch = 131, custom_mae = 0.2581970691680908


Validating: 0it [00:00, ?it/s]

epoch = 132, custom_mae = 0.252856969833374


Validating: 0it [00:00, ?it/s]

epoch = 133, custom_mae = 0.2276977002620697


Validating: 0it [00:00, ?it/s]

epoch = 134, custom_mae = 0.23551452159881592


Validating: 0it [00:00, ?it/s]

epoch = 135, custom_mae = 0.24314025044441223


Validating: 0it [00:00, ?it/s]

epoch = 136, custom_mae = 0.23205046355724335


Validating: 0it [00:00, ?it/s]

epoch = 137, custom_mae = 0.22296904027462006


Validating: 0it [00:00, ?it/s]

epoch = 138, custom_mae = 0.2377835363149643


Validating: 0it [00:00, ?it/s]

epoch = 139, custom_mae = 0.21519336104393005


Validating: 0it [00:00, ?it/s]

epoch = 140, custom_mae = 0.21630962193012238


Validating: 0it [00:00, ?it/s]

epoch = 141, custom_mae = 0.21140067279338837


Validating: 0it [00:00, ?it/s]

epoch = 142, custom_mae = 0.21099045872688293


Validating: 0it [00:00, ?it/s]

epoch = 143, custom_mae = 0.2070341259241104


Validating: 0it [00:00, ?it/s]

epoch = 144, custom_mae = 0.20904980599880219


Validating: 0it [00:00, ?it/s]

epoch = 145, custom_mae = 0.20824944972991943


Validating: 0it [00:00, ?it/s]

epoch = 146, custom_mae = 0.20143425464630127


Validating: 0it [00:00, ?it/s]

epoch = 147, custom_mae = 0.20122191309928894


Validating: 0it [00:00, ?it/s]

epoch = 148, custom_mae = 0.20077961683273315


Validating: 0it [00:00, ?it/s]

epoch = 149, custom_mae = 0.19935199618339539


Validating: 0it [00:00, ?it/s]

epoch = 150, custom_mae = 0.19884125888347626


Validating: 0it [00:00, ?it/s]

epoch = 151, custom_mae = 0.19791509211063385


Validating: 0it [00:00, ?it/s]

epoch = 152, custom_mae = 0.19746963679790497


Validating: 0it [00:00, ?it/s]

epoch = 153, custom_mae = 0.19537152349948883


Validating: 0it [00:00, ?it/s]

epoch = 154, custom_mae = 0.1955849826335907


Validating: 0it [00:00, ?it/s]

epoch = 155, custom_mae = 0.19578240811824799


Validating: 0it [00:00, ?it/s]

epoch = 156, custom_mae = 0.19502891600131989


Validating: 0it [00:00, ?it/s]

epoch = 157, custom_mae = 0.19522534310817719


Validating: 0it [00:00, ?it/s]

epoch = 158, custom_mae = 0.19515658915042877


Validating: 0it [00:00, ?it/s]

epoch = 159, custom_mae = 0.19504739344120026


Validating: 0it [00:00, ?it/s]

epoch = 160, custom_mae = 0.2823018431663513


Validating: 0it [00:00, ?it/s]

epoch = 161, custom_mae = 0.2669485807418823


Validating: 0it [00:00, ?it/s]

epoch = 162, custom_mae = 0.3911692202091217


Validating: 0it [00:00, ?it/s]

epoch = 163, custom_mae = 0.3176860809326172


Validating: 0it [00:00, ?it/s]

epoch = 164, custom_mae = 0.2996276915073395


Validating: 0it [00:00, ?it/s]

epoch = 165, custom_mae = 0.2351353019475937


Validating: 0it [00:00, ?it/s]

epoch = 166, custom_mae = 0.28629574179649353


Validating: 0it [00:00, ?it/s]

epoch = 167, custom_mae = 0.22639773786067963


Validating: 0it [00:00, ?it/s]

epoch = 168, custom_mae = 0.22465063631534576


Validating: 0it [00:00, ?it/s]

epoch = 169, custom_mae = 0.4791729748249054


Validating: 0it [00:00, ?it/s]

epoch = 170, custom_mae = 0.2474047839641571


Validating: 0it [00:00, ?it/s]

epoch = 171, custom_mae = 0.24144555628299713


Validating: 0it [00:00, ?it/s]

epoch = 172, custom_mae = 0.2501378357410431


Validating: 0it [00:00, ?it/s]

epoch = 173, custom_mae = 0.38442593812942505


Validating: 0it [00:00, ?it/s]

epoch = 174, custom_mae = 0.2594760060310364


Validating: 0it [00:00, ?it/s]

epoch = 175, custom_mae = 0.22967574000358582


Validating: 0it [00:00, ?it/s]

epoch = 176, custom_mae = 0.22113136947155


Validating: 0it [00:00, ?it/s]

epoch = 177, custom_mae = 0.22120873630046844


Validating: 0it [00:00, ?it/s]

epoch = 178, custom_mae = 0.21351505815982819


Validating: 0it [00:00, ?it/s]

epoch = 179, custom_mae = 0.20977196097373962


Validating: 0it [00:00, ?it/s]

epoch = 180, custom_mae = 0.20635926723480225


Validating: 0it [00:00, ?it/s]

epoch = 181, custom_mae = 0.2058597207069397


Validating: 0it [00:00, ?it/s]

epoch = 182, custom_mae = 0.20579758286476135


Validating: 0it [00:00, ?it/s]

epoch = 183, custom_mae = 0.2046782672405243


Validating: 0it [00:00, ?it/s]

epoch = 184, custom_mae = 0.20188185572624207


Validating: 0it [00:00, ?it/s]

epoch = 185, custom_mae = 0.19995969533920288


Validating: 0it [00:00, ?it/s]

epoch = 186, custom_mae = 0.2012101113796234


Validating: 0it [00:00, ?it/s]

epoch = 187, custom_mae = 0.20024234056472778


Validating: 0it [00:00, ?it/s]

epoch = 188, custom_mae = 0.19672784209251404


Validating: 0it [00:00, ?it/s]

epoch = 189, custom_mae = 0.19601567089557648


Validating: 0it [00:00, ?it/s]

epoch = 190, custom_mae = 0.1957700550556183


Validating: 0it [00:00, ?it/s]

epoch = 191, custom_mae = 0.19444143772125244


Validating: 0it [00:00, ?it/s]

epoch = 192, custom_mae = 0.19354797899723053


Validating: 0it [00:00, ?it/s]

epoch = 193, custom_mae = 0.19374607503414154


Validating: 0it [00:00, ?it/s]

epoch = 194, custom_mae = 0.19314660131931305


Validating: 0it [00:00, ?it/s]

epoch = 195, custom_mae = 0.19308900833129883


Validating: 0it [00:00, ?it/s]

epoch = 196, custom_mae = 0.19316180050373077


Validating: 0it [00:00, ?it/s]

epoch = 197, custom_mae = 0.1928519904613495


Validating: 0it [00:00, ?it/s]

epoch = 198, custom_mae = 0.19280460476875305


Validating: 0it [00:00, ?it/s]

epoch = 199, custom_mae = 0.19277431070804596
train done.
validate done.
fold = 0, auc = 0.1927743149238386
inference done.


In [31]:
# Score the out-of-fold (OOF) predictions, save them as a CSV, and display them.
#
# The metric is stored in `oof_score` in BOTH branches: the W&B summary cell
# logs `oof_score`, so leaving it undefined on the all-folds path would raise a
# NameError (or silently log a stale value from an earlier run).
if len(CFG.folds) != CFG.n_folds:
    # Partial run: only a subset of the folds was trained.
    # NOTE(review): `oof_pred`, `oof_target`, `val_df` and `val_idxes[0]` are
    # whatever the training loop left behind; this presumably only lines up
    # when exactly one fold (fold 0) was run -- confirm before using other
    # values of CFG.folds.
    oof_score = get_score(oof_pred, oof_target, val_df['u_out'].values)
    print(f'MAE {oof_score}')

    # Take the first column of the validation rows. `.copy()` makes this an
    # independent frame, so adding `pressure` below is an unambiguous write
    # and does not trigger pandas' SettingWithCopyWarning.
    oof_df = train.iloc[val_idxes[0], :1].copy()
    oof_df['pressure'] = oof_pred
    oof_df.to_csv(OUTPUT_DIR / f'oof{CFG.exp_num}.csv', index=False)
else:
    # Full run: every fold was trained, so score the complete OOF array.
    # NOTE(review): the argument order here (y, oof_total) is the reverse of
    # the (pred, target) order used above; harmless only if get_score is
    # symmetric in its first two arguments -- confirm.
    oof_score = get_score(y, oof_total, train['u_out'].values)
    score = oof_score  # keep the original name for any later cell that reads it
    print(f'MAE {oof_score}: folds: {scores}')

    oof_df = pd.DataFrame({'id': train['id'].values, 'pressure': oof_total.reshape(-1)})
    oof_df.to_csv(OUTPUT_DIR / f'oof{CFG.exp_num}.csv', index=False)
oof_df

MAE 0.1927743149238386


Unnamed: 0,id,pressure
240,241,6.614223
241,242,5.657218
242,243,6.600877
243,244,8.707904
244,245,11.016167
...,...,...
6035995,6035996,38.748112
6035996,6035997,37.935310
6035997,6035998,38.996235
6035998,6035999,37.546833


In [32]:
# Build the submission file: start from the sample submission and overwrite
# `pressure` with the row-wise mean of `sub_preds`
# (presumably one column of test predictions per trained fold -- confirm).
sub = (
    pd.read_csv(DATA_DIR / 'sample_submission.csv')
    .assign(pressure=np.mean(sub_preds, axis=1))
)
sub.to_csv(OUTPUT_DIR / f'sub{CFG.exp_num}.csv', index=False)
sub

Unnamed: 0,id,pressure
0,1,6.227852
1,2,5.915128
2,3,7.060081
3,4,7.567587
4,5,9.120646
...,...,...
4023995,4023996,42.317497
4023996,4023997,40.796288
4023997,4023998,40.182091
4023998,4023999,35.935818


In [33]:
# Open a separate W&B run (job_type='summary') in this experiment's group and
# record the cross-validation score computed by the OOF scoring cell.
wandb.init(
    project='Ventilator-Pressure-Prediction',
    entity='sqrt4kaido',
    group=RUN_NAME,
    job_type='summary',
)
# Rename the run after creation so it shows up as "summary" in the group.
wandb.run.name = 'summary'
wandb.log({'CV_score': oof_score})
# wandb.save(utils.get_notebook_path())
wandb.finish()

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
CV_score,▁
Loss/val,█▅▄▄▃▂▂▂▄▃▂▄▂▁▁▁▃▂▂▃▁▁▁▁▃▂▁▁▁▁▁▁▂▁▃▁▁▁▁▁
custom_mae/val,█▅▄▄▃▂▂▂▄▃▂▄▂▁▁▁▃▂▂▃▁▁▁▁▃▂▁▁▁▁▁▁▂▁▃▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
CV_score,0.19277
Loss/val,0.19274
custom_mae/val,0.19277
epoch,199.0
trainer/global_step,94199.0


[34m[1mwandb[0m: wandb version 0.12.4 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
CV_score,▁

0,1
CV_score,0.19277
