In [1]:
import sys
from time import time
import numpy as np
import pandas as pd
from pathlib import Path
import lightgbm as lgb
import matplotlib.pyplot as plt 
import seaborn as sns
from tqdm import tqdm
import copy
import wandb
from collections import OrderedDict

from sklearn.metrics import mean_absolute_error
from sklearn import model_selection
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as torchdata

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.loggers import WandbLogger


In [2]:
sys.path.append('../../src/')
import utils as utils
from utils import Timer

In [3]:
class CFG:
    """Experiment configuration, read as class attributes by the rest of the notebook."""

    # ---- general run settings ----
    seed = 42
    exp_num = 11
    local = True
    n_folds = 5
    folds = [0]          # fold numbers that are actually trained
    debug = False
    bias = 1000
    epochs = 200

    # ---- dataset: transform specs per phase (looked up by name in get_transforms) ----
    transforms = {phase: [{"name": ""}] for phase in ("train", "valid", "test")}

    # ---- DataLoader keyword arguments per phase ----
    loader_params = {
        "train": dict(
            batch_size=128,
            shuffle=True,
            num_workers=8,
            pin_memory=True,
            drop_last=True,
        ),
        "valid": dict(
            batch_size=32,
            shuffle=False,
            num_workers=8,
            pin_memory=True,
            drop_last=False,
        ),
        "test": dict(
            batch_size=32,
            shuffle=False,
            num_workers=8,
            pin_memory=True,
            drop_last=False,
        ),
    }

    # ---- cross-validation splitter (class name inside sklearn.model_selection) ----
    split = "GroupKFold"
    split_params = dict(n_splits=5)

    # ---- model dimensions ----
    input_dim = 5
    dense_dim = 512
    lstm_dim = 512
    logit_dim = 512
    num_classes = 1

    # ---- criterion (currently not configurable) ----
    # loss_name = "rmspe_loss"
    # loss_params: dict = {}

    # ---- optimizer: name is resolved in get_optimizer ----
    optimizer_name = "Adam"
    optimizer_params = dict(lr=0.001)

    # ---- LR scheduler: name is resolved in get_scheduler ----
    scheduler_name = "ReduceLROnPlateau"
    scheduler_params = dict(factor=0.2, patience=7)

In [4]:
# Seed via the project helper before any data splitting or model construction.
# NOTE(review): which libraries utils.set_seed covers is not visible here — confirm it seeds torch as well.
utils.set_seed(CFG.seed)

In [5]:
# Pick input/output locations depending on where the notebook runs (CFG.local).
if CFG.local:
    # NOTE(review): absolute, user-specific path — the notebook is not portable to
    # another machine without editing this line.
    DATA_DIR = Path("/home/knikaido/work/Ventilator-Pressure-Prediction/data/ventilator-pressure-prediction")
    OUTPUT_DIR = Path('./output/')
else:
    DATA_DIR = Path("../input/ventilator-pressure-prediction")
    # Path('') is the current working directory.
    OUTPUT_DIR = Path('')   

In [6]:
def get_transforms(phase: str):
    """Build the transform pipeline configured for ``phase`` in ``CFG.transforms``.

    Each entry of ``CFG.transforms[phase]`` is a dict with a ``"name"`` key and an
    optional ``"params"`` dict. A name is instantiated only if an object with that
    name exists in this module's globals; unknown names are silently skipped.

    Returns:
        A ``Compose`` wrapping the instantiated transforms, or ``None`` when
        nothing is configured or no configured name could be resolved.
    """
    phase_table = CFG.transforms
    if phase_table is None or phase_table[phase] is None:
        return None

    built = []
    for conf in phase_table[phase]:
        name = conf["name"]
        params = conf.get("params")
        if params is None:
            params = {}
        factory = globals().get(name)
        if factory is not None:
            built.append(factory(**params))

    return Compose(built) if len(built) > 0 else None
        
        
class Normalize:
    """Scale an array so that its largest absolute value becomes 1."""

    def __call__(self, y: np.ndarray):
        """Return ``y / max(|y|)`` as a Fortran-ordered array.

        Empty and all-zero inputs are returned unscaled instead of raising or
        producing NaNs from a division by zero.
        """
        y = np.asarray(y)
        if y.size == 0:
            # .max() of an empty array raises; there is nothing to scale anyway.
            return np.asfortranarray(y)
        max_vol = np.abs(y).max()
        # An all-zero signal has no peak; dividing by 1 keeps the values (and the
        # result dtype of the normal path) without generating NaNs.
        scale = max_vol if max_vol != 0 else 1
        y_vol = y * 1 / scale
        return np.asfortranarray(y_vol)


class Compose:
    """Chain several callables and apply them one after another."""

    def __init__(self, transforms: list):
        # Kept as given; the steps run in list order.
        self.transforms = transforms

    def __call__(self, y: np.ndarray):
        """Feed ``y`` through every transform, passing each result to the next."""
        result = y
        for step in self.transforms:
            result = step(result)
        return result

In [7]:
def compute_metric(preds, trues, u_outs):
    """Weighted mean absolute error with weights ``1 - u_outs``.

    Positions where ``u_outs`` is 1 get weight 0 and therefore do not contribute.

    Args:
        preds: predicted values (NumPy array).
        trues: target values, same shape as ``preds``.
        u_outs: ``u_out`` flags, same shape as ``preds``.

    Returns:
        ``sum(w * |trues - preds|) / sum(w)``.

    Raises:
        AssertionError: if the three shapes differ (the message is the shape tuple).
    """
    weights = 1 - u_outs

    assert trues.shape == preds.shape and weights.shape == trues.shape, \
        (trues.shape, preds.shape, weights.shape)

    weighted_abs_err = weights * np.abs(trues - preds)
    return weighted_abs_err.sum() / weights.sum()


class VentilatorLoss(nn.Module):
    """Masked MAE loss: absolute error weighted by ``1 - u_out``.

    Uses the same weighting as ``compute_metric``, so training optimizes the
    quantity that is reported as the score.
    """

    def forward(self, preds, y, u_out):
        """Return the weighted MAE reduced over the last dimension.

        Implemented as ``forward`` (not ``__call__``) so that ``nn.Module.__call__``
        keeps running its hook machinery; ``criterion(preds, y, u_out)`` still works.

        Args:
            preds: predicted values (tensor).
            y: target values, same shape as ``preds``.
            u_out: ``u_out`` flags, same shape as ``preds``.
        """
        w = 1 - u_out
        mae = w * (y - preds).abs()
        mae = mae.sum(-1) / w.sum(-1)

        return mae

In [8]:
def get_criterion():
    """Create the loss module used for training and validation (a VentilatorLoss)."""
    criterion = VentilatorLoss()
    return criterion

In [9]:
# Registry of custom optimizers: maps an optimizer name to its class.
# get_optimizer consults it before falling back to torch.optim. Empty here.
__OPTIMIZERS__ = {}


def get_optimizer(model: nn.Module):
    """Instantiate the optimizer named by ``CFG.optimizer_name`` for ``model``.

    Name resolution: the ``__OPTIMIZERS__`` registry first, then ``torch.optim``.
    ``"SAM"`` is special-cased: it wraps a base optimizer class named by
    ``CFG.base_optimizer`` (resolved the same way).
    NOTE(review): neither ``SAM`` nor ``CFG.base_optimizer`` is defined in the
    visible part of this notebook — that branch needs both to exist.

    ``CFG.optimizer_params`` is forwarded as keyword arguments in every case.
    """
    name = CFG.optimizer_name

    if name == "SAM":
        base_name = CFG.base_optimizer
        base_cls = __OPTIMIZERS__.get(base_name)
        if base_cls is None:
            base_cls = getattr(optim, base_name)
        return SAM(model.parameters(), base_cls, **CFG.optimizer_params)

    optimizer_cls = __OPTIMIZERS__.get(name)
    if optimizer_cls is None:
        optimizer_cls = getattr(optim, name)
    return optimizer_cls(model.parameters(), **CFG.optimizer_params)


def get_scheduler(optimizer):
    """Instantiate the LR scheduler named by ``CFG.scheduler_name``.

    Returns ``None`` when no scheduler is configured; otherwise the class of that
    name from ``torch.optim.lr_scheduler`` built with ``CFG.scheduler_params``.
    """
    name = CFG.scheduler_name
    if name is None:
        return None

    scheduler_cls = getattr(optim.lr_scheduler, name)
    return scheduler_cls(optimizer, **CFG.scheduler_params)

In [10]:
# Cross-validation splitter: the class named by CFG.split is looked up in
# sklearn.model_selection and constructed with CFG.split_params.
splitter = getattr(model_selection, CFG.split)(**CFG.split_params)

In [11]:
class VentilatorDataset(torchdata.Dataset):
    """Dataset that yields one sample per ``breath_id`` group of a DataFrame.

    Args:
        df: frame with at least ``breath_id``, ``u_out`` and the feature columns.
            If it has no ``pressure`` column (e.g. test data) a zero-filled one is
            added to an internal copy; the caller's frame is left untouched.
        train_col: list of feature column names used as model input.
    """

    def __init__(self, df, train_col):
        if "pressure" not in df.columns:
            # assign() returns a new frame, so the DataFrame passed in by the
            # caller is not mutated as a side effect of building the dataset.
            df = df.assign(pressure=0)
        self.df = df
        # Maps each breath_id to the index *labels* of its rows.
        self.groups = df.groupby('breath_id').groups
        self.keys = list(self.groups.keys())
        self.train_col = train_col

    def __len__(self):
        """Number of breaths (groups)."""
        return len(self.groups)

    def __getitem__(self, idx):
        """Return the ``idx``-th breath as a dict of float32 arrays.

        Keys: ``"input"`` (rows x len(train_col)), ``"u_out"`` and ``"p"``
        (one value per row of the breath).
        """
        indexes = self.groups[self.keys[idx]]
        # ``groups`` stores labels, so rows must be selected by label. Positional
        # ``iloc`` only coincides with this for a default 0..n-1 index.
        df_ = self.df.loc[indexes]

        input_ = df_[self.train_col].values
        u_out_ = df_['u_out'].values
        p_ = df_['pressure'].values

        data = {
            "input": input_.astype(np.float32),
            "u_out": u_out_.astype(np.float32),
            "p": p_.astype(np.float32),
        }

        return data

In [12]:
class RNNModel(nn.Module):
    """Per-timestep MLP, then a bidirectional LSTM, then an MLP output head.

    Args:
        input_dim: number of input features per timestep.
        lstm_dim: LSTM hidden size per direction.
        dense_dim: output width of the input MLP (its hidden layer is half of it).
        logit_dim: hidden width of the output head.
        num_classes: number of outputs per timestep.
    """

    def __init__(
        self,
        input_dim=4,
        lstm_dim=256,
        dense_dim=256,
        logit_dim=256,
        num_classes=1,
    ):
        super().__init__()

        half_dense = dense_dim // 2
        embed_layers = [
            nn.Linear(input_dim, half_dense),
            nn.ReLU(),
            nn.Linear(half_dense, dense_dim),
            nn.ReLU(),
        ]
        self.mlp = nn.Sequential(*embed_layers)

        self.lstm = nn.LSTM(
            input_size=dense_dim,
            hidden_size=lstm_dim,
            batch_first=True,
            bidirectional=True,
        )

        # The LSTM is bidirectional, so its output is twice lstm_dim wide.
        head_layers = [
            nn.Linear(lstm_dim * 2, logit_dim),
            nn.ReLU(),
            nn.Linear(logit_dim, num_classes),
        ]
        self.logits = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map ``x`` (batch-first sequence of feature vectors) to per-timestep outputs."""
        hidden, _ = self.lstm(self.mlp(x))
        return self.logits(hidden)

In [13]:
# Learner class (pytorch-lightning)
class Learner(pl.LightningModule):
    """LightningModule that trains ``model`` with the criterion from ``get_criterion``.

    Batches are dicts with the keys ``'input'``, ``'p'`` (target) and ``'u_out'``.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model
        self.criterion = get_criterion()

    def training_step(self, batch, batch_idx):
        """Return the loss over the flattened batch (all timesteps of all samples)."""
        d_ = batch
        output = self.model(d_['input'])
        loss = self.criterion(output.view(-1), d_['p'].view(-1), d_['u_out'].view(-1))
        return loss

    def validation_step(self, batch, batch_idx):
        """Log the batch loss as ``Loss/val`` and return detached tensors for the epoch score."""
        d_ = batch
        output = self.model(d_['input'])
        loss = self.criterion(output.view(-1), d_['p'].view(-1), d_['u_out'].view(-1))

        # Epoch-level aggregate; this is the key monitored by the checkpoint
        # callback and by the LR scheduler.
        self.log('Loss/val', loss, on_step=False, on_epoch=True, prog_bar=False, logger=True)

        output = OrderedDict({
            "targets": d_['p'].detach(), "preds": output.detach(), "u_outs": d_['u_out'].detach(), "loss": loss.detach()
        })
        return output

    def validation_epoch_end(self, outputs):
        """Compute the score over the whole validation epoch, then log and print it."""
        targets = torch.cat([o["targets"].view(-1) for o in outputs]).cpu().numpy()
        preds = torch.cat([o["preds"].view(-1) for o in outputs]).cpu().numpy()
        u_outs = torch.cat([o["u_outs"].view(-1) for o in outputs]).cpu().numpy()

        # get_score is defined in a later cell; it only has to exist by the time
        # validation runs.
        score = get_score(preds, targets, u_outs)
        self.log('custom_mae/val', score, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        print(f'epoch = {self.current_epoch}, custom_mae = {score}')

    def configure_optimizers(self):
        """Return the optimizer, plus the scheduler config when one is configured."""
        optimizer = get_optimizer(self.model)
        scheduler = get_scheduler(optimizer)
        if scheduler is None:
            # get_scheduler returns None when CFG.scheduler_name is None; handing
            # Lightning an "lr_scheduler": None entry is not a valid configuration.
            return optimizer
        return {"optimizer": optimizer, "lr_scheduler": scheduler, "monitor": "Loss/val"}

In [14]:
# Device used by evaluate() for manual inference: GPU when available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [15]:
def get_score(y_pred, y_true, u_outs):
    """Score predictions against targets; thin wrapper around ``compute_metric``."""
    score = compute_metric(preds=y_pred, trues=y_true, u_outs=u_outs)
    return score


def to_np(input):
    """Convert a tensor to a NumPy array (detached from the graph, moved to CPU)."""
    detached = input.detach()
    on_cpu = detached.cpu()
    return on_cpu.numpy()

# oof
def evaluate(model, loaders, phase):
    """Run ``model`` over ``loaders[phase]`` and collect predictions and targets.

    The model is put in eval mode for the pass and returned to whatever mode it
    was in before the call, even if inference raises.

    Args:
        model: module called on ``batch['input']`` (moved to the global ``device``).
        loaders: dict of DataLoaders keyed by phase name.
        phase: key into ``loaders``.

    Returns:
        ``(preds, targets)`` as flat 1-D NumPy arrays in loader order.
    """
    was_training = model.training
    model.eval()
    pred_list = []
    target_list = []
    try:
        with torch.no_grad():
            for batch in loaders[phase]:
                inputs = batch['input'].to(device)
                output = model(inputs)
                pred_list.append(to_np(output))
                target_list.append(to_np(batch['p']))
    finally:
        # Restore the previous mode rather than forcing train mode.
        model.train(was_training)

    pred_list = np.concatenate(pred_list).reshape(-1)
    target_list = np.concatenate(target_list).reshape(-1)
    return pred_list, target_list

In [16]:
# Load the raw competition tables.
train = pd.read_csv(DATA_DIR / 'train.csv')
test = pd.read_csv(DATA_DIR / 'test.csv')
# One display() call renders both frames; the former `display(a), display(b)`
# built a tuple and echoed a stray `(None, None)` as the cell result.
display(train, test)

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure
0,1,1,20,50,0.000000,0.083334,0,5.837492
1,2,1,20,50,0.033652,18.383041,0,5.907794
2,3,1,20,50,0.067514,22.509278,0,7.876254
3,4,1,20,50,0.101542,22.808822,0,11.742872
4,5,1,20,50,0.135756,25.355850,0,12.234987
...,...,...,...,...,...,...,...,...
6035995,6035996,125749,50,10,2.504603,1.489714,1,3.869032
6035996,6035997,125749,50,10,2.537961,1.488497,1,3.869032
6035997,6035998,125749,50,10,2.571408,1.558978,1,3.798729
6035998,6035999,125749,50,10,2.604744,1.272663,1,4.079938


Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out
0,1,0,5,20,0.000000,0.000000,0
1,2,0,5,20,0.031904,7.515046,0
2,3,0,5,20,0.063827,14.651675,0
3,4,0,5,20,0.095751,21.230610,0
4,5,0,5,20,0.127644,26.320956,0
...,...,...,...,...,...,...,...
4023995,4023996,125748,20,10,2.530117,4.971245,1
4023996,4023997,125748,20,10,2.563853,4.975709,1
4023997,4023998,125748,20,10,2.597475,4.979468,1
4023998,4023999,125748,20,10,2.631134,4.982648,1


(None, None)

In [17]:
def get_raw_features(input_df, dataType = 'train'):
    """Return the columns that are used as features without any transformation.

    ``dataType`` is accepted for a uniform processor signature and is not used.
    """
    passthrough_cols = ['time_step', 'u_in']
    return input_df[passthrough_cols]

In [18]:
def get_category_features(input_df, dataType = 'train'):
    """Return ``R`` and ``C`` converted to pandas ``category`` dtype.

    Works on a deep copy of ``input_df``. ``dataType`` is not used.
    """
    categorical_cols = ['R', 'C']
    snapshot = copy.deepcopy(input_df)

    # A combined R/C feature (R + C * 10) was tried here and is disabled.

    selected = snapshot[categorical_cols]
    return selected.astype('category')

In [19]:
def get_diff_shift_features(input_df, dataType = 'train'):
    """Build per-breath shifted and differenced versions of ``u_in`` and ``time_step``.

    For every offset in ``shift_idx`` this adds ``u_in_shift_{i}``, ``u_in_diff_{i}``,
    ``time_step_shift_{i}`` and ``time_step_diff_{i}``, then ``u_in_diff2_{i}``
    (the offset-1 difference of ``u_in_diff_{i}``).

    Returns only the newly added columns. ``dataType`` is not used.
    """
    
    
    output_df = copy.deepcopy(input_df)
    # Number of original columns; everything added after them is returned.
    c_num = input_df.shape[1]
    
    b_id_gby = input_df.groupby(['breath_id'])
    # Row offsets passed to shift(): positive looks at earlier rows, negative at later rows.
    shift_idx = [-2, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    
    def g_by_diff(c_, i):
        # Place [breath_id, c_] next to a copy of the same columns shifted by i rows
        # (suffix '_shift'), keep only rows whose shifted partner has the same
        # breath_id, and return c_ minus the shifted c_ for those rows.
        # Rows dropped by the filter are missing from the result, so they end up
        # as NaN when the Series is assigned back into output_df.
        # NOTE(review): reset_index() renumbers the shifted copy 0..n-1 while the left
        # side keeps output_df's index, and concat aligns on index — this assumes
        # output_df has a default RangeIndex; confirm for other callers.
        temp_df=pd.concat([output_df.loc[:, ['breath_id', c_]], output_df.loc[:, ['breath_id', c_]].reset_index().shift(i).rename(columns=lambda s:s+'_shift')], axis=1)
        df_with_diff=temp_df.loc[(temp_df['breath_id']==temp_df['breath_id_shift']), slice(None)]
        return(df_with_diff[c_]-df_with_diff[f'{c_}_shift'])
    
    # Subtracting directly is faster than using diff
    for i in shift_idx:
        output_df[f'u_in_shift_{i}'] = b_id_gby['u_in'].shift(i)
        output_df[f'u_in_diff_{i}'] = g_by_diff('u_in', i)

        output_df[f'time_step_shift_{i}'] = b_id_gby['time_step'].shift(i)
        output_df[f'time_step_diff_{i}'] = g_by_diff('time_step', i)
        
    # Second pass: offset-1 difference of each u_in_diff_{i} column created above.
    for i in shift_idx:
#         output_df[f'u_in_shift_{i}'] = b_id_gby['u_in'].shift(i)
        output_df[f'u_in_diff2_{i}'] = g_by_diff(f'u_in_diff_{i}', 1)

    
    return output_df.iloc[:, c_num:]

In [20]:
def get_work_features(input_df, dataType = 'train'):
    """Build a 'work' feature and its rolling sums per breath.

    ``work`` is the mean of the current and previous ``u_in`` times the
    ``time_step`` increment, clipped below at 0. The shift and the increment are
    taken over the whole frame, not per breath. ``work_roll_10`` and
    ``work_roll_15`` are rolling sums of ``work`` within each ``breath_id``.

    Returns only the three new columns. ``dataType`` is not used.
    """
    features = copy.deepcopy(input_df)
    n_original = input_df.shape[1]

    previous_u_in = features['u_in'].shift(1).fillna(0)
    time_delta = features['time_step'].diff().fillna(0)
    mean_flow = (features['u_in'] + previous_u_in) / 2
    features['work'] = (mean_flow * time_delta).clip(0,)

    for window in (10, 15):
        work_by_breath = features.groupby(features['breath_id'])['work']
        rolled = work_by_breath.rolling(window=window, min_periods=1).sum()
        # Drop the breath_id level so the result aligns with the frame's index.
        features[f'work_roll_{window}'] = rolled.reset_index(level=0, drop=True)

    return features.iloc[:, n_original:]

In [21]:
def get_cum_features(input_df, dataType = 'train'):
    """Return per-breath cumulative sums of ``u_in`` and ``time_step``.

    Output columns: ``u_in_cumsum`` and ``time_step_cumsum``. ``dataType`` is not used.
    """
    features = copy.deepcopy(input_df)
    n_original = input_df.shape[1]

    by_breath = input_df.groupby(['breath_id'])
    for col in ('u_in', 'time_step'):
        features[f'{col}_cumsum'] = by_breath[col].cumsum()

    return features.iloc[:, n_original:]

In [22]:
def get_agg_features(input_df, dataType = 'train'):
    """Attach per-breath aggregate statistics of ``u_in`` to every row.

    The aggregates (max, std, mean, first, last) are computed over rows whose
    ``time_step`` lies in the default window [0, 3.0], grouped by ``breath_id``,
    and merged back onto the rows with a left join. The merge gives the result
    a fresh 0..n-1 index.

    Returns only the newly added columns. ``dataType`` is not used.
    """
    features = copy.deepcopy(input_df)
    n_original = input_df.shape[1]

    # Column -> list of aggregations applied per breath.
    agg_spec = {
        'u_in': [np.max, np.std, np.mean, 'first', 'last'],
    }

    def get_agg_window(start_time=0, end_time=3.0, add_suffix = False):
        """Aggregate rows with start_time <= time_step <= end_time per breath_id."""
        in_window = (features['time_step'] >= start_time) & (features['time_step'] <= end_time)
        stats = features[in_window].groupby(['breath_id']).agg(agg_spec)
        # Flatten the (column, aggregation) pairs into single names.
        stats.columns = ['_'.join(pair) for pair in stats.columns]

        if add_suffix:
            stats = stats.add_suffix('_' + str(start_time) + '_' + str(end_time))

        return stats

    per_breath = get_agg_window().reset_index()

    # Extra windows (start_time=1 or 2, end_time=1 or 2, with add_suffix=True) were
    # tried here and are currently disabled.

    features = pd.merge(features, per_breath, how='left', on='breath_id')

    return features.iloc[:, n_original:]

In [23]:
def to_feature(input_df, dataType = 'train'):
    """Return a new DataFrame holding the feature matrix derived from ``input_df``.

    Each processor is called as ``func(input_df, dataType)`` under a ``Timer``;
    the returned frames are concatenated column-wise in processor order and the
    result is passed through ``utils.reduce_mem_usage``.

    Raises:
        AssertionError: if a processor returns a frame whose length differs from
            ``input_df`` (the message is the processor's name).
    """
    feature_builders = [
        get_raw_features,
        get_category_features,
        get_diff_shift_features,
        # get_work_features is currently disabled
        get_cum_features,
        get_agg_features
    ]

    combined = pd.DataFrame()

    for builder in tqdm(feature_builders, total=len(feature_builders)):
        with Timer(prefix=builder.__name__ + ' '):
            block = builder(input_df, dataType)

        # Lengths must match; a mismatch means the processor implementation is wrong.
        assert len(block) == len(input_df), builder.__name__
        combined = pd.concat([combined, block], axis=1)

    return utils.reduce_mem_usage(combined)

In [24]:
# Build the feature matrices with the same processor list for train and test.
train_df = to_feature(train, dataType = 'train')
test_df = to_feature(test, dataType = 'test')

 40%|████      | 2/5 [00:00<00:00, 10.11it/s]

get_raw_features  0.014[s]
get_category_features  0.153[s]


 40%|████      | 2/5 [00:19<00:00, 10.11it/s]

get_diff_shift_features  42.836[s]


 60%|██████    | 3/5 [00:44<00:36, 18.43s/it]

get_cum_features  0.200[s]


 80%|████████  | 4/5 [00:45<00:12, 12.13s/it]

get_agg_features  1.162[s]


100%|██████████| 5/5 [00:48<00:00,  9.68s/it]
 40%|████      | 2/5 [00:00<00:00, 15.37it/s]

Mem. usage decreased from 3189.03 Mb to 805.89 Mb (74.7% reduction)
get_raw_features  0.009[s]
get_category_features  0.099[s]


 40%|████      | 2/5 [00:14<00:00, 15.37it/s]

get_diff_shift_features  27.176[s]


 60%|██████    | 3/5 [00:28<00:23, 11.71s/it]

get_cum_features  0.111[s]


 80%|████████  | 4/5 [00:29<00:07,  7.71s/it]

get_agg_features  0.723[s]


100%|██████████| 5/5 [00:30<00:00,  6.16s/it]


Mem. usage decreased from 2126.02 Mb to 537.26 Mb (74.7% reduction)


In [25]:
# train_df['pressure'] = train['pressure']
# train_df[train['u_out']==0].corr().pressure.sort_values(ascending=False)

In [26]:
# train_df.drop('pressure', axis=1, inplace=True)

In [27]:
# Standardize the features. The scaler is fitted on train only and the same
# statistics are applied to test.
# NOTE(review): this cell rebinds train_df/test_df, so re-running it without
# re-running the feature cell would scale already-scaled data.
ss = StandardScaler()
ss.fit(train_df)

train_df = pd.DataFrame(ss.transform(train_df), columns=list(train_df.columns))
# Column means of the *scaled* train features, used to fill NaNs in both train and test.
train_mean = train_df.mean()
train_df = train_df.fillna(train_df.mean())

test_df = pd.DataFrame(ss.transform(test_df), columns=list(test_df.columns))
test_df = test_df.fillna(train_mean)

In [28]:
# One display() call renders both frames and avoids the stray `(None, None)`
# that the tuple expression `display(a), display(b)` echoes as the cell result.
display(train_df, test_df)

Unnamed: 0,time_step,u_in,R,C,u_in_shift_-2,u_in_diff_-2,time_step_shift_-2,time_step_diff_-2,u_in_shift_-1,u_in_diff_-1,...,u_in_diff2_8,u_in_diff2_9,u_in_diff2_10,u_in_cumsum,time_step_cumsum,u_in_amax,u_in_std,u_in_mean,u_in_first,u_in_last
0,-1.706609,-0.538776,-0.359072,1.394522,1.240467e+00,-2.503374e+00,-1.703993e+00,-5.629722e-01,8.449263e-01,-2.511949e+00,...,1.984286e-18,5.132482e-19,-5.889286e-19,-0.980690,-1.116536,-0.245417,0.119488,0.513998,-0.550081,0.282547
1,-1.662664,0.823912,-0.359072,1.394522,1.264159e+00,-5.308007e-01,-1.658398e+00,-7.206978e-01,1.157443e+00,-5.786234e-01,...,1.984286e-18,5.132482e-19,-5.889286e-19,-0.936297,-1.115471,-0.245417,0.119488,0.513998,-0.550081,0.282547
2,-1.618480,1.130953,-0.359072,1.394522,1.467412e+00,-3.576065e-01,-1.612640e+00,-8.784233e-01,1.179935e+00,-5.675740e-02,...,1.984286e-18,5.132482e-19,-5.889286e-19,-0.881968,-1.113334,-0.245417,0.119488,0.513998,-0.550081,0.282547
3,-1.574017,1.153051,-0.359072,1.394522,1.619540e+00,-5.333698e-01,-1.567208e+00,-8.521357e-01,1.372891e+00,-3.633358e-01,...,1.984286e-18,5.132482e-19,-5.889286e-19,-0.826884,-1.110121,-0.245417,0.119488,0.513998,-0.550081,0.282547
4,-1.529395,1.342625,-0.359072,1.394522,1.608317e+00,-2.398601e-01,-1.521613e+00,-7.469854e-01,1.517311e+00,-2.756753e-01,...,1.984286e-18,5.132482e-19,-5.889286e-19,-0.765688,-1.105830,-0.245417,0.119488,0.513998,-0.550081,0.282547
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6035995,1.562294,-0.434126,1.171893,-0.937525,-4.320053e-01,-5.327896e-02,1.649282e+00,-2.738087e-01,-4.356242e-01,-1.572538e-02,...,-5.221866e-04,1.423478e-02,6.424695e-03,-0.046433,1.892880,-0.334970,-0.430004,-0.438473,-0.313459,-3.250301
6035996,1.605641,-0.434199,1.171893,-0.937525,-4.548400e-01,-2.202934e-02,1.693733e+00,-2.475211e-01,-4.302972e-01,-2.550839e-02,...,1.468906e-02,-7.816164e-03,6.349836e-03,-0.042811,1.973948,-0.334970,-0.430004,-0.438473,-0.313459,-3.250301
6035997,1.651539,-0.428965,1.171893,-0.937525,-4.380841e-01,-3.732968e-02,1.738184e+00,-1.686583e-01,-4.519751e-01,2.317612e-02,...,-6.271822e-04,2.136300e-02,-2.090453e-03,-0.038585,2.055017,-0.334970,-0.430004,-0.438473,-0.313459,-3.250301
6035998,1.694886,-0.450263,1.171893,-0.937525,-2.216228e-17,4.515522e-19,1.010101e-16,2.726380e-15,-4.360681e-01,-4.455089e-02,...,-2.390997e-02,-3.742277e-02,-1.904411e-02,-0.035567,2.138062,-0.334970,-0.430004,-0.438473,-0.313459,-3.250301


Unnamed: 0,time_step,u_in,R,C,u_in_shift_-2,u_in_diff_-2,time_step_shift_-2,time_step_diff_-2,u_in_shift_-1,u_in_diff_-1,...,u_in_diff2_8,u_in_diff2_9,u_in_diff2_10,u_in_cumsum,time_step_cumsum,u_in_amax,u_in_std,u_in_mean,u_in_first,u_in_last
0,-1.706609,-0.544978,-1.124554,-0.354513,6.126275e-01,-1.651318e+00,-1.708896e+00,1.014283e+00,2.101770e-02,-1.041172e+00,...,1.984286e-18,5.132482e-19,-5.889286e-19,-0.980892,-1.116536,0.048118,0.367372,0.364841,-0.553395,0.266802
1,-1.664975,0.014441,-1.124554,-0.354513,1.138217e+00,-1.549414e+00,-1.666161e+00,1.014283e+00,5.614117e-01,-9.894817e-01,...,1.984286e-18,5.132482e-19,-5.889286e-19,-0.962744,-1.115527,0.048118,0.367372,0.364841,-0.553395,0.266802
2,-1.623261,0.545366,-1.124554,-0.354513,1.544723e+00,-1.325054e+00,-1.623426e+00,1.014283e+00,1.060374e+00,-9.132784e-01,...,1.984286e-18,5.132482e-19,-5.889286e-19,-0.927355,-1.113508,0.048118,0.367372,0.364841,-0.553395,0.266802
3,-1.581587,1.035584,-1.124554,-0.354513,1.876412e+00,-1.060445e+00,-1.580773e+00,1.040571e+00,1.446285e+00,-7.102472e-01,...,1.984286e-18,5.132482e-19,-5.889286e-19,-0.876082,-1.110476,0.048118,0.367372,0.364841,-0.553395,0.266802
4,-1.539913,1.414733,-1.124554,-0.354513,2.119567e+00,-8.377977e-01,-1.537956e+00,1.014283e+00,1.761169e+00,-5.839523e-01,...,1.984286e-18,5.132482e-19,-5.889286e-19,-0.812623,-1.106441,0.048118,0.367372,0.364841,-0.553395,0.266802
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4023995,1.595442,-0.174842,-0.359072,-0.937525,-1.589231e-01,-4.658715e-02,1.683274e+00,-5.103970e-01,-1.713460e-01,-1.650026e-02,...,4.661974e-03,4.013906e-03,2.409965e-03,-0.510644,1.922539,-0.027504,-0.551590,-0.843326,-0.156195,0.278611
4023996,1.641339,-0.174552,-0.359072,-0.937525,-1.586114e-01,-4.644624e-02,1.727725e+00,-4.578218e-01,-1.710501e-01,-1.640424e-02,...,4.930205e-03,4.293629e-03,2.860898e-03,-0.498571,2.003607,-0.027504,-0.551590,-0.843326,-0.156195,0.278611
4023997,1.684687,-0.174261,-0.359072,-0.937525,-1.586114e-01,-4.633293e-02,1.774791e+00,-6.944102e-01,-1.707541e-01,-1.632514e-02,...,5.128713e-03,4.563137e-03,3.192414e-03,-0.486800,2.086653,-0.027504,-0.551590,-0.843326,-0.156195,0.278611
4023998,1.728034,-0.173970,-0.359072,-0.937525,-2.216228e-17,4.515522e-19,1.010101e-16,2.726380e-15,-1.707541e-01,-1.626321e-02,...,5.310814e-03,4.765857e-03,3.512345e-03,-0.474726,2.169699,-0.027504,-0.551590,-0.843326,-0.156195,0.278611


(None, None)

In [29]:
# Names of the model input columns; captured before id/target columns are appended below.
train_col = train_df.columns.to_list()

In [30]:
# Re-attach the id, grouping, target and mask columns from the raw frames.
# They are not part of train_col, so they are not fed to the model as features.
train_df = pd.concat([train_df, train[['id', 'breath_id', 'pressure', 'u_out']]], axis=1)
test_df = pd.concat([test_df, test[['id', 'breath_id', 'u_out']]], axis=1)

In [31]:
# Out-of-fold predictions: one row per training row.
oof_total = np.zeros((len(train), CFG.num_classes))
# Test predictions: one column per *selected* fold (CFG.folds), in that order.
sub_preds = np.zeros((test.shape[0], len(CFG.folds)))
val_idxes = []
models = []
y = train['pressure']
groups = train['breath_id']
# NOTE(review): gkfold is not used below; the split comes from `splitter`.
gkfold = model_selection.GroupKFold(n_splits=CFG.n_folds)
scores = []
input_dim = len(train_col)

for i, (trn_idx, val_idx) in enumerate(splitter.split(train_df, y, groups)):
    # Train only the folds listed in CFG.folds.
    if i not in CFG.folds:
        continue

    # Reset the index so each fold frame has a 0..n-1 index.
    trn_df = train_df.loc[trn_idx, :].reset_index(drop=True)
    val_df = train_df.loc[val_idx, :].reset_index(drop=True)
    trn_y = y.values[trn_idx]
    val_y = y.values[val_idx]

    # One DataLoader per phase, configured from CFG.loader_params.
    loaders = {
        phase: torchdata.DataLoader(
            VentilatorDataset(
                df_, train_col
            ),
            **CFG.loader_params[phase])  # type: ignore
        for phase, df_ in zip(["train", "valid", "test"], [trn_df, val_df, test_df])
    }

    model = RNNModel(
        input_dim=input_dim,
        lstm_dim=CFG.lstm_dim,
        dense_dim=CFG.dense_dim,
        logit_dim=CFG.logit_dim,
        num_classes=CFG.num_classes,
    )
    model_name = model.__class__.__name__

    learner = Learner(model)

    # loggers
    RUN_NAME = f'exp{str(CFG.exp_num)}'
    wandb.init(project='Ventilator-Pressure-Prediction', entity='sqrt4kaido', group=RUN_NAME, job_type=RUN_NAME + f'-fold-{i}')
    wandb.run.name = RUN_NAME + f'-fold-{i}'
    wandb_config = wandb.config
    wandb_config.model_name = model_name
    wandb.watch(model)

    # callbacks
    callbacks = []
    # Keeps the weights with the lowest 'Loss/val'.
    # NOTE(review): learner.current_epoch is evaluated once, here, when the f-string
    # is built, so the file name does not carry the epoch of the saved checkpoint.
    checkpoint_callback = ModelCheckpoint(
        monitor='Loss/val',
        mode='min',
        dirpath=OUTPUT_DIR,
        verbose=False,
        save_weights_only=True,
        filename=f'{model_name}-{learner.current_epoch}-{i}')
    callbacks.append(checkpoint_callback)

#     early_stop_callback = EarlyStopping(
#         monitor='Loss/val',
#         min_delta=0.00,
#         patience=10,
#         verbose=True,
#         mode='min')
#     callbacks.append(early_stop_callback)

    loggers = []
    loggers.append(WandbLogger())

    trainer = pl.Trainer(
        logger=loggers,
        callbacks=callbacks,
        max_epochs=CFG.epochs,
        default_root_dir=OUTPUT_DIR,
        gpus=1,
#         fast_dev_run=DEBUG,
        deterministic=True,
        benchmark=False,
        )

    trainer.fit(learner, train_dataloader=loaders['train'], val_dataloaders=loaders['valid'])
#     trainer.save_checkpoint(OUTPUT_DIR / "last.ckpt")
    print('train done.')

    #############
    # validation (to make oof)
    #############
    # Reload the best checkpoint; `model` is learner.model, so it receives the weights too.
    checkpoint = torch.load(checkpoint_callback.best_model_path)
    learner.load_state_dict(checkpoint['state_dict'])

    model = model.to(device)
    oof_pred, oof_target = evaluate(model, loaders, phase="valid")
    models.append(model)

    oof_score = get_score(oof_pred, oof_target, val_df['u_out'].values)
    scores.append(oof_score)
    # NOTE(review): the stored OOF values are divided by CFG.bias, but nothing visible
    # in this notebook multiplies the target by it — confirm this scaling is intended.
    oof_total[val_idx] = oof_pred.reshape(1, -1).T / CFG.bias
    val_idxes.append(val_idx)

    print('validate done.')
    # The score is the masked MAE from compute_metric, not an AUC.
    print(f'fold = {i}, custom_mae = {oof_score}')
    wandb.log({'CV_score': oof_score})

    #############
    # inference
    #############
    test_pred, _ = evaluate(model, loaders, phase="test")
    # sub_preds has one column per entry of CFG.folds, so index by the fold's
    # position in that list; the raw fold number overruns the array for any
    # selection other than [0, 1, ...].
    sub_preds[:, CFG.folds.index(i)] = test_pred

    print('inference done.')

# test_preds_total = np.array(test_preds_total)


[34m[1mwandb[0m: Currently logged in as: [33msqrt4kaido[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.3 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type           | Params
---------------------------------------------
0 | model     | RNNModel       | 4.9 M 
1 | criterion | VentilatorLoss | 0     
---------------------------------------------
4.9 M     Trainable params
0         Non-trainable params
4.9 M     Total params
19.511    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

epoch = 0, custom_mae = 17.401975631713867


Training: -1it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

epoch = 0, custom_mae = 1.2780179977416992


Validating: 0it [00:00, ?it/s]

epoch = 1, custom_mae = 0.9160822033882141


Validating: 0it [00:00, ?it/s]

epoch = 2, custom_mae = 0.8100554347038269


Validating: 0it [00:00, ?it/s]

epoch = 3, custom_mae = 0.8084608912467957


Validating: 0it [00:00, ?it/s]

epoch = 4, custom_mae = 0.7654162645339966


Validating: 0it [00:00, ?it/s]

epoch = 5, custom_mae = 0.7016268968582153


Validating: 0it [00:00, ?it/s]

epoch = 6, custom_mae = 0.6840551495552063


Validating: 0it [00:00, ?it/s]

epoch = 7, custom_mae = 0.6486701369285583


Validating: 0it [00:00, ?it/s]

epoch = 8, custom_mae = 0.6565215587615967


Validating: 0it [00:00, ?it/s]

epoch = 9, custom_mae = 0.6572207808494568


Validating: 0it [00:00, ?it/s]

epoch = 10, custom_mae = 0.6343430876731873


Validating: 0it [00:00, ?it/s]

epoch = 11, custom_mae = 0.6135852932929993


Validating: 0it [00:00, ?it/s]

epoch = 12, custom_mae = 0.5317761898040771


Validating: 0it [00:00, ?it/s]

epoch = 13, custom_mae = 0.61543208360672


Validating: 0it [00:00, ?it/s]

epoch = 14, custom_mae = 0.5402854084968567


Validating: 0it [00:00, ?it/s]

epoch = 15, custom_mae = 0.4979722797870636


Validating: 0it [00:00, ?it/s]

epoch = 16, custom_mae = 0.4942989647388458


Validating: 0it [00:00, ?it/s]

epoch = 17, custom_mae = 0.5418785810470581


Validating: 0it [00:00, ?it/s]

epoch = 18, custom_mae = 0.5286137461662292


Validating: 0it [00:00, ?it/s]

epoch = 19, custom_mae = 0.4647052586078644


Validating: 0it [00:00, ?it/s]

epoch = 20, custom_mae = 0.5153690576553345


Validating: 0it [00:00, ?it/s]

epoch = 21, custom_mae = 0.5136891007423401


Validating: 0it [00:00, ?it/s]

epoch = 22, custom_mae = 0.5338268280029297


Validating: 0it [00:00, ?it/s]

epoch = 23, custom_mae = 0.4418579339981079


Validating: 0it [00:00, ?it/s]

epoch = 24, custom_mae = 0.4767453074455261


Validating: 0it [00:00, ?it/s]

epoch = 25, custom_mae = 0.4292917251586914


Validating: 0it [00:00, ?it/s]

epoch = 26, custom_mae = 0.4502922296524048


Validating: 0it [00:00, ?it/s]

epoch = 27, custom_mae = 0.4322549104690552


Validating: 0it [00:00, ?it/s]

epoch = 28, custom_mae = 0.4076730012893677


Validating: 0it [00:00, ?it/s]

epoch = 29, custom_mae = 0.4438994526863098


Validating: 0it [00:00, ?it/s]

epoch = 30, custom_mae = 0.4042321443557739


Validating: 0it [00:00, ?it/s]

epoch = 31, custom_mae = 0.4358953535556793


Validating: 0it [00:00, ?it/s]

epoch = 32, custom_mae = 0.391186386346817


Validating: 0it [00:00, ?it/s]

epoch = 33, custom_mae = 0.48269549012184143


Validating: 0it [00:00, ?it/s]

epoch = 34, custom_mae = 0.37713977694511414


Validating: 0it [00:00, ?it/s]

epoch = 35, custom_mae = 0.4887165427207947


Validating: 0it [00:00, ?it/s]

epoch = 36, custom_mae = 0.40684354305267334


Validating: 0it [00:00, ?it/s]

epoch = 37, custom_mae = 0.3580077886581421


Validating: 0it [00:00, ?it/s]

epoch = 38, custom_mae = 0.3666445016860962


Validating: 0it [00:00, ?it/s]

epoch = 39, custom_mae = 0.3998923897743225


Validating: 0it [00:00, ?it/s]

epoch = 40, custom_mae = 0.433761864900589


Validating: 0it [00:00, ?it/s]

epoch = 41, custom_mae = 0.417447566986084


Validating: 0it [00:00, ?it/s]

epoch = 42, custom_mae = 0.42093151807785034


Validating: 0it [00:00, ?it/s]

epoch = 43, custom_mae = 0.3743779957294464


Validating: 0it [00:00, ?it/s]

epoch = 44, custom_mae = 0.42125779390335083


Validating: 0it [00:00, ?it/s]

epoch = 45, custom_mae = 0.423481822013855


Validating: 0it [00:00, ?it/s]

epoch = 46, custom_mae = 0.3193477690219879


Validating: 0it [00:00, ?it/s]

epoch = 47, custom_mae = 0.30675655603408813


Validating: 0it [00:00, ?it/s]

epoch = 48, custom_mae = 0.31604111194610596


Validating: 0it [00:00, ?it/s]

epoch = 49, custom_mae = 0.3153683543205261


Validating: 0it [00:00, ?it/s]

epoch = 50, custom_mae = 0.2912755608558655


Validating: 0it [00:00, ?it/s]

epoch = 51, custom_mae = 0.29264166951179504


Validating: 0it [00:00, ?it/s]

epoch = 52, custom_mae = 0.297042578458786


Validating: 0it [00:00, ?it/s]

epoch = 53, custom_mae = 0.28448134660720825


Validating: 0it [00:00, ?it/s]

epoch = 54, custom_mae = 0.285397469997406


Validating: 0it [00:00, ?it/s]

epoch = 55, custom_mae = 0.28704833984375


Validating: 0it [00:00, ?it/s]

epoch = 56, custom_mae = 0.2906731069087982


Validating: 0it [00:00, ?it/s]

epoch = 57, custom_mae = 0.2814682424068451


Validating: 0it [00:00, ?it/s]

epoch = 58, custom_mae = 0.27733293175697327


Validating: 0it [00:00, ?it/s]

epoch = 59, custom_mae = 0.27949219942092896


Validating: 0it [00:00, ?it/s]

epoch = 60, custom_mae = 0.27753952145576477


Validating: 0it [00:00, ?it/s]

epoch = 61, custom_mae = 0.29362452030181885


Validating: 0it [00:00, ?it/s]

epoch = 62, custom_mae = 0.27538925409317017


Validating: 0it [00:00, ?it/s]

epoch = 63, custom_mae = 0.27282753586769104


Validating: 0it [00:00, ?it/s]

epoch = 64, custom_mae = 0.27391988039016724


Validating: 0it [00:00, ?it/s]

epoch = 65, custom_mae = 0.2807900607585907


Validating: 0it [00:00, ?it/s]

epoch = 66, custom_mae = 0.26693078875541687


Validating: 0it [00:00, ?it/s]

epoch = 67, custom_mae = 0.280052125453949


Validating: 0it [00:00, ?it/s]

epoch = 68, custom_mae = 0.2686581015586853


Validating: 0it [00:00, ?it/s]

epoch = 69, custom_mae = 0.2663007378578186


Validating: 0it [00:00, ?it/s]

epoch = 70, custom_mae = 0.2674862742424011


Validating: 0it [00:00, ?it/s]

epoch = 71, custom_mae = 0.2730710506439209


Validating: 0it [00:00, ?it/s]

epoch = 72, custom_mae = 0.2724994122982025


Validating: 0it [00:00, ?it/s]

epoch = 73, custom_mae = 0.2656609117984772


Validating: 0it [00:00, ?it/s]

epoch = 74, custom_mae = 0.2644394040107727


Validating: 0it [00:00, ?it/s]

epoch = 75, custom_mae = 0.26335394382476807


Validating: 0it [00:00, ?it/s]

epoch = 76, custom_mae = 0.26647618412971497


Validating: 0it [00:00, ?it/s]

epoch = 77, custom_mae = 0.27331939339637756


Validating: 0it [00:00, ?it/s]

epoch = 78, custom_mae = 0.26083409786224365


Validating: 0it [00:00, ?it/s]

epoch = 79, custom_mae = 0.2627609968185425


Validating: 0it [00:00, ?it/s]

epoch = 80, custom_mae = 0.2621610462665558


Validating: 0it [00:00, ?it/s]

epoch = 81, custom_mae = 0.2605752944946289


Validating: 0it [00:00, ?it/s]

epoch = 82, custom_mae = 0.260576993227005


Validating: 0it [00:00, ?it/s]

epoch = 83, custom_mae = 0.2588895261287689


Validating: 0it [00:00, ?it/s]

epoch = 84, custom_mae = 0.2568439543247223


Validating: 0it [00:00, ?it/s]

epoch = 85, custom_mae = 0.26299503445625305


Validating: 0it [00:00, ?it/s]

epoch = 86, custom_mae = 0.2556528151035309


Validating: 0it [00:00, ?it/s]

epoch = 87, custom_mae = 0.25908032059669495


Validating: 0it [00:00, ?it/s]

epoch = 88, custom_mae = 0.257066011428833


Validating: 0it [00:00, ?it/s]

epoch = 89, custom_mae = 0.2562018036842346


Validating: 0it [00:00, ?it/s]

epoch = 90, custom_mae = 0.2531166970729828


Validating: 0it [00:00, ?it/s]

epoch = 91, custom_mae = 0.2537979185581207


Validating: 0it [00:00, ?it/s]

epoch = 92, custom_mae = 0.2660325765609741


Validating: 0it [00:00, ?it/s]

epoch = 93, custom_mae = 0.2569926381111145


Validating: 0it [00:00, ?it/s]

epoch = 94, custom_mae = 0.2523874044418335


Validating: 0it [00:00, ?it/s]

epoch = 95, custom_mae = 0.2506985366344452


Validating: 0it [00:00, ?it/s]

epoch = 96, custom_mae = 0.25382286310195923


Validating: 0it [00:00, ?it/s]

epoch = 97, custom_mae = 0.25439685583114624


Validating: 0it [00:00, ?it/s]

epoch = 98, custom_mae = 0.26427629590034485


Validating: 0it [00:00, ?it/s]

epoch = 99, custom_mae = 0.2530759871006012


Validating: 0it [00:00, ?it/s]

epoch = 100, custom_mae = 0.25191035866737366


Validating: 0it [00:00, ?it/s]

epoch = 101, custom_mae = 0.25722554326057434


Validating: 0it [00:00, ?it/s]

epoch = 102, custom_mae = 0.25053492188453674


Validating: 0it [00:00, ?it/s]

epoch = 103, custom_mae = 0.2542229890823364


Validating: 0it [00:00, ?it/s]

epoch = 104, custom_mae = 0.252645879983902


Validating: 0it [00:00, ?it/s]

epoch = 105, custom_mae = 0.2530703842639923


Validating: 0it [00:00, ?it/s]

epoch = 106, custom_mae = 0.25353991985321045


Validating: 0it [00:00, ?it/s]

epoch = 107, custom_mae = 0.250175803899765


Validating: 0it [00:00, ?it/s]

epoch = 108, custom_mae = 0.2509814500808716


Validating: 0it [00:00, ?it/s]

epoch = 109, custom_mae = 0.25077924132347107


Validating: 0it [00:00, ?it/s]

epoch = 110, custom_mae = 0.2594130039215088


Validating: 0it [00:00, ?it/s]

epoch = 111, custom_mae = 0.24949830770492554


Validating: 0it [00:00, ?it/s]

epoch = 112, custom_mae = 0.25043541193008423


Validating: 0it [00:00, ?it/s]

epoch = 113, custom_mae = 0.26939675211906433


Validating: 0it [00:00, ?it/s]

epoch = 114, custom_mae = 0.25005993247032166


Validating: 0it [00:00, ?it/s]

epoch = 115, custom_mae = 0.25243785977363586


Validating: 0it [00:00, ?it/s]

epoch = 116, custom_mae = 0.24983447790145874


Validating: 0it [00:00, ?it/s]

epoch = 117, custom_mae = 0.24880367517471313


Validating: 0it [00:00, ?it/s]

epoch = 118, custom_mae = 0.24790920317173004


Validating: 0it [00:00, ?it/s]

epoch = 119, custom_mae = 0.2486611008644104


Validating: 0it [00:00, ?it/s]

epoch = 120, custom_mae = 0.2519163489341736


Validating: 0it [00:00, ?it/s]

epoch = 121, custom_mae = 0.24991929531097412


Validating: 0it [00:00, ?it/s]

epoch = 122, custom_mae = 0.24970681965351105


Validating: 0it [00:00, ?it/s]

epoch = 123, custom_mae = 0.24853506684303284


Validating: 0it [00:00, ?it/s]

epoch = 124, custom_mae = 0.24873697757720947


Validating: 0it [00:00, ?it/s]

epoch = 125, custom_mae = 0.24654139578342438


Validating: 0it [00:00, ?it/s]

epoch = 126, custom_mae = 0.24601641297340393


Validating: 0it [00:00, ?it/s]

epoch = 127, custom_mae = 0.2451343685388565


Validating: 0it [00:00, ?it/s]

epoch = 128, custom_mae = 0.24697601795196533


Validating: 0it [00:00, ?it/s]

epoch = 129, custom_mae = 0.2482604831457138


Validating: 0it [00:00, ?it/s]

epoch = 130, custom_mae = 0.24653320014476776


Validating: 0it [00:00, ?it/s]

epoch = 131, custom_mae = 0.24675032496452332


Validating: 0it [00:00, ?it/s]

epoch = 132, custom_mae = 0.2474113553762436


Validating: 0it [00:00, ?it/s]

epoch = 133, custom_mae = 0.24716860055923462


Validating: 0it [00:00, ?it/s]

epoch = 134, custom_mae = 0.24604591727256775


Validating: 0it [00:00, ?it/s]

epoch = 135, custom_mae = 0.24453455209732056


Validating: 0it [00:00, ?it/s]

epoch = 136, custom_mae = 0.24583286046981812


Validating: 0it [00:00, ?it/s]

epoch = 137, custom_mae = 0.24524781107902527


Validating: 0it [00:00, ?it/s]

epoch = 138, custom_mae = 0.24447187781333923


Validating: 0it [00:00, ?it/s]

epoch = 139, custom_mae = 0.245356947183609


Validating: 0it [00:00, ?it/s]

epoch = 140, custom_mae = 0.24770058691501617


Validating: 0it [00:00, ?it/s]

epoch = 141, custom_mae = 0.24594055116176605


Validating: 0it [00:00, ?it/s]

epoch = 142, custom_mae = 0.24377427995204926


Validating: 0it [00:00, ?it/s]

epoch = 143, custom_mae = 0.24781158566474915


Validating: 0it [00:00, ?it/s]

epoch = 144, custom_mae = 0.2444905787706375


Validating: 0it [00:00, ?it/s]

epoch = 145, custom_mae = 0.2437036782503128


Validating: 0it [00:00, ?it/s]

epoch = 146, custom_mae = 0.24857880175113678


Validating: 0it [00:00, ?it/s]

epoch = 147, custom_mae = 0.24313384294509888


Validating: 0it [00:00, ?it/s]

epoch = 148, custom_mae = 0.24314790964126587


Validating: 0it [00:00, ?it/s]

epoch = 149, custom_mae = 0.24282482266426086


Validating: 0it [00:00, ?it/s]

epoch = 150, custom_mae = 0.24181687831878662


Validating: 0it [00:00, ?it/s]

epoch = 151, custom_mae = 0.24566838145256042


Validating: 0it [00:00, ?it/s]

epoch = 152, custom_mae = 0.24425895512104034


Validating: 0it [00:00, ?it/s]

epoch = 153, custom_mae = 0.24397186934947968


Validating: 0it [00:00, ?it/s]

epoch = 154, custom_mae = 0.2430107146501541


Validating: 0it [00:00, ?it/s]

epoch = 155, custom_mae = 0.2417331337928772


Validating: 0it [00:00, ?it/s]

epoch = 156, custom_mae = 0.24127709865570068


Validating: 0it [00:00, ?it/s]

epoch = 157, custom_mae = 0.24375028908252716


Validating: 0it [00:00, ?it/s]

epoch = 158, custom_mae = 0.24333736300468445


Validating: 0it [00:00, ?it/s]

epoch = 159, custom_mae = 0.24114501476287842


Validating: 0it [00:00, ?it/s]

epoch = 160, custom_mae = 0.24235960841178894


Validating: 0it [00:00, ?it/s]

epoch = 161, custom_mae = 0.24387650191783905


Validating: 0it [00:00, ?it/s]

epoch = 162, custom_mae = 0.2413032054901123


Validating: 0it [00:00, ?it/s]

epoch = 163, custom_mae = 0.24364420771598816


Validating: 0it [00:00, ?it/s]

epoch = 164, custom_mae = 0.2424670308828354


Validating: 0it [00:00, ?it/s]

epoch = 165, custom_mae = 0.24185511469841003


Validating: 0it [00:00, ?it/s]

epoch = 166, custom_mae = 0.24117128551006317


Validating: 0it [00:00, ?it/s]

epoch = 167, custom_mae = 0.2424057424068451


Validating: 0it [00:00, ?it/s]

epoch = 168, custom_mae = 0.23630450665950775


Validating: 0it [00:00, ?it/s]

epoch = 169, custom_mae = 0.23574422299861908


Validating: 0it [00:00, ?it/s]

epoch = 170, custom_mae = 0.23591028153896332


Validating: 0it [00:00, ?it/s]

epoch = 171, custom_mae = 0.23605641722679138


Validating: 0it [00:00, ?it/s]

epoch = 172, custom_mae = 0.23567421734333038


Validating: 0it [00:00, ?it/s]

epoch = 173, custom_mae = 0.23665045201778412


Validating: 0it [00:00, ?it/s]

epoch = 174, custom_mae = 0.23595471680164337


Validating: 0it [00:00, ?it/s]

epoch = 175, custom_mae = 0.2355899214744568


Validating: 0it [00:00, ?it/s]

epoch = 176, custom_mae = 0.2358495593070984


Validating: 0it [00:00, ?it/s]

epoch = 177, custom_mae = 0.23558932542800903


Validating: 0it [00:00, ?it/s]

epoch = 178, custom_mae = 0.23585757613182068


Validating: 0it [00:00, ?it/s]

epoch = 179, custom_mae = 0.2356303185224533


Validating: 0it [00:00, ?it/s]

epoch = 180, custom_mae = 0.23575711250305176


Validating: 0it [00:00, ?it/s]

epoch = 181, custom_mae = 0.23555061221122742


Validating: 0it [00:00, ?it/s]

epoch = 182, custom_mae = 0.23578253388404846


Validating: 0it [00:00, ?it/s]

epoch = 183, custom_mae = 0.2352772206068039


Validating: 0it [00:00, ?it/s]

epoch = 184, custom_mae = 0.23570546507835388


Validating: 0it [00:00, ?it/s]

epoch = 185, custom_mae = 0.23540054261684418


Validating: 0it [00:00, ?it/s]

epoch = 186, custom_mae = 0.23532918095588684


Validating: 0it [00:00, ?it/s]

epoch = 187, custom_mae = 0.2357015162706375


Validating: 0it [00:00, ?it/s]

epoch = 188, custom_mae = 0.2356938123703003


Validating: 0it [00:00, ?it/s]

epoch = 189, custom_mae = 0.23577509820461273


Validating: 0it [00:00, ?it/s]

epoch = 190, custom_mae = 0.2355286329984665


Validating: 0it [00:00, ?it/s]

epoch = 191, custom_mae = 0.23582527041435242


Validating: 0it [00:00, ?it/s]

epoch = 192, custom_mae = 0.2348097413778305


Validating: 0it [00:00, ?it/s]

epoch = 193, custom_mae = 0.23480325937271118


Validating: 0it [00:00, ?it/s]

epoch = 194, custom_mae = 0.23485708236694336


Validating: 0it [00:00, ?it/s]

epoch = 195, custom_mae = 0.23486566543579102


Validating: 0it [00:00, ?it/s]

epoch = 196, custom_mae = 0.23508116602897644


Validating: 0it [00:00, ?it/s]

epoch = 197, custom_mae = 0.23493275046348572


Validating: 0it [00:00, ?it/s]

epoch = 198, custom_mae = 0.23501606285572052


Validating: 0it [00:00, ?it/s]

epoch = 199, custom_mae = 0.23489944636821747
train done.
validate done.
fold = 0, auc = 0.23480322179150553
inference done.


In [32]:
# Score the out-of-fold (OOF) predictions, print the MAE and write them to
# `oof{exp_num}.csv`. `oof_score` is bound in BOTH branches because the W&B
# summary cell logs `oof_score`; previously the all-folds branch only bound
# `score`, so a full CV run left `oof_score` undefined (or stale).
if len(CFG.folds) != CFG.n_folds:
    # Partial run: only a subset of the folds was trained.
    # NOTE(review): this indexes `val_idxes[0]`, i.e. it presumably assumes the
    # single trained fold is fold 0 -- confirm before running other subsets.
    oof_score = get_score(oof_pred, oof_target, val_df['u_out'].values)
    print(f'MAE {oof_score}')

    # Take the first column of the validation rows. The explicit .copy() makes
    # this an independent frame, so adding a column below neither triggers
    # SettingWithCopyWarning nor depends on view/copy semantics of `train`.
    oof_df = train.iloc[val_idxes[0], :1].copy()
    oof_df['pressure'] = oof_pred
else:
    # Full run: score the predictions accumulated over every fold.
    # `score` is kept as an alias so anything that reads it keeps working.
    oof_score = score = get_score(y, oof_total, train['u_out'].values)
    print(f'MAE {score}: folds: {scores}')

    oof_df = pd.DataFrame({'id': train['id'].values, 'pressure': oof_total.reshape(-1)})

# Same destination and options in both cases, so the write happens once here.
oof_df.to_csv(OUTPUT_DIR / f'oof{CFG.exp_num}.csv', index=False)
oof_df

MAE 0.23480322179150553


Unnamed: 0,id,pressure
240,241,6.529589
241,242,5.667839
242,243,6.685966
243,244,9.171091
244,245,11.216644
...,...,...
6035995,6035996,31.809345
6035996,6035997,31.502237
6035997,6035998,30.515612
6035998,6035999,30.017593


In [33]:
# Build the submission: load the sample file, overwrite its `pressure` column
# with the mean of `sub_preds` along axis 1, and save it as `sub{exp_num}.csv`.
# NOTE(review): presumably axis 1 of `sub_preds` indexes the trained folds -- confirm.
sub = pd.read_csv(DATA_DIR / 'sample_submission.csv').assign(
    pressure=np.mean(sub_preds, axis=1)
)
sub.to_csv(OUTPUT_DIR / f'sub{CFG.exp_num}.csv', index=False)
sub

Unnamed: 0,id,pressure
0,1,6.217560
1,2,5.903419
2,3,7.004230
3,4,7.585358
4,5,9.049248
...,...,...
4023995,4023996,16.077518
4023996,4023997,16.366518
4023997,4023998,16.569006
4023998,4023999,16.681765


In [34]:
# Open a separate W&B run (job_type 'summary', grouped under RUN_NAME),
# rename it to 'summary', log the overall CV score on it, then close the run.
wandb.init(
    project='Ventilator-Pressure-Prediction',
    entity='sqrt4kaido',
    group=RUN_NAME,
    job_type='summary',
)
wandb.run.name = 'summary'
wandb.log(dict(CV_score=oof_score))
# wandb.save(utils.get_notebook_path())
wandb.finish()

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Loss/val,0.23485
custom_mae/val,0.2349
epoch,199.0
trainer/global_step,94199.0
_runtime,4112.0
_timestamp,1633074556.0
_step,200.0
CV_score,0.2348


0,1
Loss/val,█▆▅▄▄▃▃▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
custom_mae/val,█▆▅▄▄▃▃▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
CV_score,▁


[34m[1mwandb[0m: wandb version 0.12.3 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
CV_score,0.2348
_runtime,2.0
_timestamp,1633075464.0
_step,0.0


0,1
CV_score,▁
_runtime,▁
_timestamp,▁
_step,▁
