# Train classification

Most sharing code train this dataset as a regression task.

But in this code, I train as a classification task.

I encode the target value pressure to 950 classes and calculate CrossEntropy Loss.

In [1]:
from IPython.core.display import display, HTML 
display(HTML("<style>.container { width:100% !important; }</style>")) 

In [2]:
# from kaggle_secrets import UserSecretsClient
# secret_label = "wandb"
# secret_value = UserSecretsClient().get_secret(secret_label)
# # !wandb login $secret_value

In [8]:
import gc
import os
import sys
import copy
import random
# import wandb
import math
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.model_selection import GroupKFold
from tqdm.notebook import tqdm

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader

from transformers import AdamW
from transformers import get_cosine_schedule_with_warmup
from sklearn.preprocessing import RobustScaler

device = torch.device("cuda")

In [9]:
sys.path.append('../../src/')
import utils as utils
from utils import Timer

In [10]:
class CFG:
    exp_num = 37
    n_folds = 5
    folds = [0]
    seed = 777
    local = True
    
    lr = 1e-3
    epochs = 50
    emb_dim = 64
    hidden_dim = 256
    weight_decay = 1e-3    
    
    ######################
    # Loaders #
    ######################
    loader_params = {
        "train": {
            'batch_size': 128,
            'shuffle': True,
            'num_workers': 8,
            'pin_memory': True,
            'drop_last': True,
        },
        "valid": {
            'batch_size': 32,
            'shuffle': False,
            'num_workers': 8,
            'pin_memory': True,
            'drop_last': False,
        },
        "test": {
            'batch_size': 32,
            'shuffle': False,
            'num_workers': 8,
            'pin_memory': True,
            'drop_last': False,
        }
    }


In [11]:
utils.set_seed(CFG.seed)    

In [12]:
if CFG.local:
    DATA_DIR = Path("/home/knikaido/work/Ventilator-Pressure-Prediction/data/ventilator-pressure-prediction")
    OUTPUT_DIR = Path('./output/')
else:
    DATA_DIR = Path("../input/ventilator-pressure-prediction")
    OUTPUT_DIR = Path('')   

In [13]:
def loss_fn(y_pred, y_true, u_outs):
#     loss = nn.L1Loss()(y_pred.reshape(-1), y_true.reshape(-1))
    w = 1 - u_outs.reshape(-1)
    loss = nn.CrossEntropyLoss(reduction = 'none')(y_pred.reshape(-1, 950), y_true.reshape(-1)).reshape(-1)
    loss = loss * w
    loss = loss.sum() / w.sum()
    return loss

def compute_metric(preds, trues, u_outs):
    """
    Metric for the problem, as I understood it.
    """
    
    y = trues
    w = 1 - u_outs
    
    assert y.shape == preds.shape and w.shape == y.shape, (y.shape, preds.shape, w.shape)
    
    mae = w * np.abs(y - preds)
    mae = mae.sum() / w.sum()
    
    return mae

In [14]:
class VentilatorDataset(Dataset):
    
    def __init__(self, df, train_value_col, train_category_col, label_dic=None):
        self.dfs = [_df for _, _df in df.groupby("breath_id")]
        self.label_dic = label_dic
        
    def __len__(self):
        return len(self.dfs)
    
    def __getitem__(self, item):
        df = self.dfs[item]
        X = df[train_category_col + train_value_col].values
        u_out = df['u_out'].values
        y = df['pressure'].values
        if self.label_dic is None:
            label = [-1]
        else:
            label = [self.label_dic[i] for i in y]

        d = {
            "X": torch.tensor(X).float(),
            "u_out": torch.tensor(u_out).long(),
            "y" : torch.tensor(label).long(),
        }
        return d

In [15]:
class VentilatorModel(nn.Module):
    
    def __init__(self, input_dim):
        super(VentilatorModel, self).__init__()
        self.r_emb = nn.Embedding(3, 2, padding_idx=0)
        self.c_emb = nn.Embedding(3, 2, padding_idx=0)
        self.rc_dot_emb = nn.Embedding(8, 4, padding_idx=0)
        self.rc_sum_emb = nn.Embedding(8, 4, padding_idx=0)
        self.seq_emb = nn.Sequential(
            nn.Linear(12+input_dim, CFG.emb_dim),
            nn.LayerNorm(CFG.emb_dim)
        )
        
        self.lstm = nn.LSTM(CFG.emb_dim, CFG.hidden_dim, batch_first=True, bidirectional=True, dropout=0.2, num_layers=4)

        self.head = nn.Sequential(
            nn.Linear(CFG.hidden_dim * 2, CFG.hidden_dim * 2),
            nn.LayerNorm(CFG.hidden_dim * 2),
            nn.ReLU(),
            nn.Linear(CFG.hidden_dim * 2, 950),
        )
        
        # Encoder
        initrange = 0.1
        self.r_emb.weight.data.uniform_(-initrange, initrange)
        self.c_emb.weight.data.uniform_(-initrange, initrange)
        self.rc_dot_emb.weight.data.uniform_(-initrange, initrange)
        self.rc_sum_emb.weight.data.uniform_(-initrange, initrange)
        
        # LSTM
        for n, m in self.named_modules():
            if isinstance(m, nn.LSTM):
                print(f'init {m}')
                for param in m.parameters():
                    if len(param.shape) >= 2:
                        nn.init.orthogonal_(param.data)
                    else:
                        nn.init.normal_(param.data)

    def forward(self, X, y=None):
        # embed
        bs = X.shape[0]
        r_emb = self.r_emb(X[:,:,0].long()).view(bs, 80, -1)
        c_emb = self.c_emb(X[:,:,1].long()).view(bs, 80, -1)
        rc_dot_emb = self.rc_dot_emb(X[:,:,2].long()).view(bs, 80, -1)
        rc_sum_emb = self.rc_sum_emb(X[:,:,3].long()).view(bs, 80, -1)
        
        seq_x = torch.cat((r_emb, c_emb, rc_dot_emb, rc_sum_emb, X[:, :, 4:]), 2)
        emb_x = self.seq_emb(seq_x)
        
        out, _ = self.lstm(emb_x, None) 
        logits = self.head(out)

        return logits

In [16]:
def train_loop(model, optimizer, scheduler, loader):
    losses, lrs = [], []
    model.train()
    optimizer.zero_grad()
    for d in loader:
        out = model(d['X'].to(device))
        loss = loss_fn(out, d['y'].to(device), d['u_out'].to(device))
        losses.append(loss.item())
        step_lr = np.array([param_group["lr"] for param_group in optimizer.param_groups]).mean()
        lrs.append(step_lr)
        
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        scheduler.step()

    return np.array(losses).mean(), np.array(lrs).mean()


def valid_loop(model, loader, target_dic_inv):
    losses, predicts = [], []
    model.eval()
    for d in loader:
        with torch.no_grad():
            out = model(d['X'].to(device))
            loss = loss_fn(out, d['y'].to(device), d['u_out'].to(device))
        pred = out.reshape(-1, 950).softmax(1)
        pred = torch.sum(torch.tensor(unique_targets).to(device) *  pred, axis=1)
        losses.append(loss.item())
        predicts.append(pred.cpu().numpy())

    return np.array(losses).mean(), np.concatenate(predicts)

def test_loop(model, loader, target_dic_inv):
    predicts = []
    model.eval()
    for d in loader:
        with torch.no_grad():
            out = model(d['X'].to(device))
        pred = out.reshape(-1, 950).softmax(1)
        pred = torch.sum(torch.tensor(unique_targets).to(device) *  pred, axis=1)
        predicts.append(pred.cpu().numpy())

    return np.concatenate(predicts)

In [17]:
def get_raw_features(input_df, dataType = 'train'):
    colum = ['time_step', 'u_in', 'u_out']

    return input_df[colum]

In [18]:
def get_category_features(input_df, dataType = 'train'):
    output_df = copy.deepcopy(input_df)
    c_dic = {10: 0, 20: 1, 50:2}
    r_dic = {5: 0, 20: 1, 50:2}
    rc_sum_dic = {v: i for i, v in enumerate([15, 25, 30, 40, 55, 60, 70, 100])}
    rc_dot_dic = {v: i for i, v in enumerate([50, 100, 200, 250, 400, 500, 2500, 1000])}  
    
    colum = ['C_cate', 'R_cate', 'RC_sum', 'RC_dot']
    output_df['C_cate'] = output_df['C'].map(c_dic)
    output_df['R_cate'] = output_df['R'].map(r_dic)
    output_df['RC_sum'] = (output_df['R'] + output_df['C']).map(rc_sum_dic)
    output_df['RC_dot'] = (output_df['R'] * output_df['C']).map(rc_dot_dic)

    return output_df[colum]

In [19]:
def get_simple_calc_features(input_df, dataType = 'train'):
    output_df = copy.deepcopy(input_df)
    c_num = input_df.shape[1]
    
    output_df['time_delta'] = output_df.groupby('breath_id')['time_step'].diff().fillna(0)
    output_df['delta'] = output_df['time_delta'] * output_df['u_in']
    output_df['area'] = output_df.groupby('breath_id')['delta'].cumsum()

    output_df['cross']= output_df['u_in']*output_df['u_out']
    output_df['cross2']= output_df['time_step']*output_df['u_out']
    
    output_df['u_in_cumsum'] = (output_df['u_in']).groupby(output_df['breath_id']).cumsum()
    output_df['one'] = 1
    output_df['count'] = (output_df['one']).groupby(output_df['breath_id']).cumsum()
    output_df['u_in_cummean'] =output_df['u_in_cumsum'] / output_df['count']
    
    output_df = output_df.drop(['count','one'], axis=1)
    
    return output_df.iloc[:, c_num:]

In [20]:
def get_diff_shift_features(input_df, dataType = 'train'):
    
    output_df = copy.deepcopy(input_df)
    c_num = input_df.shape[1]
    
    USE_LAG = [-2, -1, 1, 2, 3, 4]
    
    for lag in USE_LAG:
        output_df[f'breath_id_lag{lag}']=output_df['breath_id'].shift(lag).fillna(0)
        output_df[f'breath_id_lag{lag}same']=np.select([output_df[f'breath_id_lag{lag}']==output_df['breath_id']], [1], 0)

        # u_in 
        output_df[f'u_in_lag_{lag}'] = output_df['u_in'].shift(lag).fillna(0) * output_df[f'breath_id_lag{lag}same']
        output_df[f'u_in_diff_{lag}'] = output_df['u_in'] - output_df[f'u_in_lag_{lag}']
        output_df[f'u_out_lag_{lag}'] = output_df['u_out'].shift(lag).fillna(0) * output_df[f'breath_id_lag{lag}same']

        # breath_time
    output_df[f'time_step_lag_{1}'] = output_df['time_step'].shift(1).fillna(0) * output_df[f'breath_id_lag{1}same']
    output_df[f'time_step_diff_{1}'] = output_df['time_step'] - output_df[f'time_step_lag_{1}']

    drop_columns = ['time_step_lag_1']
    drop_columns = [f'breath_id_lag{i}' for i in USE_LAG]
    drop_columns += [f'breath_id_lag{i}same' for i in USE_LAG]
    output_df = output_df.drop(drop_columns, axis=1)

    # fill na by zero
    output_df = output_df.fillna(0)
    
    return output_df.iloc[:, c_num:]

In [21]:
def get_agg_features(input_df, dataType = 'train'):
    
    output_df = copy.deepcopy(input_df)
    c_num = input_df.shape[1]
    
    # Dict for aggregations
    create_feature_dict = {
        'u_in': [np.max, np.std, np.mean, 'first', 'last'],
    }
    
    def get_agg_window(start_time=0, end_time=3.0, add_suffix = False):
        
        df_tgt = output_df[(output_df['time_step'] >= start_time) & (output_df['time_step'] <= end_time)]
        df_feature = df_tgt.groupby(['breath_id']).agg(create_feature_dict)
        df_feature.columns = ['_'.join(col) for col in df_feature.columns]
        
        if add_suffix:
            df_feature = df_feature.add_suffix('_' + str(start_time) + '_' + str(end_time))
            
        return df_feature
    
    df_agg_feature = get_agg_window().reset_index()
    
#     df_tmp = get_agg_window(start_time = 2, add_suffix = True).reset_index()
#     df_agg_feature = df_agg_feature.merge(df_tmp, how = 'left', on = 'breath_id')
#     df_tmp = get_agg_window(start_time = 1, add_suffix = True).reset_index()
#     df_agg_feature = df_agg_feature.merge(df_tmp, how = 'left', on = 'breath_id')
#     df_tmp = get_agg_window(end_time = 1, add_suffix = True).reset_index()
#     df_agg_feature = df_agg_feature.merge(df_tmp, how = 'left', on = 'breath_id')
#     df_tmp = get_agg_window(end_time = 2, add_suffix = True).reset_index()
#     df_agg_feature = df_agg_feature.merge(df_tmp, how = 'left', on = 'breath_id')

    output_df = pd.merge(output_df, df_agg_feature, how='left', on='breath_id')
    
    output_df['u_in_diffmax'] = output_df['u_in_amax'] - output_df['u_in']
    output_df['u_in_diffmean'] = output_df['u_in_mean'] - output_df['u_in']
    
    return output_df.iloc[:, c_num:]

In [22]:
def to_feature(input_df, dataType = 'train'):
    """input_df を特徴量行列に変換した新しいデータフレームを返す.
    """

    processors = [
        get_raw_features,
        get_simple_calc_features,
        get_diff_shift_features,
#         get_agg_features,
        get_category_features,
    ]

    out_df = pd.DataFrame()

    for func in tqdm(processors, total=len(processors)):
        with Timer(prefix='' + func.__name__ + ' '):
            _df = func(input_df, dataType)

        # 長さが等しいことをチェック (ずれている場合, func の実装がおかしい)
        assert len(_df) == len(input_df), func.__name__
        out_df = pd.concat([out_df, _df], axis=1)
#     out_df = utils.reduce_mem_usage(out_df)
    
    return out_df

In [23]:
train = pd.read_csv(DATA_DIR / 'train.csv')
test = pd.read_csv(DATA_DIR / 'test.csv')
sub_df = pd.read_csv(DATA_DIR / "sample_submission.csv")

In [24]:
display(train), display(test)

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure
0,1,1,20,50,0.000000,0.083334,0,5.837492
1,2,1,20,50,0.033652,18.383041,0,5.907794
2,3,1,20,50,0.067514,22.509278,0,7.876254
3,4,1,20,50,0.101542,22.808822,0,11.742872
4,5,1,20,50,0.135756,25.355850,0,12.234987
...,...,...,...,...,...,...,...,...
6035995,6035996,125749,50,10,2.504603,1.489714,1,3.869032
6035996,6035997,125749,50,10,2.537961,1.488497,1,3.869032
6035997,6035998,125749,50,10,2.571408,1.558978,1,3.798729
6035998,6035999,125749,50,10,2.604744,1.272663,1,4.079938


Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out
0,1,0,5,20,0.000000,0.000000,0
1,2,0,5,20,0.031904,7.515046,0
2,3,0,5,20,0.063827,14.651675,0
3,4,0,5,20,0.095751,21.230610,0
4,5,0,5,20,0.127644,26.320956,0
...,...,...,...,...,...,...,...
4023995,4023996,125748,20,10,2.530117,4.971245,1
4023996,4023997,125748,20,10,2.563853,4.975709,1
4023997,4023998,125748,20,10,2.597475,4.979468,1
4023998,4023999,125748,20,10,2.631134,4.982648,1


(None, None)

In [25]:
train_df = to_feature(train, dataType = 'train')
test_df = to_feature(test, dataType = 'test')

  0%|          | 0/4 [00:00<?, ?it/s]

get_raw_features  0.021[s]
get_simple_calc_features  12.124[s]
get_diff_shift_features  2.170[s]
get_category_features  0.405[s]


  0%|          | 0/4 [00:00<?, ?it/s]

get_raw_features  0.014[s]
get_simple_calc_features  8.011[s]
get_diff_shift_features  1.277[s]
get_category_features  0.250[s]


In [26]:
train_category_col = ['C_cate', 'R_cate', 'RC_sum', 'RC_dot']
train_value_col = [i for i in train_df.columns.to_list() if i not in train_category_col]

In [27]:
norm_features = train_value_col
norm_features = sorted(list(set(train_value_col) - set(['u_out'])), key=norm_features.index)
def norm_scale(train_df, test_df):
    scaler = RobustScaler()
    all_u_in = np.vstack([train_df[norm_features].values, test_df[norm_features].values])
    scaler.fit(all_u_in)
    train_df[norm_features] = scaler.transform(train_df[norm_features].values)
    test_df[norm_features] = scaler.transform(test_df[norm_features].values)
    return train_df, test_df

In [28]:
train_df, test_df = norm_scale(train_df, test_df)

In [29]:
train_df = utils.reduce_mem_usage(train_df)
test_df = utils.reduce_mem_usage(test_df)

Mem. usage decreased from 1565.73 Mb to 362.65 Mb (76.8% reduction)
Mem. usage decreased from 1043.82 Mb to 241.77 Mb (76.8% reduction)


In [30]:
train_df = pd.concat([train_df, train[['id', 'breath_id', 'pressure']]], axis=1)
test_df = pd.concat([test_df, test[['id', 'breath_id']]], axis=1)
test_df['pressure'] = train_df['pressure'].values[-1]

In [31]:
unique_targets = sorted(train_df['pressure'].unique().tolist())
target_dic = {v:i for i, v in enumerate(sorted(train_df['pressure'].unique().tolist()))}
target_dic_inv = {v: k for k, v in target_dic.items()}

In [32]:
display(train_df), display(test_df)

Unnamed: 0,time_step,u_in,u_out,time_delta,delta,area,cross,cross2,u_in_cumsum,u_in_cummean,...,u_out_lag_4,time_step_lag_1,time_step_diff_1,C_cate,R_cate,RC_sum,RC_dot,id,breath_id,pressure
0,-0.989258,-0.937988,0,-15.476562,-0.861328,-0.734375,0.000000,-0.665527,-0.724121,-0.647461,...,-1.0,-0.964844,-15.476562,2,1,6,7,1,1,5.837492
1,-0.963867,3.054688,0,0.104309,2.867188,-0.682617,0.000000,-0.665527,-0.675781,0.162842,...,-1.0,-0.964844,0.104309,2,1,6,7,2,1,5.907794
2,-0.937988,3.955078,0,0.201538,3.732422,-0.619141,0.000000,-0.665527,-0.616699,0.554688,...,-1.0,-0.939453,0.201538,2,1,6,7,3,1,7.876254
3,-0.912109,4.019531,0,0.278320,3.816406,-0.554688,0.000000,-0.665527,-0.556641,0.757324,...,-1.0,-0.914062,0.278320,2,1,6,7,4,1,11.742872
4,-0.886230,4.574219,0,0.364258,4.367188,-0.482666,0.000000,-0.665527,-0.489746,0.923828,...,-1.0,-0.888184,0.364258,2,1,6,7,5,1,12.234987
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6035995,0.904785,-0.631348,1,-0.007175,-0.561523,0.322998,0.333008,0.608887,0.293945,-0.203613,...,0.0,0.905273,-0.007175,0,2,5,5,6035996,125749,3.869032
6035996,0.930176,-0.631348,1,-0.032135,-0.562012,0.327148,0.332764,0.625977,0.297852,-0.207764,...,0.0,0.930664,-0.032135,0,2,5,5,6035997,125749,3.869032
6035997,0.955078,-0.616211,1,0.009163,-0.546875,0.331543,0.348389,0.642578,0.301758,-0.211792,...,0.0,0.956055,0.009163,0,2,5,5,6035998,125749,3.798729
6035998,0.980469,-0.678711,1,-0.041748,-0.605469,0.334961,0.284424,0.659668,0.305176,-0.215942,...,0.0,0.981445,-0.041748,0,2,5,5,6035999,125749,4.079938


Unnamed: 0,time_step,u_in,u_out,time_delta,delta,area,cross,cross2,u_in_cumsum,u_in_cummean,...,u_out_lag_4,time_step_lag_1,time_step_diff_1,C_cate,R_cate,RC_sum,RC_dot,id,breath_id,pressure
0,-0.989258,-0.956055,0,-15.476562,-0.861328,-0.734375,0.000000,-0.665527,-0.724121,-0.654785,...,-1.0,-0.964844,-15.476562,1,0,1,1,1,0,3.869032
1,-0.964844,0.683594,0,-0.705566,0.583984,-0.714355,0.000000,-0.665527,-0.704590,-0.322021,...,-1.0,-0.964844,-0.705566,1,0,1,1,2,0,3.869032
2,-0.940918,2.240234,0,-0.696289,1.958008,-0.675293,0.000000,-0.665527,-0.666016,-0.000423,...,-1.0,-0.940918,-0.696289,1,0,1,1,3,0,3.869032
3,-0.916504,3.675781,0,-0.696289,3.224609,-0.619141,0.000000,-0.665527,-0.610352,0.305908,...,-1.0,-0.916504,-0.696289,1,0,1,1,4,0,3.869032
4,-0.892578,4.785156,0,-0.710449,4.199219,-0.549316,0.000000,-0.665527,-0.541016,0.580078,...,-1.0,-0.892578,-0.710449,1,0,1,1,5,0,3.869032
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4023995,0.923828,0.128418,1,0.151123,0.150024,-0.216797,1.111328,0.621582,-0.212158,-0.427734,...,0.0,0.924805,0.151123,0,1,2,2,4023996,125748,3.869032
4023996,0.949707,0.129395,1,0.143188,0.150513,-0.202881,1.112305,0.639160,-0.199097,-0.425049,...,0.0,0.950195,0.143188,0,1,2,2,4023997,125748,3.869032
4023997,0.975098,0.130249,1,0.090332,0.147827,-0.188843,1.113281,0.656250,-0.185913,-0.422363,...,0.0,0.975586,0.090332,0,1,2,2,4023998,125748,3.869032
4023998,1.000000,0.130859,1,0.107300,0.149536,-0.174927,1.114258,0.673340,-0.172852,-0.419678,...,0.0,1.000977,0.107300,0,1,2,2,4023999,125748,3.869032


(None, None)

In [33]:
oof = np.zeros(len(train_df))
test_preds_lst = []
input_dim = len(train_value_col)
train_df['pred'] = 0

gkf = GroupKFold(n_splits=CFG.n_folds).split(train_df, train_df.pressure, groups=train_df.breath_id)
for fold, (_, valid_idx) in enumerate(gkf):
    train_df.loc[valid_idx, 'fold'] = fold
    
for i, fold in enumerate(range(CFG.n_folds)):
    if i not in CFG.folds:
        continue
    print(f'Fold-{fold}')
    
    trn_df = train_df.query(f"fold!={fold}").reset_index(drop=True)
    val_df = train_df.query(f"fold=={fold}").reset_index(drop=True)
    
    loaders = {
        phase: DataLoader(
            VentilatorDataset(
                df_, train_value_col, train_category_col, target_dic
            ),
            **CFG.loader_params[phase])  # type: ignore
        for phase, df_ in zip(["train", "valid", "test"], [trn_df, val_df, test_df])
    }
    

    model = VentilatorModel(input_dim)
    model.to(device)

    optimizer = AdamW(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay)
    num_train_steps = int(len(loaders['train']) * CFG.epochs)
    num_warmup_steps = int(num_train_steps / 10)
    scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_train_steps)

    model_path = OUTPUT_DIR / f"ventilator_f{fold}_best_model.bin"

    valid_best_score = float('inf')
    valid_best_score_mask = float('inf')
    for epoch in tqdm(range(CFG.epochs)):
        train_loss, lrs = train_loop(model, optimizer, scheduler, loaders['train'])
        valid_loss, valid_predict = valid_loop(model, loaders['valid'], target_dic_inv)
       
        valid_score_mask = compute_metric(valid_predict, val_df['pressure'].values, val_df['u_out'].values)

        print(f"epoch = {epoch}, valid mask score = {valid_score_mask}:")

        if valid_score_mask < valid_best_score_mask:
            valid_best_score_mask = valid_score_mask
            torch.save(model.state_dict(), model_path)
            oof[train_df.query(f"fold=={fold}").index.values] = valid_predict

        torch.cuda.empty_cache()
        gc.collect()

    model.load_state_dict(torch.load(model_path))
    test_preds = test_loop(model, loaders['test'], target_dic_inv)
    test_preds_lst.append(test_preds)
    
    sub_df['pressure'] = test_preds
    sub_df.to_csv(OUTPUT_DIR / f"sub_f{fold}.csv", index=None)
    
    valid_loss, valid_predict = valid_loop(model, loaders['valid'], target_dic_inv)
    valid_score_mask = compute_metric(valid_predict, val_df['pressure'].values, val_df['u_out'].values)
    print(f"fold = {epoch}, valid mask score = {valid_score_mask}:")
    train_df.loc[train_df['fold'] == fold, 'pred'] = valid_predict
    train_df.loc[train_df['fold'] == fold, ['id', 'pred']].to_csv(OUTPUT_DIR / f"oof_{fold}.csv", index=None)

    torch.cuda.empty_cache()
    gc.collect()

Fold-0
init LSTM(64, 256, num_layers=4, batch_first=True, dropout=0.2, bidirectional=True)


  0%|          | 0/50 [00:00<?, ?it/s]

epoch = 0, valid mask score = 2.5296858744519817:
epoch = 1, valid mask score = 1.3358765486353898:
epoch = 2, valid mask score = 1.074530428752434:
epoch = 3, valid mask score = 0.7619331378959698:
epoch = 4, valid mask score = 0.629637390246245:
epoch = 5, valid mask score = 0.5542591586977806:
epoch = 6, valid mask score = 0.48086640665734875:
epoch = 7, valid mask score = 0.4378759204845493:
epoch = 8, valid mask score = 0.43074324685203796:
epoch = 9, valid mask score = 0.3900196181163999:
epoch = 10, valid mask score = 0.36762990913596266:
epoch = 11, valid mask score = 0.3834599836935184:
epoch = 12, valid mask score = 0.32264308655989876:
epoch = 13, valid mask score = 0.32174115189326413:
epoch = 14, valid mask score = 0.29644438175995447:
epoch = 15, valid mask score = 0.2930329902669567:
epoch = 16, valid mask score = 0.2766001490322949:
epoch = 17, valid mask score = 0.27129780945632115:
epoch = 18, valid mask score = 0.27434808387560705:
epoch = 19, valid mask score = 0.28

In [34]:
valid_score_mask = compute_metric(train_df['pred'].values, train_df['pressure'].values, train_df['u_out'].values)
print("CV:", valid_score_mask)

CV: 0.16520278206934086


In [33]:
oof_df = train_df.loc[:, ['id', 'pred']]
oof_df.to_csv(OUTPUT_DIR / "oof_total.csv", index=None)

In [35]:
sub_df['pressure'] = np.median(np.stack(test_preds_lst), axis=0)
sub_df.to_csv(OUTPUT_DIR / "submission_median.csv", index=None)

# Post Processing: https://www.kaggle.com/snnclsr/a-dummy-approach-to-improve-your-score-postprocess
unique_pressures = train_df["pressure"].unique()
sorted_pressures = np.sort(unique_pressures)
total_pressures_len = len(sorted_pressures)

def find_nearest(prediction):
    insert_idx = np.searchsorted(sorted_pressures, prediction)
    if insert_idx == total_pressures_len:
        # If the predicted value is bigger than the highest pressure in the train dataset,
        # return the max value.
        return sorted_pressures[-1]
    elif insert_idx == 0:
        # Same control but for the lower bound.
        return sorted_pressures[0]
    lower_val = sorted_pressures[insert_idx - 1]
    upper_val = sorted_pressures[insert_idx]
    return lower_val if abs(lower_val - prediction) < abs(upper_val - prediction) else upper_val

sub_df = pd.read_csv(OUTPUT_DIR / "submission_median.csv")
sub_df["pressure"] = sub_df["pressure"].apply(find_nearest)
sub_df.to_csv(OUTPUT_DIR / "submission_median_pp.csv", index=None)

In [None]:
epoch = 0, valid mask score = 2.4234156269360065:
epoch = 1, valid mask score = 1.383247832311501:
epoch = 2, valid mask score = 0.9912448731882307:
epoch = 3, valid mask score = 0.7400152983110956:
epoch = 4, valid mask score = 0.5968827760744291:
epoch = 5, valid mask score = 0.5357092961127973:
epoch = 6, valid mask score = 0.49782050088029284:
epoch = 7, valid mask score = 0.46189086218909464:
epoch = 8, valid mask score = 0.46639998713624786:
epoch = 9, valid mask score = 0.39820255288985984:
epoch = 10, valid mask score = 0.42318801561001596:
epoch = 11, valid mask score = 0.3319068540521283:
epoch = 12, valid mask score = 0.3408059985361622:
epoch = 13, valid mask score = 0.3157559988809042:
epoch = 14, valid mask score = 0.3323617405615805:
epoch = 15, valid mask score = 0.3013156729228662:
epoch = 16, valid mask score = 0.28851178839627317:
epoch = 17, valid mask score = 0.2779152339340179:
epoch = 18, valid mask score = 0.265578229199995:
epoch = 19, valid mask score = 0.2636193531637639:
epoch = 20, valid mask score = 0.27658346187845323:
epoch = 21, valid mask score = 0.24165807487577698:
epoch = 22, valid mask score = 0.24668171627926436:
epoch = 23, valid mask score = 0.23368934246152018:
epoch = 24, valid mask score = 0.23662942127983735:
epoch = 25, valid mask score = 0.2185424074526554:
epoch = 26, valid mask score = 0.22294171596488138:
epoch = 27, valid mask score = 0.21901644313919094:
epoch = 28, valid mask score = 0.21635140636997818:
epoch = 29, valid mask score = 0.21670185982004297:
epoch = 30, valid mask score = 0.20808531591259144:
epoch = 31, valid mask score = 0.19915114681861815:
epoch = 32, valid mask score = 0.19995490953433046:
epoch = 33, valid mask score = 0.19182868413946977:
epoch = 34, valid mask score = 0.1931657425126711:
epoch = 35, valid mask score = 0.1905662102329643:
epoch = 36, valid mask score = 0.18657815913266282:
epoch = 37, valid mask score = 0.1872539230612827:
epoch = 38, valid mask score = 0.1858137426459775:
epoch = 39, valid mask score = 0.18056411809036885:
epoch = 40, valid mask score = 0.18170819818313316:
epoch = 41, valid mask score = 0.17919699223268598:
epoch = 42, valid mask score = 0.17760578144562356:
epoch = 43, valid mask score = 0.1765242556640338:
epoch = 44, valid mask score = 0.17534026466427421:
epoch = 45, valid mask score = 0.1753085874882145:
epoch = 46, valid mask score = 0.17519446015457515:
epoch = 47, valid mask score = 0.17469153103606444:
epoch = 48, valid mask score = 0.1748151144839217:
epoch = 49, valid mask score = 0.17469064969098191: