In [1]:
# ====================================================
# Directory settings
# ====================================================

import os

OUTPUT_DIR = './out/'
DATA_DIR = "./data/"

# ====================================================
# CFG
# ====================================================

class CFG:
    competition = 'ventilator'
    apex = True
    print_freq = 1000
    num_workers = 4
    model_name = 'rnn'
    scheduler = 'CosineAnnealingWarmRestarts' # ['linear', 'cosine', 'ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts']
    batch_scheduler = False
    #num_warmup_steps=100 # ['linear', 'cosine']
    #num_cycles=0.5 # 'cosine'
    factor = 0.7 # ReduceLROnPlateau
    patience = 5 # ReduceLROnPlateau
    eps = 1e-6 # ReduceLROnPlateau
    T_max = 50 # CosineAnnealingLR
    T_0 = 50 # CosineAnnealingWarmRestarts
    epochs = 200
    max_grad_norm = 1000
    gradient_accumulation_steps = 1
    hidden_size = 512
    lr = 1e-3
    min_lr = 1e-5
    weight_decay = 1e-6
    batch_size = 256
    n_fold = 5
    trn_fold = list(range(5))
    cate_seq_cols = []
    cont_seq_cols = ['R', 'C', 'time_step', 'u_in', 'u_out']
    train = True
    inference = True
    debug = False

if CFG.debug:
    CFG.epochs = 50
    CFG.trn_fold = [0]

In [2]:
# ====================================================
# Library
# ====================================================

import os
import gc
import sys
import json
import math
import random
from time import time
from datetime import datetime
from collections import Counter, defaultdict

import scipy as sp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

from tqdm.notebook import tqdm

from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
from torch.cuda.amp import GradScaler
from torch.cuda.amp import autocast

from transformers import AdamW
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup

import warnings
warnings.filterwarnings("ignore")

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(torch.cuda.is_available())
torch.cuda.empty_cache()

True


In [3]:
# ====================================================
# Utils
# ====================================================

def get_score(y_trues, y_preds):
    score = mean_absolute_error(y_trues, y_preds)
    return score

def init_logger(log_file=OUTPUT_DIR+'train.log'):
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = init_logger()

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything()

def decorate(s: str, decoration=None):
    if decoration is None:
        decoration = '★' * 20
    return ' '.join([decoration, str(s), decoration])

class Timer:
    
    def __init__(self, logger=None, format_str='{:.3f}[s]', prefix=None, suffix=None, sep=' ', verbose=0):
        if prefix: format_str = str(prefix) + sep + format_str
        if suffix: format_str = format_str + sep + str(suffix)
        self.format_str = format_str
        self.logger = logger
        self.start = None
        self.end = None
        self.verbose = verbose

    @property
    def duration(self):
        if self.end is None:
            return 0
        return self.end - self.start

    def __enter__(self):
        self.start = time()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.end = time()
        if self.verbose is None:
            return
        out_str = self.format_str.format(self.duration)
        if self.logger:
            self.logger.info(out_str)
        else:
            print(out_str)

In [4]:
# ====================================================
# Data Loading
# ====================================================

train = pd.read_csv(DATA_DIR + 'train.csv')
if CFG.debug:
    train = train[:80*5000]
test = pd.read_csv(DATA_DIR + 'test.csv')

In [5]:
class AbstractBaseBlock:
    
    def fit(self, input_df: pd.DataFrame, y=None):
        return self.transform(input_df)

    def transform(self, input_df: pd.DataFrame) -> pd.DataFrame:
        raise NotImplementedError()
        
class RCDummy(AbstractBaseBlock):
    
    def transform(self, input_df):
        input_df['R_dummy'] = input_df['R'].astype(str)
        input_df['C_dummy'] = input_df['C'].astype(str)
        output_df = pd.get_dummies(input_df[["R_dummy", "C_dummy"]])
        CFG.cont_seq_cols += output_df.add_suffix(f'@{self.__class__.__name__}').columns.tolist()
        return output_df

class MultiplyingDividing(AbstractBaseBlock):
    
    def transform(self, input_df):
        input_df['u_in_cumsum'] = input_df['u_in'].groupby(input_df['breath_id']).cumsum()
        input_df['one'] = 1
        input_df['count'] = input_df['one'].groupby(input_df['breath_id']).cumsum()
        input_df['u_in_cummean'] = input_df['u_in_cumsum'] / input_df['count']
        
        input_df = input_df.merge(
            input_df[input_df["u_out"]==0].groupby('breath_id')['u_in'].agg(["mean", "std", "max"]).add_prefix("u_out0_").reset_index(),
            on="breath_id"
        )
        input_df = input_df.merge(
            input_df[input_df["u_out"]==1].groupby('breath_id')['u_in'].agg(["mean", "std", "max"]).add_prefix("u_out1_").reset_index(),
            on="breath_id"
        )

        output_df = pd.DataFrame(
            {
                "u_in_cumsum": input_df['u_in_cumsum'],
                "u_in_cummean": input_df['u_in_cummean'],
                "u_out0_mean_diff": input_df['u_in'] - input_df['u_out0_mean'],
                "u_out0_max_diff": input_df['u_in'] - input_df['u_out0_max'],
                #"u_out0_std": input_df['u_out0_std'],
                #"u_out1_mean": input_df['u_out1_mean'],
                #"u_out1_max": input_df['u_out1_max'],
                #"u_out1_std": input_df['u_out1_std'],
            }
        )
        CFG.cont_seq_cols += output_df.add_suffix(f'@{self.__class__.__name__}').columns.tolist()
        return output_df

class DerivsAndInts(AbstractBaseBlock):
    
    def transform(self, input_df):
        output_df = pd.DataFrame()
        input_df["dt"] = input_df.groupby('breath_id')['time_step'].shift(-1) - input_df['time_step']
        input_df["dt"].fillna((input_df["dt"].mean()), inplace=True)
        
        input_df["u_in_diffm1"] = (input_df['u_in'].shift(-1) - input_df['u_in']).fillna(method='ffill')
        input_df["u_in_diffm1_p1"] = input_df.groupby('breath_id')["u_in_diffm1"].shift(1).fillna(0)
        input_df["u_in_diffm1_p2"] = input_df.groupby('breath_id')["u_in_diffm1"].shift(2).fillna(0)
        input_df["u_in_diffm1_m1"] = input_df.groupby('breath_id')["u_in_diffm1"].shift(-1).fillna(0)
        input_df["u_in_diffm1_m2"] = input_df.groupby('breath_id')["u_in_diffm1"].shift(-2).fillna(0)
        #output_df["u_in_derrm1"] = output_df["u_in_diffm1"] / output_df["dt"]
        
        #output_df["u_in_diff2m1"] = (output_df['u_in_derrm1'] - output_df['u_in_derrm1'].shift(-1)).fillna(method='ffill')
        input_df["u_in_diff2m1"] = (input_df['u_in_diffm1'].shift(-1) - input_df['u_in_diffm1']).fillna(method='ffill')
        input_df["u_in_diff2m1_p1"] = input_df.groupby('breath_id')["u_in_diff2m1"].shift(1).fillna(0)
        input_df["u_in_diff2m1_m1"] = input_df.groupby('breath_id')["u_in_diff2m1"].shift(-1).fillna(0)
        #output_df["u_in_derr2m1"] = output_df["u_in_diff2m1"] / output_df["dt"]

        input_df['area'] = input_df['dt'] * input_df['u_in']
        input_df['area_p1'] = input_df.groupby('breath_id')['area'].shift(1).fillna(0)
        input_df['area_m1'] = input_df.groupby('breath_id')['area'].shift(-1).fillna(0)
        input_df['area_tot'] = input_df.groupby('breath_id')['area'].cumsum()
        #input_df['area2'] = output_df['dt'] * output_df['area']
        #output_df['area2'] = input_df.groupby('breath_id')['area2'].cumsum()
        
        #input_df['one'] = 1
        #input_df['count'] = (input_df['one']).groupby(input_df['breath_id']).cumsum()
        #output_df['area_mean'] = output_df['area'] / input_df['count']
        
        output_df = pd.DataFrame(
            {
                "dt": input_df["dt"],
                "u_in_diffm1": input_df["u_in_diffm1"],
                "u_in_diffm1_p1": input_df["u_in_diffm1_p1"],
                "u_in_diffm1_p2": input_df["u_in_diffm1_p2"],
                "u_in_diffm1_m1": input_df["u_in_diffm1_m1"],
                "u_in_diffm1_m2": input_df["u_in_diffm1_m2"],
                "u_in_diff2m1": input_df["u_in_diff2m1"],
                "u_in_diff2m1_p1": input_df["u_in_diff2m1_p1"],
                "u_in_diff2m1_m1": input_df["u_in_diff2m1_m1"],
                "area": input_df["area"],
                "area_m1": input_df["area_m1"],
                "area_p1": input_df["area_p1"],
                "area_tot": input_df["area_tot"],
            }
        )
        
        CFG.cont_seq_cols += output_df.add_suffix(f'@{self.__class__.__name__}').columns.tolist()
        return output_df

class LagFeatures(AbstractBaseBlock):
    
    def transform(self, input_df):
        output_df = pd.DataFrame(
            {
                "u_in_p1": input_df.groupby("breath_id")["u_in"].shift(1).fillna(0),
                "u_in_p2": input_df.groupby("breath_id")["u_in"].shift(2).fillna(0),
                "u_in_p3": input_df.groupby("breath_id")["u_in"].shift(3).fillna(0),
                "u_in_p4": input_df.groupby("breath_id")["u_in"].shift(4).fillna(0),
                "u_in_m1": input_df.groupby("breath_id")["u_in"].shift(-1).fillna(0),
                "u_in_m2": input_df.groupby("breath_id")["u_in"].shift(-2).fillna(0),
                "u_in_m3": input_df.groupby("breath_id")["u_in"].shift(-3).fillna(0),
                "u_in_m4": input_df.groupby("breath_id")["u_in"].shift(-4).fillna(0),
                #"u_out_p1": input_df.groupby("breath_id")["u_out"].shift(1).fillna(0),
                #"u_out_p2": input_df.groupby("breath_id")["u_out"].shift(2).fillna(0),
                #"u_out_p3": input_df.groupby("breath_id")["u_out"].shift(3).fillna(0),
                #"u_out_p4": input_df.groupby("breath_id")["u_out"].shift(4).fillna(0),
                #"u_out_m1": input_df.groupby("breath_id")["u_out"].shift(-1).fillna(0),
                #"u_out_m2": input_df.groupby("breath_id")["u_out"].shift(-2).fillna(0),
                #"u_out_m3": input_df.groupby("breath_id")["u_out"].shift(-3).fillna(0),
                #"u_out_m4": input_df.groupby("breath_id")["u_out"].shift(-4).fillna(0),
            }
        )
            
        CFG.cont_seq_cols += output_df.add_suffix(f'@{self.__class__.__name__}').columns.tolist()
        return output_df
    
class RollingFeatures(AbstractBaseBlock):
    
    def transform(self, input_df):
        output_df = pd.DataFrame()
        output_df["u_in_rolling_mean2"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(2).mean()["u_in"].reset_index(drop=True)
        output_df["u_in_rolling_mean4"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(4).mean()["u_in"].reset_index(drop=True)
        #output_df["u_in_rolling_mean8"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(8).mean()["u_in"].reset_index(drop=True)
        output_df["u_in_rolling_max2"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(2).max()["u_in"].reset_index(drop=True)
        output_df["u_in_rolling_max4"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(4).max()["u_in"].reset_index(drop=True)
        #output_df["u_in_rolling_max8"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(8).max()["u_in"].reset_index(drop=True)
        output_df["u_in_rolling_min2"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(2).min()["u_in"].reset_index(drop=True)
        output_df["u_in_rolling_min4"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(4).min()["u_in"].reset_index(drop=True)
        #output_df["u_in_rolling_min8"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(8).min()["u_in"].reset_index(drop=True)
        #output_df["u_in_rolling_std2"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(2).std()["u_in"].reset_index(drop=True)
        #output_df["u_in_rolling_std4"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(4).std()["u_in"].reset_index(drop=True)
        #output_df["u_in_rolling_std8"] = input_df[["breath_id", "u_in"]].groupby("breath_id").rolling(8).std()["u_in"].reset_index(drop=True)
        for col in output_df.columns:
            output_df[col] = output_df[col].fillna(output_df[col].mean())
            
        CFG.cont_seq_cols += output_df.add_suffix(f'@{self.__class__.__name__}').columns.tolist()
        return output_df

In [6]:
def run_blocks(input_df, blocks, y=None, test=False):
    out_df = pd.DataFrame()

    print(decorate('start run blocks...'))

    with Timer(prefix='run test={}'.format(test)):
        for block in feature_blocks:
            with Timer(prefix='out_df shape: {} \t- {}'.format(out_df.shape, str(block))):
                if not test:
                    out_i = block.fit(input_df.copy(), y=y)
                else:
                    out_i = block.transform(input_df.copy())

            assert len(input_df) == len(out_i), block
            name = block.__class__.__name__
            out_df = pd.concat([out_df, out_i.add_suffix(f'@{name}')], axis=1)
    print(f"out_df shape: {out_df.shape}")

    return pd.concat([input_df, out_df], axis=1)

feature_blocks = [
    MultiplyingDividing(),
    DerivsAndInts(),
    LagFeatures(),
    RollingFeatures(),
    RCDummy(),
]

train = run_blocks(train, blocks=feature_blocks)
test = run_blocks(test, blocks=feature_blocks, test=True)
CFG.cont_seq_cols = list(set(CFG.cont_seq_cols))
display(train.head())
display(test.head())

★★★★★★★★★★★★★★★★★★★★ start run blocks... ★★★★★★★★★★★★★★★★★★★★
out_df shape: (0, 0) 	- <__main__.MultiplyingDividing object at 0x7fab283da220> 2.549[s]
out_df shape: (6036000, 4) 	- <__main__.DerivsAndInts object at 0x7fab283dac10> 1.627[s]
out_df shape: (6036000, 17) 	- <__main__.LagFeatures object at 0x7fab283dafd0> 1.151[s]
out_df shape: (6036000, 25) 	- <__main__.RollingFeatures object at 0x7fab283dabb0> 23.246[s]
out_df shape: (6036000, 31) 	- <__main__.RCDummy object at 0x7fab283da310> 6.455[s]
run test=False 36.173[s]
out_df shape: (6036000, 37)
★★★★★★★★★★★★★★★★★★★★ start run blocks... ★★★★★★★★★★★★★★★★★★★★
out_df shape: (0, 0) 	- <__main__.MultiplyingDividing object at 0x7fab283da220> 1.447[s]
out_df shape: (4024000, 4) 	- <__main__.DerivsAndInts object at 0x7fab283dac10> 0.980[s]
out_df shape: (4024000, 17) 	- <__main__.LagFeatures object at 0x7fab283dafd0> 0.664[s]
out_df shape: (4024000, 25) 	- <__main__.RollingFeatures object at 0x7fab283dabb0> 15.572[s]
out_df shape: (402400

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure,u_in_cumsum@MultiplyingDividing,u_in_cummean@MultiplyingDividing,u_out0_mean_diff@MultiplyingDividing,u_out0_max_diff@MultiplyingDividing,dt@DerivsAndInts,u_in_diffm1@DerivsAndInts,u_in_diffm1_p1@DerivsAndInts,u_in_diffm1_p2@DerivsAndInts,u_in_diffm1_m1@DerivsAndInts,u_in_diffm1_m2@DerivsAndInts,u_in_diff2m1@DerivsAndInts,u_in_diff2m1_p1@DerivsAndInts,u_in_diff2m1_m1@DerivsAndInts,area@DerivsAndInts,area_m1@DerivsAndInts,area_p1@DerivsAndInts,area_tot@DerivsAndInts,u_in_p1@LagFeatures,u_in_p2@LagFeatures,u_in_p3@LagFeatures,u_in_p4@LagFeatures,u_in_m1@LagFeatures,u_in_m2@LagFeatures,u_in_m3@LagFeatures,u_in_m4@LagFeatures,u_in_rolling_mean2@RollingFeatures,u_in_rolling_mean4@RollingFeatures,u_in_rolling_max2@RollingFeatures,u_in_rolling_max4@RollingFeatures,u_in_rolling_min2@RollingFeatures,u_in_rolling_min4@RollingFeatures,R_dummy_20@RCDummy,R_dummy_5@RCDummy,R_dummy_50@RCDummy,C_dummy_10@RCDummy,C_dummy_20@RCDummy,C_dummy_50@RCDummy
0,1,1,20,50,0.0,0.083334,0,5.837492,0.083334,0.083334,-22.03649,-28.229702,0.033652,18.299707,0.0,0.0,4.126236,0.299544,-14.173471,0.0,-3.826692,0.002804,0.622489,0.0,0.002804,0.0,0.0,0.0,0.0,18.383041,22.509278,22.808822,25.35585,7.296453,7.122734,8.130658,9.024859,6.462249,5.530754,1,0,0,0,0,1
1,2,1,20,50,0.033652,18.383041,0,5.907794,18.466375,9.233188,-3.736783,-9.929994,0.033862,4.126236,18.299707,0.0,0.299544,2.547028,-3.826692,-14.173471,2.247484,0.622489,0.765942,0.002804,0.625293,0.083334,0.0,0.0,0.0,22.509278,22.808822,25.35585,27.259866,9.233188,7.122734,18.383041,9.024859,0.083334,5.530754,1,0,0,0,0,1
2,3,1,20,50,0.067514,22.509278,0,7.876254,40.975653,13.658551,0.389454,-5.803758,0.034028,0.299544,4.126236,18.299707,2.547028,1.904016,2.247484,-3.826692,-0.643012,0.765942,0.780365,0.622489,1.391235,18.383041,0.083334,0.0,0.0,22.808822,25.35585,27.259866,27.127486,20.44616,7.122734,22.509278,9.024859,18.383041,5.530754,1,0,0,0,0,1
3,4,1,20,50,0.101542,22.808822,0,11.742872,63.784476,15.946119,0.688998,-5.504214,0.034213,2.547028,0.299544,4.126236,1.904016,-0.13238,-0.643012,2.247484,-2.036396,0.780365,0.860634,0.765942,2.1716,22.509278,18.383041,0.083334,0.0,25.35585,27.259866,27.127486,26.807732,22.65905,15.946119,22.808822,22.808822,22.509278,0.083334,1,0,0,0,0,1
4,5,1,20,50,0.135756,25.35585,0,12.234987,89.140326,17.828065,3.236026,-2.957185,0.033942,1.904016,2.547028,0.299544,-0.13238,-0.319754,-2.036396,-0.643012,-0.187374,0.860634,0.927113,0.780365,3.032234,22.808822,22.509278,18.383041,0.083334,27.259866,27.127486,26.807732,27.864715,24.082336,22.264248,25.35585,25.35585,22.808822,18.383041,1,0,0,0,0,1


Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,u_in_cumsum@MultiplyingDividing,u_in_cummean@MultiplyingDividing,u_out0_mean_diff@MultiplyingDividing,u_out0_max_diff@MultiplyingDividing,dt@DerivsAndInts,u_in_diffm1@DerivsAndInts,u_in_diffm1_p1@DerivsAndInts,u_in_diffm1_p2@DerivsAndInts,u_in_diffm1_m1@DerivsAndInts,u_in_diffm1_m2@DerivsAndInts,u_in_diff2m1@DerivsAndInts,u_in_diff2m1_p1@DerivsAndInts,u_in_diff2m1_m1@DerivsAndInts,area@DerivsAndInts,area_m1@DerivsAndInts,area_p1@DerivsAndInts,area_tot@DerivsAndInts,u_in_p1@LagFeatures,u_in_p2@LagFeatures,u_in_p3@LagFeatures,u_in_p4@LagFeatures,u_in_m1@LagFeatures,u_in_m2@LagFeatures,u_in_m3@LagFeatures,u_in_m4@LagFeatures,u_in_rolling_mean2@RollingFeatures,u_in_rolling_mean4@RollingFeatures,u_in_rolling_max2@RollingFeatures,u_in_rolling_max4@RollingFeatures,u_in_rolling_min2@RollingFeatures,u_in_rolling_min4@RollingFeatures,R_dummy_20@RCDummy,R_dummy_5@RCDummy,R_dummy_50@RCDummy,C_dummy_10@RCDummy,C_dummy_20@RCDummy,C_dummy_50@RCDummy
0,1,0,5,20,0.0,0.0,0,0.0,0.0,-19.702022,-37.542219,0.031904,7.515046,0.0,0.0,7.13663,6.578935,-0.378416,0.0,-0.557695,0.0,0.239907,0.0,0.0,0.0,0.0,0.0,0.0,7.515046,14.651675,21.23061,26.320956,7.312916,7.138082,8.146446,9.042205,6.479387,5.545624,0,1,0,0,1,0
1,2,0,5,20,0.031904,7.515046,0,7.515046,3.757523,-12.186977,-30.027173,0.031924,7.13663,7.515046,0.0,6.578935,5.090346,-0.557695,-0.378416,-1.488589,0.239907,0.467737,0.0,0.239907,0.0,0.0,0.0,0.0,14.651675,21.23061,26.320956,30.486938,3.757523,7.138082,7.515046,9.042205,0.0,5.545624,0,1,0,0,1,0
2,3,0,5,20,0.063827,14.651675,0,22.166721,7.388907,-5.050347,-22.890543,0.031924,6.578935,7.13663,7.515046,5.090346,4.165982,-1.488589,-0.557695,-0.924365,0.467737,0.677098,0.239907,0.707644,7.515046,0.0,0.0,0.0,21.23061,26.320956,30.486938,33.54595,11.08336,7.138082,14.651675,9.042205,7.515046,5.545624,0,1,0,0,1,0
3,4,0,5,20,0.095751,21.23061,0,43.397331,10.849333,1.528588,-16.311609,0.031893,5.090346,6.578935,7.13663,4.165982,3.059012,-0.924365,-1.488589,-1.106969,0.677098,0.839994,0.467737,1.384742,14.651675,7.515046,0.0,0.0,26.320956,30.486938,33.54595,35.7176,17.941143,10.849333,21.23061,21.23061,14.651675,0.0,0,1,0,0,1,0
4,5,0,5,20,0.127644,26.320956,0,69.718287,13.943657,6.618934,-11.221263,0.031914,4.165982,5.090346,6.578935,3.059012,2.17165,-1.106969,-0.924365,-0.887362,0.839994,0.972967,0.677098,2.224736,21.23061,14.651675,7.515046,0.0,30.486938,33.54595,35.7176,36.971061,23.775783,17.429572,26.320956,26.320956,21.23061,7.515046,0,1,0,0,1,0


In [7]:
print(train.isna().sum())

id                                      0
breath_id                               0
R                                       0
C                                       0
time_step                               0
u_in                                    0
u_out                                   0
pressure                                0
u_in_cumsum@MultiplyingDividing         0
u_in_cummean@MultiplyingDividing        0
u_out0_mean_diff@MultiplyingDividing    0
u_out0_max_diff@MultiplyingDividing     0
dt@DerivsAndInts                        0
u_in_diffm1@DerivsAndInts               0
u_in_diffm1_p1@DerivsAndInts            0
u_in_diffm1_p2@DerivsAndInts            0
u_in_diffm1_m1@DerivsAndInts            0
u_in_diffm1_m2@DerivsAndInts            0
u_in_diff2m1@DerivsAndInts              0
u_in_diff2m1_p1@DerivsAndInts           0
u_in_diff2m1_m1@DerivsAndInts           0
area@DerivsAndInts                      0
area_m1@DerivsAndInts                   0
area_p1@DerivsAndInts             

In [8]:
train_col_order = ["u_out"] + train.columns.drop("u_out").tolist()
test_col_order = ["u_out"] + test.columns.drop("u_out").tolist()
train = train[train_col_order]
test = test[test_col_order]
scaler = RobustScaler()
scaler_targets = [col for col in CFG.cont_seq_cols if col != "u_out"]
print(f"Apply Standerd Scaler to these columns: {scaler_targets}")
for scaler_target in tqdm(scaler_targets):
    scaler.fit(train.loc[:, [scaler_target]])
    train.loc[:, [scaler_target]] = scaler.transform(train.loc[:, [scaler_target]])
    test.loc[:, [scaler_target]] = scaler.transform(test.loc[:, [scaler_target]])
display(train.head())
display(test.head())

Apply Standerd Scaler to these columns: ['u_in_m1@LagFeatures', 'u_in_cumsum@MultiplyingDividing', 'u_in_rolling_mean4@RollingFeatures', 'u_in_rolling_min2@RollingFeatures', 'u_in_m4@LagFeatures', 'u_in_diff2m1_p1@DerivsAndInts', 'u_in_rolling_max2@RollingFeatures', 'C_dummy_10@RCDummy', 'area_tot@DerivsAndInts', 'u_in_diffm1_m1@DerivsAndInts', 'u_in', 'R_dummy_20@RCDummy', 'area@DerivsAndInts', 'u_in_diffm1@DerivsAndInts', 'u_in_rolling_min4@RollingFeatures', 'u_in_p1@LagFeatures', 'u_in_diff2m1@DerivsAndInts', 'dt@DerivsAndInts', 'u_in_diffm1_m2@DerivsAndInts', 'u_in_diffm1_p2@DerivsAndInts', 'u_in_rolling_mean2@RollingFeatures', 'u_in_rolling_max4@RollingFeatures', 'u_in_cummean@MultiplyingDividing', 'area_p1@DerivsAndInts', 'u_in_m3@LagFeatures', 'C_dummy_50@RCDummy', 'R_dummy_50@RCDummy', 'area_m1@DerivsAndInts', 'u_in_m2@LagFeatures', 'C_dummy_20@RCDummy', 'R', 'u_in_p3@LagFeatures', 'u_in_p2@LagFeatures', 'u_out0_max_diff@MultiplyingDividing', 'u_out0_mean_diff@MultiplyingDividi

  0%|          | 0/41 [00:00<?, ?it/s]

Unnamed: 0,u_out,id,breath_id,R,C,time_step,u_in,pressure,u_in_cumsum@MultiplyingDividing,u_in_cummean@MultiplyingDividing,u_out0_mean_diff@MultiplyingDividing,u_out0_max_diff@MultiplyingDividing,dt@DerivsAndInts,u_in_diffm1@DerivsAndInts,u_in_diffm1_p1@DerivsAndInts,u_in_diffm1_p2@DerivsAndInts,u_in_diffm1_m1@DerivsAndInts,u_in_diffm1_m2@DerivsAndInts,u_in_diff2m1@DerivsAndInts,u_in_diff2m1_p1@DerivsAndInts,u_in_diff2m1_m1@DerivsAndInts,area@DerivsAndInts,area_m1@DerivsAndInts,area_p1@DerivsAndInts,area_tot@DerivsAndInts,u_in_p1@LagFeatures,u_in_p2@LagFeatures,u_in_p3@LagFeatures,u_in_p4@LagFeatures,u_in_m1@LagFeatures,u_in_m2@LagFeatures,u_in_m3@LagFeatures,u_in_m4@LagFeatures,u_in_rolling_mean2@RollingFeatures,u_in_rolling_mean4@RollingFeatures,u_in_rolling_max2@RollingFeatures,u_in_rolling_max4@RollingFeatures,u_in_rolling_min2@RollingFeatures,u_in_rolling_min4@RollingFeatures,R_dummy_20@RCDummy,R_dummy_5@RCDummy,R_dummy_50@RCDummy,C_dummy_10@RCDummy,C_dummy_20@RCDummy,C_dummy_50@RCDummy
0,0,1,1,0.0,0.75,-0.989052,-0.937384,5.837492,-0.725228,-0.648438,-1.564247,-0.326943,0.103988,118.596106,-0.044815,-0.04436,29.16291,2.249821,-300.216809,0.044093,-89.322996,-0.89778,2.889935,-0.830088,-0.724186,-0.864121,-0.842527,-0.819258,-0.794152,2.890207,3.664026,3.743915,4.277824,0.665678,0.479105,0.726592,0.643385,0.459244,0.344693,1.0,0.0,0.0,0.0,0.0,1.0
1,0,2,1,0.0,0.75,-0.963608,3.049278,5.907794,-0.676829,0.162935,0.023254,0.178432,0.201755,26.70599,124.171952,-0.04436,2.077079,19.45646,-81.023078,-309.271741,52.529083,3.040044,3.754459,-0.813604,-0.674672,-0.84736,-0.842527,-0.819258,-0.794152,3.739712,3.724218,4.256189,4.661047,1.122077,0.479105,2.832561,0.643385,-0.823539,0.344693,1.0,0.0,0.0,0.0,0.0,1.0
2,0,3,1,0.0,0.75,-0.938006,3.948195,7.876254,-0.617568,0.555359,0.381205,0.292385,0.278969,1.896601,27.963705,124.333169,17.985065,14.533583,47.65669,-83.468009,-14.973557,3.951625,3.841384,2.828979,-0.613747,2.83314,-0.825784,-0.819258,-0.794152,3.801381,4.236036,4.639136,4.634403,3.764462,0.479105,3.680142,0.643385,2.856483,0.344693,1.0,0.0,0.0,0.0,0.0,1.0
3,0,4,1,0.0,0.75,-0.912278,4.013452,11.742872,-0.557517,0.758212,0.407191,0.300657,0.365404,16.467592,1.988466,28.000409,13.433737,-1.056977,-13.577689,49.092223,-47.513664,4.043281,4.325127,3.672213,-0.551675,3.663022,2.850764,-0.802501,-0.794152,4.32576,4.618643,4.612511,4.570046,4.28594,2.122094,3.741672,2.723238,3.686258,-0.754615,1.0,0.0,0.0,0.0,0.0,1.0
4,0,5,1,0.0,0.75,-0.886409,4.568332,12.234987,-0.490761,0.925096,0.628145,0.370998,0.239085,12.298783,17.244182,1.991553,-0.980141,-2.491501,-43.096139,-13.988739,-4.332884,4.553354,4.725764,3.756997,-0.483218,3.723268,3.679756,2.877119,-0.777379,4.717757,4.592042,4.5482,4.782785,4.621343,3.298583,4.264863,3.107558,3.746496,2.938328,1.0,0.0,0.0,0.0,0.0,1.0


Unnamed: 0,u_out,id,breath_id,R,C,time_step,u_in,u_in_cumsum@MultiplyingDividing,u_in_cummean@MultiplyingDividing,u_out0_mean_diff@MultiplyingDividing,u_out0_max_diff@MultiplyingDividing,dt@DerivsAndInts,u_in_diffm1@DerivsAndInts,u_in_diffm1_p1@DerivsAndInts,u_in_diffm1_p2@DerivsAndInts,u_in_diffm1_m1@DerivsAndInts,u_in_diffm1_m2@DerivsAndInts,u_in_diff2m1@DerivsAndInts,u_in_diff2m1_p1@DerivsAndInts,u_in_diff2m1_m1@DerivsAndInts,area@DerivsAndInts,area_m1@DerivsAndInts,area_p1@DerivsAndInts,area_tot@DerivsAndInts,u_in_p1@LagFeatures,u_in_p2@LagFeatures,u_in_p3@LagFeatures,u_in_p4@LagFeatures,u_in_m1@LagFeatures,u_in_m2@LagFeatures,u_in_m3@LagFeatures,u_in_m4@LagFeatures,u_in_rolling_mean2@RollingFeatures,u_in_rolling_mean4@RollingFeatures,u_in_rolling_max2@RollingFeatures,u_in_rolling_max4@RollingFeatures,u_in_rolling_min2@RollingFeatures,u_in_rolling_min4@RollingFeatures,R_dummy_20@RCDummy,R_dummy_5@RCDummy,R_dummy_50@RCDummy,C_dummy_10@RCDummy,C_dummy_20@RCDummy,C_dummy_50@RCDummy
0,0,1,0,-0.333333,0.0,-0.989052,-0.955539,-0.725447,-0.655828,-1.361732,-0.584123,-0.71081,48.676477,-0.044815,-0.04436,50.470873,50.324566,-7.972278,0.044093,-12.981117,-0.915601,0.58429,-0.830088,-0.724409,-0.864121,-0.842527,-0.819258,-0.794152,0.652718,2.085062,3.426496,4.472072,0.669557,0.481963,0.729835,0.646003,0.46269,0.347693,0.0,1.0,0.0,0.0,1.0,0.0
1,0,2,0,-0.333333,0.0,-0.96493,0.681643,-0.705662,-0.322625,-0.709801,-0.376583,-0.701589,46.223115,50.966638,-0.04436,46.523433,38.927995,-11.770266,-8.214292,-34.720553,0.608903,1.957316,-0.830088,-0.705327,-0.864121,-0.842527,-0.819258,-0.794152,2.121998,3.407081,4.450297,5.310562,-0.168287,0.481963,0.600138,0.646003,-0.840297,0.347693,0.0,1.0,0.0,0.0,1.0,0.0
2,0,3,0,-0.333333,0.0,-0.940793,2.236385,-0.667087,-0.000608,-0.090698,-0.179494,-0.701478,42.607439,48.397986,51.033112,35.987006,31.8511,-31.49099,-12.126812,-21.544057,2.056663,3.21904,0.580114,-0.668122,0.647331,-0.842527,-0.819258,-0.794152,3.476461,4.429972,5.288183,5.926252,1.558079,0.481963,2.066091,0.646003,0.670958,0.347693,0.0,1.0,0.0,0.0,1.0,0.0
3,0,4,0,-0.333333,0.0,-0.916656,3.669632,-0.611192,0.306249,0.480025,0.002194,-0.716032,32.956552,44.612401,48.461136,29.444232,23.376188,-19.538057,-32.442233,-25.808486,3.387063,4.200742,1.919327,-0.614264,2.082674,0.667302,-0.819258,-0.794152,4.524455,5.267114,5.90343,6.363341,3.174144,1.173029,3.417487,2.485103,2.106115,-0.771432,0.0,1.0,0.0,0.0,1.0,0.0
4,0,5,0,-0.333333,0.0,-0.892543,4.778584,-0.541895,0.580642,0.921613,0.142772,-0.706255,26.963668,34.507996,44.670652,21.608955,16.58258,-23.406493,-20.128847,-20.679932,4.422198,5.002108,3.149979,-0.547448,3.405852,2.101105,0.691833,-0.794152,5.382142,5.881814,6.340205,6.615627,4.549102,2.398325,4.463108,3.253182,3.429121,0.745129,0.0,1.0,0.0,0.0,1.0,0.0


In [9]:
for col in CFG.cont_seq_cols:
    print(col, train[col].min(), train[col].max())

u_in_m1@LagFeatures -0.8944702777409128 19.69340429471345
u_in_cumsum@MultiplyingDividing -0.7254471901606487 6.446113371344997
u_in_rolling_mean4@RollingFeatures -0.8472088790102397 17.773640861269357
u_in_rolling_min2@RollingFeatures -0.840297259226469 19.269434602459135
u_in_m4@LagFeatures -0.8255672434789728 19.301509493388657
u_in_diff2m1_p1@DerivsAndInts -4308.956449839411 3746.0194175941556
u_out 0 1
u_in_rolling_max2@RollingFeatures -0.9435469215827734 19.597709937353944
C_dummy_10@RCDummy 0.0 1.0
area_tot@DerivsAndInts -0.7244093602910137 6.588994462625688
u_in_diffm1_m1@DerivsAndInts -707.8563660170762 707.7700926042571
u_in -0.9555390882040833 20.82985048548718
R_dummy_20@RCDummy 0.0 1.0
area@DerivsAndInts -0.915601092862522 79.55739474297458
u_in_diffm1@DerivsAndInts -648.3701328365355 648.2792927040214
u_in_rolling_min4@RollingFeatures -0.7714324587291198 19.40891006661144
u_in_p1@LagFeatures -0.8641206060461288 19.24822152642934
u_in_diff2m1@DerivsAndInts -4182.8178868586

In [10]:
print(set(train.drop(["id", "breath_id", "pressure"], axis=1).columns) - set(CFG.cont_seq_cols))
print(train.drop(["id", "breath_id", "pressure"], axis=1).shape)
print(len(CFG.cont_seq_cols))

X = np.float32(train.drop(["id", "breath_id", "pressure"], axis=1)).reshape(-1, 80, len(CFG.cont_seq_cols))
y = np.float32(train["pressure"]).reshape(-1, 80, 1)
X_test = np.float32(test.drop(["id", "breath_id"], axis=1)).reshape(-1, 80, len(CFG.cont_seq_cols))

set()
(6036000, 42)
42


In [11]:
# ====================================================
# Loss
# ====================================================

class L1Loss_masked(nn.Module):
    
    def __init__(self):
        super().__init__()
        
    def forward(self, preds, y, u_out):
        mask = 1 - u_out
        mae = torch.abs(mask * (y - preds))
        mae = torch.sum(mae) / torch.sum(mask)
        return mae

# ====================================================
# Model
# ====================================================

class CustomModel(nn.Module):
    
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.hidden_size = cfg.hidden_size
        
        self.lstm0 = nn.LSTM(len(cfg.cont_seq_cols), self.hidden_size//2, dropout=0.0, batch_first=True, bidirectional=True)
        self.batch_norm_0 = nn.BatchNorm1d(80)
        self.lstm1 = nn.LSTM(self.hidden_size//2 * 2, self.hidden_size//2, dropout=0.0, batch_first=True, bidirectional=True)
        self.batch_norm_1 = nn.BatchNorm1d(80)
        self.lstm2 = nn.LSTM(self.hidden_size//2 * 2, self.hidden_size//4, dropout=0.0, batch_first=True, bidirectional=True)
        self.batch_norm_2 = nn.BatchNorm1d(80)
        self.lstm3 = nn.LSTM(self.hidden_size//4 * 2, self.hidden_size//8, dropout=0.0, batch_first=True, bidirectional=True)
        self.batch_norm_3 = nn.BatchNorm1d(80)
        self.head = nn.Sequential(
            nn.Linear(self.hidden_size//8 * 2, self.hidden_size//8 * 2),
            nn.LayerNorm(self.hidden_size//8 * 2),
            #nn.Dropout(0.1),
            nn.SELU(),
            nn.Linear(self.hidden_size//8 * 2, 1),
        )
        for n, m in self.named_modules():
            if isinstance(m, (nn.LSTM, nn.GRU)):
                print(f'init {m}')
                nn.init.xavier_uniform_(m.weight_ih_l0)
                nn.init.orthogonal_(m.weight_hh_l0)
                nn.init.xavier_uniform_(m.weight_ih_l0_reverse)
                nn.init.orthogonal_(m.weight_hh_l0_reverse)

    def forward(self, cont_seq_x):
        bs = cont_seq_x.size(0)
        seq_emb, _ = self.lstm0(cont_seq_x)
        seq_emb = self.batch_norm_0(seq_emb)
        seq_emb, _ = self.lstm1(seq_emb)
        seq_emb = self.batch_norm_1(seq_emb)
        seq_emb, _ = self.lstm2(seq_emb)
        seq_emb = self.batch_norm_2(seq_emb)
        seq_emb, _ = self.lstm3(seq_emb)
        seq_emb = self.batch_norm_3(seq_emb)
        output = self.head(seq_emb)
        return output

print(CustomModel(CFG))

init LSTM(42, 256, batch_first=True, bidirectional=True)
init LSTM(512, 256, batch_first=True, bidirectional=True)
init LSTM(512, 128, batch_first=True, bidirectional=True)
init LSTM(256, 64, batch_first=True, bidirectional=True)
CustomModel(
  (lstm0): LSTM(42, 256, batch_first=True, bidirectional=True)
  (batch_norm_0): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lstm1): LSTM(512, 256, batch_first=True, bidirectional=True)
  (batch_norm_1): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lstm2): LSTM(512, 128, batch_first=True, bidirectional=True)
  (batch_norm_2): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lstm3): LSTM(256, 64, batch_first=True, bidirectional=True)
  (batch_norm_3): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (head): Sequential(
    (0): Linear(in_features=128, out_features=128, bias=True)
    (1): LayerNorm((128,), 

In [12]:
# ====================================================
# helper function
# ====================================================

class AverageMeter(object):
    
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

scaler = GradScaler()

def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device):
    model.train()
    losses = AverageMeter()
    start = end = time()
    for step, (inputs, y) in enumerate(train_loader):
        inputs, y = inputs.to(device), y.to(device)
        batch_size = inputs.size(0)
        with autocast():
            pred = model(inputs)
            loss = criterion(pred, y, inputs[:,:,0].reshape(-1,80,1))
        losses.update(loss.item(), batch_size)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        if CFG.apex:
            scaler.scale(loss).backward()
        else:
            loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            if CFG.apex:
                scaler.step(optimizer)
            else:
                optimizer.step()
            optimizer.zero_grad()
            lr = 0
            if CFG.batch_scheduler:
                scheduler.step()
                lr = scheduler.get_lr()[0]
        if CFG.apex:
            scaler.update()
        end = time()
    return losses.avg

def valid_fn(valid_loader, model, criterion, device):
    model.eval()
    preds = []
    losses = AverageMeter()
    start = end = time()
    for step, (inputs, y) in enumerate(valid_loader):
        inputs, y = inputs.to(device), y.to(device)
        batch_size = inputs.size(0)
        with torch.no_grad():
            pred = model(inputs)
        loss = criterion(pred, y, inputs[:,:,0].reshape(-1,80,1))
        losses.update(loss.item(), batch_size)
        preds.append(pred.view(-1).detach().cpu().numpy())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        end = time()
    preds = np.concatenate(preds)
    return losses.avg, preds

def inference_fn(test_loader, model, device):
    model.eval()
    model.to(device)
    preds = []
    tk0 = tqdm(enumerate(test_loader), total=len(test_loader))
    for step, (cont_seq_x) in tk0:
        cont_seq_x = cont_seq_x.to(device)
        with torch.no_grad():
            pred = model(cont_seq_x)
        preds.append(pred.view(-1).detach().cpu().numpy())
    preds = np.concatenate(preds)
    return preds

In [13]:
# ====================================================
# train loop
# ====================================================

def train_loop(folds, fold, trn_idx, val_idx):
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================    
    train_folds = X[trn_idx]
    valid_folds = X[val_idx]
    groups = train["breath_id"].unique()[val_idx]
    oof_folds = train[train["breath_id"].isin(groups)].reset_index(drop=True)
    y_train = y[trn_idx]
    y_true = y[val_idx]

    train_dataset = torch.utils.data.TensorDataset(
        torch.from_numpy(train_folds),
        torch.from_numpy(y_train)
    )
    valid_dataset = torch.utils.data.TensorDataset(
        torch.from_numpy(valid_folds),
        torch.from_numpy(y_true)
    )

    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG)
    model.to(device)

    optimizer = AdamW(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay)
    #optimizer = torch.optim.Adam(model.parameters(), lr=0.0008, eps=1e-08)
    num_train_steps = int(len(train_folds) / CFG.batch_size * CFG.epochs)
    
    def get_scheduler(optimizer):
        if CFG.scheduler == 'linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=CFG.num_warmup_steps, num_training_steps=num_train_steps
            )
        elif CFG.scheduler == 'cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer, num_warmup_steps=CFG.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=CFG.num_cycles
            )
        elif CFG.scheduler == 'ReduceLROnPlateau':
            scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
        elif CFG.scheduler == 'CosineAnnealingLR':
            scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
        elif CFG.scheduler == 'CosineAnnealingWarmRestarts':
            scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)
        return scheduler

    scheduler = get_scheduler(optimizer)

    # ====================================================
    # apex
    # ====================================================
    #if CFG.apex:
    #    model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)

    # ====================================================
    # loop
    # ====================================================
    criterion = L1Loss_masked()
    best_score = np.inf
    avg_losses = []
    avg_val_losses = []
    for epoch in range(CFG.epochs):
        start_time = time()

        # train
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device)
        #avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, None, device)
        avg_losses.append(avg_loss)
        
        # eval
        avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)
        avg_val_losses.append(avg_val_loss)
        
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        elif isinstance(scheduler, CosineAnnealingLR):
            scheduler.step()
        elif isinstance(scheduler, CosineAnnealingWarmRestarts):
            scheduler.step()

        # scoring
        score = avg_val_loss #get_score(y_true[non_expiratory_phase_val_idx], preds[non_expiratory_phase_val_idx])

        elapsed = time() - start_time

        best_notice = ""
        if score < best_score:
            best_notice = "Best Score"
            best_score = score
            # LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'preds': preds},
                        OUTPUT_DIR+f"fold{fold}_best.pth")
    
        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s, lr: {optimizer.param_groups[0]["lr"]:.5f}, MAE Score: {score:.4f}, {best_notice}')

    plt.figure(figsize=(14,6))
    plt.plot(avg_losses, label="Train Loss")
    plt.plot(avg_val_losses, label="Val Loss")
    plt.title(f"Fold {fold + 1} - Best score {best_score:.4f}", size=18)
    plt.legend()
    plt.show()

    preds = torch.load(OUTPUT_DIR+f"fold{fold}_best.pth", map_location=torch.device('cpu'))['preds']
    oof_folds['preds'] = preds.flatten()

    torch.cuda.empty_cache()
    gc.collect()
    
    return oof_folds

In [None]:
# ====================================================
# Prepare: 1.train 2.test
# ====================================================

def get_result(result_df):
    preds = result_df['preds'].values
    labels = result_df['pressure'].values
    non_expiratory_phase_val_idx = result_df[result_df['u_out'] == 0].index # The expiratory phase is not scored
    score = get_score(labels[non_expiratory_phase_val_idx], preds[non_expiratory_phase_val_idx])
    LOGGER.info(f'Score (without expiratory phase): {score:<.4f}')

# train
if CFG.train:
    oof_df = pd.DataFrame()
    kfold = KFold(n_splits=CFG.n_fold, random_state=42, shuffle=True)
    for fold, (trn_idx, val_idx) in enumerate(kfold.split(X=X, y=y)):
        if fold in CFG.trn_fold:
            _oof_df = train_loop(X, fold, trn_idx, val_idx)
            oof_df = pd.concat([oof_df, _oof_df])
            LOGGER.info(f"========== fold: {fold} result ==========")
            get_result(_oof_df)
    # CV result
    LOGGER.info(f"========== CV ==========")
    get_result(oof_df)
    # save result
    oof_df.to_csv(OUTPUT_DIR+'oof_df.csv', index=False)

    for i, breath_id in enumerate(oof_df["breath_id"].unique()):
        oof_df[oof_df["breath_id"]==breath_id].plot(x="time_step", y=["preds", "pressure", "u_out"], figsize=(16, 5))
        plt.show()
        if i == 10:
            break



init LSTM(42, 256, batch_first=True, bidirectional=True)
init LSTM(512, 256, batch_first=True, bidirectional=True)
init LSTM(512, 128, batch_first=True, bidirectional=True)
init LSTM(256, 64, batch_first=True, bidirectional=True)


Epoch 1 - avg_train_loss: 4.3296  avg_val_loss: 1.5377  time: 167s, lr: 0.00100, MAE Score: 1.5377, Best Score
Epoch 2 - avg_train_loss: 1.1927  avg_val_loss: 0.9439  time: 170s, lr: 0.00100, MAE Score: 0.9439, Best Score
Epoch 3 - avg_train_loss: 0.9021  avg_val_loss: 0.8270  time: 172s, lr: 0.00099, MAE Score: 0.8270, Best Score
Epoch 4 - avg_train_loss: 0.7995  avg_val_loss: 0.8010  time: 176s, lr: 0.00098, MAE Score: 0.8010, Best Score


In [None]:
# ====================================================
# Getting output
# ====================================================

if CFG.inference:
    test_loader = DataLoader(X_test, batch_size=512, shuffle=False, pin_memory=True)
    #test_loader = DataLoader(test_dataset, batch_size=CFG.batch_size * 2, shuffle=False, num_workers=CFG.num_workers, pin_memory=True)
    for fold in CFG.trn_fold:
        model = CustomModel(CFG)
        path = OUTPUT_DIR + f"fold{fold}_best.pth"
        state = torch.load(path, map_location=torch.device('cpu'))
        model.load_state_dict(state['model'])
        predictions = inference_fn(test_loader, model, device)
        test[f'fold{fold}'] = predictions
        del state, predictions; gc.collect()
        torch.cuda.empty_cache()
        
    # submission
    test['pressure'] = test[[f'fold{fold}' for fold in CFG.trn_fold]].median(1)
    test[['id', 'pressure']+[f'fold{fold}' for fold in CFG.trn_fold]].to_csv(OUTPUT_DIR+'raw_submission.csv', index=False)
    test[['id', 'pressure']].to_csv(OUTPUT_DIR+'submission.csv', index=False)