In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import os

OUTPUT_DIR = './'
DATA_DIR = "./"
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

In [4]:
# ====================================================
# CFG
# ====================================================

class CFG:
    competition='ventilator'
    apex=True
    print_freq=1000
    num_workers=0
    model_name='rnn'
    scheduler='cosine'   # ['linear', 'cosine', 'ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts']
    batch_scheduler=True # Valid For CosineAnnealingWarmRestarts, cosine
    num_warmup_steps=100 # ['linear', 'cosine']
    num_cycles=0.5 # 'cosine'
    factor=0.995   # ReduceLROnPlateau
    patience=7     # ReduceLROnPlateau
    eps=1e-6       # ReduceLROnPlateau
    T_max=50       # CosineAnnealingLR
    T_0=20         # CosineAnnealingWarmRestarts
    epochs=100
    max_grad_norm=1000
    gradient_accumulation_steps=1
    hidden_size=512
    lr=1e-3
    min_lr=1e-6
    weight_decay=1e-6
    batch_size=64
    n_fold = 2
    trn_fold=[0]
    cate_seq_cols=[]
    cont_seq_cols = ['R','C','time_step', 'u_in', 'u_out']
    train=True
    inference=True
    debug=False

if CFG.debug:
    CFG.epochs = 2
    CFG.trn_fold=[0]

In [5]:
# !pip install category_encoders

In [6]:
# !pip install transformers

In [7]:
# ====================================================
# Library
# ====================================================
import os
import gc
import sys
import json
import math
import random
from time import time
from datetime import datetime
from collections import Counter, defaultdict

import scipy as sp
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

from tqdm.auto import tqdm
import category_encoders as ce

from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
from torch.cuda.amp import GradScaler
from torch.cuda.amp import autocast

from transformers import AdamW
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup

import warnings
warnings.filterwarnings("ignore")

#if CFG.apex:
#    from apex import amp

DEVICE = device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print("Python     : " + sys.version.split("\n")[0])
print("Numpy      : " + np.__version__)
print("Pandas     : " + pd.__version__)
print("PyTorch    : " + torch.__version__)
print("Running on device: {}".format(DEVICE))

Python     : 3.8.8 (default, Apr 13 2021, 15:08:03) [MSC v.1916 64 bit (AMD64)]
Numpy      : 1.19.5
Pandas     : 1.2.4
PyTorch    : 1.8.1
Running on device: cuda


In [7]:
# ====================================================
# Utils
# ====================================================
def get_score(y_trues, y_preds):
    score = mean_absolute_error(y_trues, y_preds)
    return score


def init_logger(log_file=OUTPUT_DIR+'train.log'):
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = init_logger()


def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything()

def decorate(s: str, decoration=None):
    if decoration is None:
        decoration = '★' * 20

    return ' '.join([decoration, str(s), decoration])

class Timer:
    def __init__(self, logger=None, format_str='{:.3f}[s]', prefix=None, suffix=None, sep=' ', verbose=0):

        if prefix: format_str = str(prefix) + sep + format_str
        if suffix: format_str = format_str + sep + str(suffix)
        self.format_str = format_str
        self.logger = logger
        self.start = None
        self.end = None
        self.verbose = verbose

    @property
    def duration(self):
        if self.end is None:
            return 0
        return self.end - self.start

    def __enter__(self):
        self.start = time()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.end = time()
        if self.verbose is None:
            return
        out_str = self.format_str.format(self.duration)
        if self.logger:
            self.logger.info(out_str)
        else:
            print(out_str)

In [8]:
# ====================================================
# Data Loading
# ====================================================

train = pd.read_csv(DATA_DIR + 'train.csv')

if CFG.debug:
    train = train[:80*10_000]
    gc.collect()

test = pd.read_csv(DATA_DIR + 'test.csv')
sub = pd.read_csv(DATA_DIR + 'sample_submission.csv')
gc.collect()

all_pressure = sorted(train.pressure.unique())
PRESSURE_MIN = np.min(all_pressure)
PRESSURE_MAX = np.max(all_pressure)
PRESSURE_STEP = all_pressure[1] - all_pressure[0]

display(train.head())
display(test.head())
display(sub.head())

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure
0,1,1,20,50,0.0,0.083334,0,5.837492
1,2,1,20,50,0.033652,18.383041,0,5.907794
2,3,1,20,50,0.067514,22.509278,0,7.876254
3,4,1,20,50,0.101542,22.808822,0,11.742872
4,5,1,20,50,0.135756,25.35585,0,12.234987


Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out
0,1,0,5,20,0.0,0.0,0
1,2,0,5,20,0.031904,7.515046,0
2,3,0,5,20,0.063827,14.651675,0
3,4,0,5,20,0.095751,21.23061,0
4,5,0,5,20,0.127644,26.320956,0


Unnamed: 0,id,pressure
0,1,0
1,2,0
2,3,0
3,4,0
4,5,0


## Add Features

In [9]:
class AbstractBaseBlock:
    def fit(self, input_df: pd.DataFrame, y=None):
        return self.transform(input_df)

    def transform(self, input_df: pd.DataFrame) -> pd.DataFrame:
        raise NotImplementedError()

In [10]:
class AddBreathTimeAndUInTime(AbstractBaseBlock):
    def transform(self, input_df):
        output_df = pd.DataFrame(
            {
#                 "breath_time": input_df['time_step'] - input_df['time_step'].shift(1),
#                 "u_in_time": input_df['u_in'] - input_df['u_in'].shift(1)
                "breath_id_lag": input_df['breath_id'].shift(1).fillna(0),
                "breath_id_lag2": input_df['breath_id'].shift(2).fillna(0),
                "breath_id__u_in_lag": input_df['u_in'].shift(1).fillna(0),
                "breath_id__u_in_lag2": input_df['u_in'].shift(2).fillna(0)
            }
        )
        output_df["breath_id_lagsame"] = np.select([output_df['breath_id_lag']==input_df['breath_id']],[1],0)
        output_df["breath_id_lag2same"] = np.select([output_df['breath_id_lag2']==input_df['breath_id']],[1],0)
        output_df["breath_id__u_in_lag"] = output_df['breath_id__u_in_lag'] * output_df['breath_id_lagsame']
        output_df["breath_id__u_in_lag2"] = output_df['breath_id__u_in_lag2'] * output_df['breath_id_lag2same']
        
        output_df['breath_id__u_in__max'] = input_df.groupby(['breath_id'])['u_in'].transform('max')
        output_df['breath_id__u_in__mean'] = input_df.groupby(['breath_id'])['u_in'].transform('mean')
        output_df['breath_id__u_in__diffmax'] = input_df.groupby(['breath_id'])['u_in'].transform('max') - input_df['u_in']
        output_df['breath_id__u_in__diffmean'] = input_df.groupby(['breath_id'])['u_in'].transform('mean') - input_df['u_in']
        
        output_df['time_step_diff'] = input_df.groupby('breath_id')['time_step'].diff().fillna(0)
        output_df['ewm_u_in_mean'] = (input_df\
                               .groupby('breath_id')['u_in']\
                               .ewm(halflife=9)\
                               .mean()\
                               .reset_index(level=0,drop=True))
        output_df[["15_in_sum","15_in_min","15_in_max","15_in_mean"]] = (input_df\
                                                                  .groupby('breath_id')['u_in']\
                                                                  .rolling(window=15,min_periods=1)\
                                                                  .agg({"15_in_sum":"sum",
                                                                        "15_in_min":"min",
                                                                        "15_in_max":"max",
                                                                        "15_in_mean":"mean"
                                                                        #"15_in_std":"std"
                                                                   })\
                                                                   .reset_index(level=0,drop=True))
        
        
        CFG.cont_seq_cols += output_df.columns.tolist()
        return output_df


In [11]:
class LagFeatures(AbstractBaseBlock):
    def transform(self, input_df):
        output_df = pd.DataFrame(
            {
                "u_in_lag1": input_df.groupby("breath_id")["u_in"].shift(1).fillna(0),
                "u_in_lag2": input_df.groupby("breath_id")["u_in"].shift(2).fillna(0),
                "u_in_lag3": input_df.groupby("breath_id")["u_in"].shift(3).fillna(0),
                "u_in_lag4": input_df.groupby("breath_id")["u_in"].shift(4).fillna(0),
                "u_in_lag_back1": input_df.groupby("breath_id")["u_in"].shift(-1).fillna(0),
                "u_in_lag_back2": input_df.groupby("breath_id")["u_in"].shift(-2).fillna(0),
                "u_in_lag_back3": input_df.groupby("breath_id")["u_in"].shift(-3).fillna(0),
                "u_in_lag_back4": input_df.groupby("breath_id")["u_in"].shift(-4).fillna(0),
                "u_out_lag1": input_df.groupby("breath_id")["u_out"].shift(1).fillna(0),
                "u_out_lag2": input_df.groupby("breath_id")["u_out"].shift(2).fillna(0),
                "u_out_lag3": input_df.groupby("breath_id")["u_out"].shift(3).fillna(0),
                "u_out_lag4": input_df.groupby("breath_id")["u_out"].shift(4).fillna(0),
                "u_out_lag_back1": input_df.groupby("breath_id")["u_out"].shift(-1).fillna(0),
                "u_out_lag_back2": input_df.groupby("breath_id")["u_out"].shift(-2).fillna(0),
                "u_out_lag_back3": input_df.groupby("breath_id")["u_out"].shift(-3).fillna(0),
                "u_out_lag_back4": input_df.groupby("breath_id")["u_out"].shift(-4).fillna(0)
            }
        )
        
        output_df['u_in_diff1'] = input_df['u_in'] - output_df['u_in_lag1']
        output_df['u_out_diff1'] = input_df['u_out'] - output_df['u_out_lag1']
        output_df['u_in_diff2'] = input_df['u_in'] - output_df['u_in_lag2']
        output_df['u_out_diff2'] = input_df['u_out'] - output_df['u_out_lag2']
        output_df['u_in_diff3'] = input_df['u_in'] - output_df['u_in_lag3']
        output_df['u_out_diff3'] = input_df['u_out'] - output_df['u_out_lag3']
        output_df['u_in_diff4'] = input_df['u_in'] - output_df['u_in_lag4']
        output_df['u_out_diff4'] = input_df['u_out'] - output_df['u_out_lag4']
        
        output_df['u_in_lagback_diff1'] = input_df['u_in'] - output_df['u_in_lag_back1']
        output_df['u_out_lagback_diff1'] = input_df['u_out'] - output_df['u_out_lag_back1']
        output_df['u_in_lagback_diff2'] = input_df['u_in'] - output_df['u_in_lag_back2']
        output_df['u_out_lagback_diff2'] = input_df['u_out'] - output_df['u_out_lag_back2']
        
        for col in output_df.columns:
            output_df[col] = output_df[col].fillna(0)
        
        CFG.cont_seq_cols += output_df.columns.tolist()
        return output_df

In [12]:
class AddMultiplyingDividing(AbstractBaseBlock):
    def transform(self, input_df):
        input_df['cross']= input_df['u_in'] * input_df['u_out']
        input_df['cross2']= input_df['time_step'] * input_df['u_out']
        input_df['area'] = input_df['time_step'] * input_df['u_in']
        input_df['area'] = input_df.groupby('breath_id')['area'].cumsum()
        input_df['time_step_cumsum'] = input_df.groupby(['breath_id'])['time_step'].cumsum()
        input_df['u_in_cumsum'] = (input_df['u_in']).groupby(input_df['breath_id']).cumsum()
        input_df['one'] = 1
        input_df['count'] = (input_df['one']).groupby(input_df['breath_id']).cumsum()
        input_df['u_in_cummean'] =input_df['u_in_cumsum'] /input_df['count']
    
        output_df = pd.DataFrame(
            {
                "cross": input_df['cross'],
                "cross2": input_df['cross2'],
                "area": input_df['area'],
                "time_step_cumsum": input_df['time_step_cumsum'],
                "u_in_cumsum": input_df['u_in_cumsum'],
                "one": input_df['one'],
                "count": input_df['count'],
                "u_in_cummean": input_df['u_in_cummean'],
    #             "u_out1_mean": input_df['u_out1_mean'],
    #             "u_out1_max": input_df['u_out1_max'],
    #             "u_out1_max": input_df['u_out1_std'],
            }
        )
    
        CFG.cont_seq_cols += output_df.columns.tolist()
        return output_df
    

In [13]:
class RCMixed(AbstractBaseBlock):
    def transform(self, input_df):
        input_df['R'] = input_df['R'].astype(str)
        input_df['C'] = input_df['C'].astype(str)
        
        input_df['R__C'] = input_df["R"].astype(str) + '__' + input_df["C"].astype(str)
        output_df = pd.get_dummies(input_df)
        
        CFG.cont_seq_cols += output_df.columns.tolist()
        return output_df

In [14]:
feature_blocks = [
    AddMultiplyingDividing(),
    AddBreathTimeAndUInTime(),
    RCMixed(),
    LagFeatures(),
]

In [15]:
def run_blocks(input_df, blocks, y=None, test=False):
    out_df = pd.DataFrame()

    print(decorate('start run blocks...'))

    with Timer(prefix='run test={}'.format(test)):
        for block in feature_blocks:
            with Timer(prefix='out_df shape: {} \t- {}'.format(out_df.shape, str(block))):
                if not test:
                    out_i = block.fit(input_df.copy(), y=y)
                else:
                    out_i = block.transform(input_df.copy())

            assert len(input_df) == len(out_i), block
            name = block.__class__.__name__
            out_df = pd.concat([out_df, out_i.add_suffix(f'')], axis=1)
    print(f"out_df shape: {out_df.shape}")
    return pd.concat([input_df, out_df], axis=1)


train = run_blocks(train, blocks=feature_blocks)
test = run_blocks(test, blocks=feature_blocks, test=True)

CFG.cont_seq_cols = list(set(CFG.cont_seq_cols))

display(train.head())
display(test.head())

★★★★★★★★★★★★★★★★★★★★ start run blocks... ★★★★★★★★★★★★★★★★★★★★
out_df shape: (0, 0) 	- <__main__.AddMultiplyingDividing object at 0x0000015A88FC8A60> 0.812[s]
out_df shape: (6036000, 8) 	- <__main__.AddBreathTimeAndUInTime object at 0x0000015A88FC8B20> 62.506[s]
out_df shape: (6036000, 24) 	- <__main__.RCMixed object at 0x0000015A88FC8760> 10.656[s]
out_df shape: (6036000, 45) 	- <__main__.LagFeatures object at 0x0000015A88FC8F10> 5.040[s]
run test=False 84.776[s]
out_df shape: (6036000, 73)
★★★★★★★★★★★★★★★★★★★★ start run blocks... ★★★★★★★★★★★★★★★★★★★★
out_df shape: (0, 0) 	- <__main__.AddMultiplyingDividing object at 0x0000015A88FC8A60> 0.603[s]
out_df shape: (4024000, 8) 	- <__main__.AddBreathTimeAndUInTime object at 0x0000015A88FC8B20> 39.593[s]
out_df shape: (4024000, 24) 	- <__main__.RCMixed object at 0x0000015A88FC8760> 6.194[s]
out_df shape: (4024000, 44) 	- <__main__.LagFeatures object at 0x0000015A88FC8F10> 3.050[s]
run test=True 52.513[s]
out_df shape: (4024000, 72)


Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure,cross,cross2,area,time_step_cumsum,u_in_cumsum,one,count,u_in_cummean,breath_id_lag,breath_id_lag2,breath_id__u_in_lag,breath_id__u_in_lag2,breath_id_lagsame,breath_id_lag2same,breath_id__u_in__max,breath_id__u_in__mean,breath_id__u_in__diffmax,breath_id__u_in__diffmean,time_step_diff,ewm_u_in_mean,15_in_sum,15_in_min,15_in_max,15_in_mean,id.1,breath_id.1,time_step.1,u_in.1,u_out.1,pressure.1,R_20,R_5,R_50,C_10,C_20,C_50,R__C_20__10,R__C_20__20,R__C_20__50,R__C_50__10,R__C_50__20,R__C_50__50,R__C_5__10,R__C_5__20,R__C_5__50,u_in_lag1,u_in_lag2,u_in_lag3,u_in_lag4,u_in_lag_back1,u_in_lag_back2,u_in_lag_back3,u_in_lag_back4,u_out_lag1,u_out_lag2,u_out_lag3,u_out_lag4,u_out_lag_back1,u_out_lag_back2,u_out_lag_back3,u_out_lag_back4,u_in_diff1,u_out_diff1,u_in_diff2,u_out_diff2,u_in_diff3,u_out_diff3,u_in_diff4,u_out_diff4,u_in_lagback_diff1,u_out_lagback_diff1,u_in_lagback_diff2,u_out_lagback_diff2
0,1,1,20,50,0.0,0.083334,0,5.837492,0.0,0.0,0.0,0.0,0.083334,1,1,0.083334,0.0,0.0,0.0,0.0,0,0,28.313036,10.146007,28.229702,10.062673,0.0,0.083334,0.083334,0.083334,0.083334,0.083334,1,1,0.0,0.083334,0,5.837492,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0.0,0.0,0.0,0.0,18.383041,22.509278,22.808822,25.35585,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083334,0.0,0.083334,0.0,0.083334,0.0,0.083334,0.0,-18.299707,0.0,-22.425944,0.0
1,2,1,20,50,0.033652,18.383041,0,5.907794,0.0,0.0,0.618632,0.033652,18.466375,1,2,9.233188,1.0,0.0,0.083334,0.0,1,0,28.313036,10.146007,9.929994,-8.237035,0.033652,9.585358,18.466375,0.083334,18.383041,9.233188,2,1,0.033652,18.383041,0,5.907794,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0.083334,0.0,0.0,0.0,22.509278,22.808822,25.35585,27.259866,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.299707,0.0,18.383041,0.0,18.383041,0.0,18.383041,0.0,-4.126236,0.0,-4.425781,0.0
2,3,1,20,50,0.067514,22.509278,0,7.876254,0.0,0.0,2.138333,0.101167,40.975653,1,3,13.658551,1.0,1.0,18.383041,0.083334,1,1,28.313036,10.146007,5.803758,-12.363271,0.033862,14.22904,40.975653,0.083334,22.509278,13.658551,3,1,0.067514,22.509278,0,7.876254,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,18.383041,0.083334,0.0,0.0,22.808822,25.35585,27.259866,27.127486,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.126236,0.0,22.425944,0.0,22.509278,0.0,22.509278,0.0,-0.299544,0.0,-2.846573,0.0
3,4,1,20,50,0.101542,22.808822,0,11.742872,0.0,0.0,4.454391,0.202709,63.784476,1,4,15.946119,1.0,1.0,22.509278,18.383041,1,1,28.313036,10.146007,5.504214,-12.662816,0.034028,16.627759,63.784476,0.083334,22.808822,15.946119,4,1,0.101542,22.808822,0,11.742872,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,22.509278,18.383041,0.083334,0.0,25.35585,27.259866,27.127486,26.807732,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.299544,0.0,4.425781,0.0,22.725488,0.0,22.808822,0.0,-2.547028,0.0,-4.451044,0.0
4,5,1,20,50,0.135756,25.35585,0,12.234987,0.0,0.0,7.896588,0.338464,89.140326,1,5,17.828065,1.0,1.0,22.808822,22.509278,1,1,28.313036,10.146007,2.957185,-15.209844,0.034213,18.652046,89.140326,0.083334,25.35585,17.828065,5,1,0.135756,25.35585,0,12.234987,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,22.808822,22.509278,18.383041,0.083334,27.259866,27.127486,26.807732,27.864715,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.547028,0.0,2.846573,0.0,6.972809,0.0,25.272516,0.0,-1.904016,0.0,-1.771635,0.0


Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,cross,cross2,area,time_step_cumsum,u_in_cumsum,one,count,u_in_cummean,breath_id_lag,breath_id_lag2,breath_id__u_in_lag,breath_id__u_in_lag2,breath_id_lagsame,breath_id_lag2same,breath_id__u_in__max,breath_id__u_in__mean,breath_id__u_in__diffmax,breath_id__u_in__diffmean,time_step_diff,ewm_u_in_mean,15_in_sum,15_in_min,15_in_max,15_in_mean,id.1,breath_id.1,time_step.1,u_in.1,u_out.1,R_20,R_5,R_50,C_10,C_20,C_50,R__C_20__10,R__C_20__20,R__C_20__50,R__C_50__10,R__C_50__20,R__C_50__50,R__C_5__10,R__C_5__20,R__C_5__50,u_in_lag1,u_in_lag2,u_in_lag3,u_in_lag4,u_in_lag_back1,u_in_lag_back2,u_in_lag_back3,u_in_lag_back4,u_out_lag1,u_out_lag2,u_out_lag3,u_out_lag4,u_out_lag_back1,u_out_lag_back2,u_out_lag_back3,u_out_lag_back4,u_in_diff1,u_out_diff1,u_in_diff2,u_out_diff2,u_in_diff3,u_out_diff3,u_in_diff4,u_out_diff4,u_in_lagback_diff1,u_out_lagback_diff1,u_in_lagback_diff2,u_out_lagback_diff2
0,1,0,5,20,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,1,1,0.0,0.0,0.0,0.0,0.0,1,1,37.542219,9.327338,37.542219,9.327338,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0.0,0.0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0.0,0.0,0.0,0.0,7.515046,14.651675,21.23061,26.320956,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-7.515046,0.0,-14.651675,0.0
1,2,0,5,20,0.031904,7.515046,0,0.0,0.0,0.239758,0.031904,7.515046,1,2,3.757523,0.0,0.0,0.0,0.0,1,1,37.542219,9.327338,30.027173,1.812292,0.031904,3.902147,7.515046,0.0,7.515046,3.757523,2,0,0.031904,7.515046,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0.0,0.0,0.0,0.0,14.651675,21.23061,26.320956,30.486938,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.515046,0.0,7.515046,0.0,7.515046,0.0,7.515046,0.0,-7.13663,0.0,-13.715564,0.0
2,3,0,5,20,0.063827,14.651675,0,0.0,0.0,1.174935,0.095731,22.166721,1,3,7.388907,0.0,0.0,7.515046,0.0,1,1,37.542219,9.327338,22.890543,-5.324338,0.031924,7.764551,22.166721,0.0,14.651675,7.388907,3,0,0.063827,14.651675,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,7.515046,0.0,0.0,0.0,21.23061,26.320956,30.486938,33.54595,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.13663,0.0,14.651675,0.0,14.651675,0.0,14.651675,0.0,-6.578935,0.0,-11.669281,0.0
3,4,0,5,20,0.095751,21.23061,0,0.0,0.0,3.207788,0.191482,43.397331,1,4,10.849333,0.0,0.0,14.651675,7.515046,1,1,37.542219,9.327338,16.311609,-11.903272,0.031924,11.529365,43.397331,0.0,21.23061,10.849333,4,0,0.095751,21.23061,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,14.651675,7.515046,0.0,0.0,26.320956,30.486938,33.54595,35.7176,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.578935,0.0,13.715564,0.0,21.23061,0.0,21.23061,0.0,-5.090346,0.0,-9.256328,0.0
4,5,0,5,20,0.127644,26.320956,0,0.0,0.0,6.567489,0.319126,69.718287,1,5,13.943657,0.0,0.0,21.23061,14.651675,1,1,37.542219,9.327338,11.221263,-16.993619,0.031893,14.959946,69.718287,0.0,26.320956,13.943657,5,0,0.127644,26.320956,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,21.23061,14.651675,7.515046,0.0,30.486938,33.54595,35.7176,36.971061,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.090346,0.0,11.669281,0.0,18.805911,0.0,26.320956,0.0,-4.165982,0.0,-7.224994,0.0


In [16]:
Cols = list(train.columns)
for i,item in enumerate(train.columns):
    if item in train.columns[:i]: Cols[i] = "toDROP"
train.columns = Cols
train = train.drop("toDROP",1)

In [17]:
Cols = list(test.columns)
for i,item in enumerate(test.columns):
    if item in test.columns[:i]: Cols[i] = "toDROP"
test.columns = Cols
test = test.drop("toDROP",1)

In [18]:
train.head()

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure,cross,cross2,area,time_step_cumsum,u_in_cumsum,one,count,u_in_cummean,breath_id_lag,breath_id_lag2,breath_id__u_in_lag,breath_id__u_in_lag2,breath_id_lagsame,breath_id_lag2same,breath_id__u_in__max,breath_id__u_in__mean,breath_id__u_in__diffmax,breath_id__u_in__diffmean,time_step_diff,ewm_u_in_mean,15_in_sum,15_in_min,15_in_max,15_in_mean,R_20,R_5,R_50,C_10,C_20,C_50,R__C_20__10,R__C_20__20,R__C_20__50,R__C_50__10,R__C_50__20,R__C_50__50,R__C_5__10,R__C_5__20,R__C_5__50,u_in_lag1,u_in_lag2,u_in_lag3,u_in_lag4,u_in_lag_back1,u_in_lag_back2,u_in_lag_back3,u_in_lag_back4,u_out_lag1,u_out_lag2,u_out_lag3,u_out_lag4,u_out_lag_back1,u_out_lag_back2,u_out_lag_back3,u_out_lag_back4,u_in_diff1,u_out_diff1,u_in_diff2,u_out_diff2,u_in_diff3,u_out_diff3,u_in_diff4,u_out_diff4,u_in_lagback_diff1,u_out_lagback_diff1,u_in_lagback_diff2,u_out_lagback_diff2
0,1,1,20,50,0.0,0.083334,0,5.837492,0.0,0.0,0.0,0.0,0.083334,1,1,0.083334,0.0,0.0,0.0,0.0,0,0,28.313036,10.146007,28.229702,10.062673,0.0,0.083334,0.083334,0.083334,0.083334,0.083334,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0.0,0.0,0.0,0.0,18.383041,22.509278,22.808822,25.35585,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083334,0.0,0.083334,0.0,0.083334,0.0,0.083334,0.0,-18.299707,0.0,-22.425944,0.0
1,2,1,20,50,0.033652,18.383041,0,5.907794,0.0,0.0,0.618632,0.033652,18.466375,1,2,9.233188,1.0,0.0,0.083334,0.0,1,0,28.313036,10.146007,9.929994,-8.237035,0.033652,9.585358,18.466375,0.083334,18.383041,9.233188,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0.083334,0.0,0.0,0.0,22.509278,22.808822,25.35585,27.259866,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.299707,0.0,18.383041,0.0,18.383041,0.0,18.383041,0.0,-4.126236,0.0,-4.425781,0.0
2,3,1,20,50,0.067514,22.509278,0,7.876254,0.0,0.0,2.138333,0.101167,40.975653,1,3,13.658551,1.0,1.0,18.383041,0.083334,1,1,28.313036,10.146007,5.803758,-12.363271,0.033862,14.22904,40.975653,0.083334,22.509278,13.658551,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,18.383041,0.083334,0.0,0.0,22.808822,25.35585,27.259866,27.127486,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.126236,0.0,22.425944,0.0,22.509278,0.0,22.509278,0.0,-0.299544,0.0,-2.846573,0.0
3,4,1,20,50,0.101542,22.808822,0,11.742872,0.0,0.0,4.454391,0.202709,63.784476,1,4,15.946119,1.0,1.0,22.509278,18.383041,1,1,28.313036,10.146007,5.504214,-12.662816,0.034028,16.627759,63.784476,0.083334,22.808822,15.946119,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,22.509278,18.383041,0.083334,0.0,25.35585,27.259866,27.127486,26.807732,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.299544,0.0,4.425781,0.0,22.725488,0.0,22.808822,0.0,-2.547028,0.0,-4.451044,0.0
4,5,1,20,50,0.135756,25.35585,0,12.234987,0.0,0.0,7.896588,0.338464,89.140326,1,5,17.828065,1.0,1.0,22.808822,22.509278,1,1,28.313036,10.146007,2.957185,-15.209844,0.034213,18.652046,89.140326,0.083334,25.35585,17.828065,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,22.808822,22.509278,18.383041,0.083334,27.259866,27.127486,26.807732,27.864715,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.547028,0.0,2.846573,0.0,6.972809,0.0,25.272516,0.0,-1.904016,0.0,-1.771635,0.0


In [19]:
target = train['pressure']

target.shape

(6036000,)

In [20]:
train.head()

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure,cross,cross2,area,time_step_cumsum,u_in_cumsum,one,count,u_in_cummean,breath_id_lag,breath_id_lag2,breath_id__u_in_lag,breath_id__u_in_lag2,breath_id_lagsame,breath_id_lag2same,breath_id__u_in__max,breath_id__u_in__mean,breath_id__u_in__diffmax,breath_id__u_in__diffmean,time_step_diff,ewm_u_in_mean,15_in_sum,15_in_min,15_in_max,15_in_mean,R_20,R_5,R_50,C_10,C_20,C_50,R__C_20__10,R__C_20__20,R__C_20__50,R__C_50__10,R__C_50__20,R__C_50__50,R__C_5__10,R__C_5__20,R__C_5__50,u_in_lag1,u_in_lag2,u_in_lag3,u_in_lag4,u_in_lag_back1,u_in_lag_back2,u_in_lag_back3,u_in_lag_back4,u_out_lag1,u_out_lag2,u_out_lag3,u_out_lag4,u_out_lag_back1,u_out_lag_back2,u_out_lag_back3,u_out_lag_back4,u_in_diff1,u_out_diff1,u_in_diff2,u_out_diff2,u_in_diff3,u_out_diff3,u_in_diff4,u_out_diff4,u_in_lagback_diff1,u_out_lagback_diff1,u_in_lagback_diff2,u_out_lagback_diff2
0,1,1,20,50,0.0,0.083334,0,5.837492,0.0,0.0,0.0,0.0,0.083334,1,1,0.083334,0.0,0.0,0.0,0.0,0,0,28.313036,10.146007,28.229702,10.062673,0.0,0.083334,0.083334,0.083334,0.083334,0.083334,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0.0,0.0,0.0,0.0,18.383041,22.509278,22.808822,25.35585,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083334,0.0,0.083334,0.0,0.083334,0.0,0.083334,0.0,-18.299707,0.0,-22.425944,0.0
1,2,1,20,50,0.033652,18.383041,0,5.907794,0.0,0.0,0.618632,0.033652,18.466375,1,2,9.233188,1.0,0.0,0.083334,0.0,1,0,28.313036,10.146007,9.929994,-8.237035,0.033652,9.585358,18.466375,0.083334,18.383041,9.233188,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0.083334,0.0,0.0,0.0,22.509278,22.808822,25.35585,27.259866,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.299707,0.0,18.383041,0.0,18.383041,0.0,18.383041,0.0,-4.126236,0.0,-4.425781,0.0
2,3,1,20,50,0.067514,22.509278,0,7.876254,0.0,0.0,2.138333,0.101167,40.975653,1,3,13.658551,1.0,1.0,18.383041,0.083334,1,1,28.313036,10.146007,5.803758,-12.363271,0.033862,14.22904,40.975653,0.083334,22.509278,13.658551,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,18.383041,0.083334,0.0,0.0,22.808822,25.35585,27.259866,27.127486,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.126236,0.0,22.425944,0.0,22.509278,0.0,22.509278,0.0,-0.299544,0.0,-2.846573,0.0
3,4,1,20,50,0.101542,22.808822,0,11.742872,0.0,0.0,4.454391,0.202709,63.784476,1,4,15.946119,1.0,1.0,22.509278,18.383041,1,1,28.313036,10.146007,5.504214,-12.662816,0.034028,16.627759,63.784476,0.083334,22.808822,15.946119,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,22.509278,18.383041,0.083334,0.0,25.35585,27.259866,27.127486,26.807732,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.299544,0.0,4.425781,0.0,22.725488,0.0,22.808822,0.0,-2.547028,0.0,-4.451044,0.0
4,5,1,20,50,0.135756,25.35585,0,12.234987,0.0,0.0,7.896588,0.338464,89.140326,1,5,17.828065,1.0,1.0,22.808822,22.509278,1,1,28.313036,10.146007,2.957185,-15.209844,0.034213,18.652046,89.140326,0.083334,25.35585,17.828065,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,22.808822,22.509278,18.383041,0.083334,27.259866,27.127486,26.807732,27.864715,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.547028,0.0,2.846573,0.0,6.972809,0.0,25.272516,0.0,-1.904016,0.0,-1.771635,0.0


In [21]:
train.drop(['pressure'], axis = 1, inplace = True)

## Normalization

In [22]:
train_col_order = ["u_out"] + train.columns.drop("u_out").tolist()
test_col_order = ["u_out"] + test.columns.drop("u_out").tolist()

train = train[train_col_order]
test = test[test_col_order]

print('Train columns:')
scaler_targets = [col for col in CFG.cont_seq_cols if (col != "u_out" and col != "pressure")]

Train columns:


In [23]:
# we might want to avoid scaling the dummy cols
scaler = RobustScaler()

In [24]:
for scaler_target in tqdm(scaler_targets):
    scaler.fit(train.loc[:,[scaler_target]])
    train.loc[:,[scaler_target]] = scaler.transform(train.loc[:,[scaler_target]])
    test.loc[:,[scaler_target]] = scaler.transform(test.loc[:,[scaler_target]])
    gc.collect()

# display(train.head())
# display(test.head())

  0%|          | 0/73 [00:00<?, ?it/s]

In [25]:
train.shape, target.shape, test.shape

((6036000, 74), (6036000,), (4024000, 74))

In [26]:
len(CFG.cont_seq_cols)

75

## Reshaping

In [29]:
# X = np.float32(train.drop(["id", "breath_id"], axis=1)).reshape(-1, 80, len(CFG.cont_seq_cols))
y = np.float32(target).reshape(-1, 80, 1)

X_test = np.float32(test.drop(["id", "breath_id"], axis=1)).reshape(-1, 80, len(CFG.cont_seq_cols))
gc.collect()

print(X.shape)
print(y.shape)
print(X_test.shape)

(72432, 80, 75)
(75450, 80, 1)
(48288, 80, 75)


## Setup GPU

In [30]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import tensorflow as tf

In [31]:
# USE MULTIPLE GPUS
if os.environ['CUDA_VISIBLE_DEVICES'] == "0":
    gpu_strategy = tf.distribute.get_strategy()
    print('single strategy')
else:
    gpu_strategy = tf.distribute.MirroredStrategy()
    print('multiple strategy')

single strategy


In [32]:
tf.config.optimizer.set_experimental_options({"auto_mixed_precision": False})
print('Mixed precision disabled')

Mixed precision disabled


## Loss

In [33]:
class L1Loss_masked(nn.Module):
    def __init__(self):
        super().__init__()
    def forward(self, preds, y, u_out):

        mask = 1 - u_out
        mae = torch.abs(mask * (y - preds))
        mae = torch.sum(mae) / torch.sum(mask)

        return mae

In [34]:
# scaling layer

class my_round_func(torch.autograd.Function):
    
    @staticmethod
    def forward(ctx, input):
        return torch.round(input)

    @staticmethod
    def backward(ctx, grad_output):
        grad_input = grad_output.clone()
        return grad_input


class ScaleLayer(nn.Module):
    def __init__(self):
        super(ScaleLayer, self).__init__()
        self.min = PRESSURE_MIN
        self.max = PRESSURE_MAX
        self.step = PRESSURE_STEP
        self.my_round_func = my_round_func()

    def forward(self, inputs):
        steps = inputs.add(-self.min).divide(self.step)
        int_steps = self.my_round_func.apply(steps)
        rescaled_steps = int_steps.multiply(self.step).add(self.min)
        clipped = torch.clamp(rescaled_steps, self.min, self.max)
        return clipped

PRESSURE_MIN, PRESSURE_MAX, PRESSURE_STEP

(-1.895744294564641, 64.8209917386395, 0.07030214545121005)

In [35]:
class CustomTransformerEncoderLayer(nn.Module):
    r"""TransformerEncoderLayer is made up of self-attn and feedforward network.
    This standard encoder layer is inspired by the paper "Attention Is All You Need"
    where we have removed the dropouts and reduced the 1024 internal DIMS as well.
    Args:
        d_model: the number of expected features in the input (required).
        nhead: the number of heads in the multiheadattention models (required).
        dim_feedforward: the dimension of the feedforward network model (default=2048).
        dropout: the dropout value (default=0.1).
        activation: the activation function of intermediate layer, relu or gelu (default=relu).
        layer_norm_eps: the eps value in layer normalization components (default=1e-5).
        batch_first: If ``True``, then the input and output tensors are provided
            as (batch, seq, feature). Default: ``False``.

    Examples::
        >>> encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
        >>> src = torch.rand(10, 32, 512)
        >>> out = encoder_layer(src)

    Alternatively, when ``batch_first`` is ``True``:
        >>> encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8, batch_first=True)
        >>> src = torch.rand(32, 10, 512)
        >>> out = encoder_layer(src)
    """
    
    __constants__ = ['batch_first']
    # batch_first is False here
    
    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1,
                 layer_norm_eps=1e-6
                ) -> None:
        
        factory_kwargs = {}
        super(CustomTransformerEncoderLayer, self).__init__()
        
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout, **factory_kwargs)
        
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward, **factory_kwargs)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model, **factory_kwargs)

        self.norm1 = nn.LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.norm2 = nn.LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = nn.SELU()

    def forward(self, src, src_mask= None, src_key_padding_mask= None):
        r"""Pass the input through the encoder layer.

        Args:
            src: the sequence to the encoder layer (required).
            src_mask: the mask for the src sequence (optional).
            src_key_padding_mask: the mask for the src keys per batch (optional).

        Shape:
            see the docs in Transformer class.
        """
        src2 = self.self_attn(src, src, src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0]
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
        src = src + self.dropout2(src2)
        src = self.norm2(src)
        return src

In [36]:
# ====================================================
# Hybrid Model (BiLSTM + Transformer)
# ====================================================

class CustomModel(nn.Module):
    def __init__(self, cfg):
        super().__init__()
        
        self.cfg = cfg
        self.hidden_size = self.cfg.hidden_size
        
        self.seq_emb = nn.Sequential(
            nn.Linear(len(cfg.cont_seq_cols), self.hidden_size),
            nn.LayerNorm(self.hidden_size),
            nn.SELU(),
        )
        
        self.lstm1 = nn.LSTM(
            self.hidden_size, self.hidden_size, dropout=0.0, batch_first=True, bidirectional=True
        )
        
        transformer_encoder_layer = CustomTransformerEncoderLayer(
            d_model=self.hidden_size*2, nhead=8, dim_feedforward=256, dropout=0.01,
                 layer_norm_eps=1e-6,
        )
        self.transformer = nn.TransformerEncoder(
            encoder_layer = transformer_encoder_layer,
            num_layers = 2,
        )
        
        self.head = nn.Sequential(
            nn.Linear(self.hidden_size * 2, self.hidden_size//4),
            nn.SELU(),
            nn.Linear(self.hidden_size // 4, 1),
            ScaleLayer(),
        )
        
        for n, m in self.named_modules():
            if isinstance(m, nn.LSTM):
                print(f'init {m}')
                for param in m.parameters():
                    if len(param.shape) >= 2:
                        nn.init.orthogonal_(param.data)
                    else:
                        nn.init.normal_(param.data)
            elif isinstance(m, nn.GRU):
                print(f"init {m}")
                for param in m.parameters():
                    if len(param.shape) >= 2:
                        init.orthogonal_(param.data)
                    else:
                        init.normal_(param.data)

    def forward(self, cont_seq_x):
        bs = cont_seq_x.size(0)
        
        seq_emb = self.seq_emb(cont_seq_x)
        seq_emb, _ = self.lstm1(seq_emb) # batch, seq_len, num_directions * hidden_size
        
        # transformer takes as input src: (S, N, E), so we have to permute it...
        seq_emb = seq_emb.transpose(1, 0) # seq_len, batch, num_directions * hidden_size
        seq_emb = self.transformer(seq_emb, mask=None, src_key_padding_mask=None)
        
        seq_emb = seq_emb.transpose(1, 0) # (BS, seq_len, embedding_dim)
        output = self.head(seq_emb) #.view(bs, -1)
        return output

print(CustomModel(CFG))

init LSTM(512, 512, batch_first=True, bidirectional=True)
CustomModel(
  (seq_emb): Sequential(
    (0): Linear(in_features=75, out_features=512, bias=True)
    (1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (2): SELU()
  )
  (lstm1): LSTM(512, 512, batch_first=True, bidirectional=True)
  (transformer): TransformerEncoder(
    (layers): ModuleList(
      (0): CustomTransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True)
        )
        (linear1): Linear(in_features=1024, out_features=256, bias=True)
        (dropout): Dropout(p=0.01, inplace=False)
        (linear2): Linear(in_features=256, out_features=1024, bias=True)
        (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (dropout1): Dropout(p=0.01, inplace=False)
        (dropout2): Dropout(p=0.01, inplace=False)
        (act

In [37]:
# ====================================================
# helper function
# ====================================================

class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))


scaler = GradScaler()

def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device):
    model.train()
    losses = AverageMeter()
    start = end = time()
    iters = len(train_loader)
    
    for step, (inputs, y) in enumerate(train_loader):
        
        inputs, y = inputs.to(device), y.to(device)
        batch_size = inputs.size(0)
        
        with autocast():
            pred = model(inputs)
            loss = criterion(pred, y, inputs[:,:,0].reshape(-1,80,1))
        
        losses.update(loss.item(), batch_size)
        
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        
        if CFG.apex:
            scaler.scale(loss).backward()
        else:
            loss.backward()
        
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            if CFG.apex:
                scaler.step(optimizer)
            else:
                optimizer.step()
            
            optimizer.zero_grad()
            lr = 0
            
            if CFG.batch_scheduler:
                # scheduler.step(epoch + step/iters)
                scheduler.step()
                lr = scheduler.get_lr()[0]
        
        if CFG.apex:
            scaler.update()
        
        end = time()
    
    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    model.eval()
    preds = []
    losses = AverageMeter()
    start = end = time()
    
    for step, (inputs, y) in enumerate(valid_loader):
        
        inputs, y = inputs.to(device), y.to(device)
        batch_size = inputs.size(0)
        
        with torch.no_grad():
            pred = model(inputs)
        
        loss = criterion(pred, y, inputs[:,:,0].reshape(-1,80,1))
        losses.update(loss.item(), batch_size)
        
        preds.append(pred.view(-1).detach().cpu().numpy())
        
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        
        end = time()
    
    preds = np.concatenate(preds)
    return losses.avg, preds


def inference_fn(test_loader, model, device):
    model.eval()
    model.to(device)
    preds = []
    tk0 = tqdm(enumerate(test_loader), total=len(test_loader))
    for step, (cont_seq_x) in tk0:
        cont_seq_x = cont_seq_x.to(device)
        with torch.no_grad():
            pred = model(cont_seq_x)
        preds.append(pred.view(-1).detach().cpu().numpy())
    preds = np.concatenate(preds)
    return preds

In [38]:
# ====================================================
# train loop
# ====================================================
def train_loop(folds, fold, trn_idx, val_idx):

    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================    
    
    train_folds = X[trn_idx]
    valid_folds = X[val_idx]
    
    y_train = y[trn_idx]
    y_true = y[val_idx]
    
    groups = train["breath_id"].unique()[val_idx]
    oof_folds = train[train["breath_id"].isin(groups)].reset_index(drop=True)

    train_dataset = torch.utils.data.TensorDataset(
        torch.from_numpy(train_folds),
        torch.from_numpy(y_train)
    )
    
    valid_dataset = torch.utils.data.TensorDataset(
        torch.from_numpy(valid_folds),
        torch.from_numpy(y_true)
    )

    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG)
    model.to(device)
    print(model)

    optimizer = AdamW(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay)
    num_train_steps = int(len(train_folds) / CFG.batch_size * CFG.epochs)
    num_warmup_steps = int(.10*num_train_steps)
    CFG.num_warmup_steps = num_warmup_steps
    
    print("warmup_steps is applicable is ", num_warmup_steps)
    
    def get_scheduler(optimizer):
        if CFG.scheduler=='linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=CFG.num_warmup_steps, num_training_steps=num_train_steps
            )
        elif CFG.scheduler=='cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer, num_warmup_steps=CFG.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=CFG.num_cycles
            )
        elif CFG.scheduler=='ReduceLROnPlateau':
            scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
        elif CFG.scheduler=='CosineAnnealingLR':
            scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
        elif CFG.scheduler=='CosineAnnealingWarmRestarts':
            scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)
        return scheduler

    scheduler = get_scheduler(optimizer)

    # ====================================================
    # apex
    # ====================================================
    #if CFG.apex:
    #    model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)

    # ====================================================
    # loop
    # ====================================================
    criterion = L1Loss_masked()

    best_score = np.inf

    avg_losses = []
    avg_val_losses = []
    
    for epoch in range(CFG.epochs):

        start_time = time()

        # train
        # avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, None, device) # no scheduler?
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device)
        avg_losses.append(avg_loss)
        
        # eval
        avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)
        avg_val_losses.append(avg_val_loss)
        
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        elif isinstance(scheduler, CosineAnnealingLR):
            scheduler.step()
        elif isinstance(scheduler, CosineAnnealingWarmRestarts):
            pass

        # scoring
        score = avg_val_loss

        elapsed = time() - start_time

        best_notice = ""
        if score < best_score:
            best_notice = "Best Score"
            best_score = score
            torch.save(
                {
                    'model': model.state_dict(),
                    'preds': preds,
                },
                OUTPUT_DIR+f"fold{fold}_best.pth"
            )
    
        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s, lr: {optimizer.param_groups[0]["lr"]:.5f}, MAE Score: {score:.4f}, {best_notice}')

    plt.figure(figsize=(14,6))
    plt.plot(avg_losses, label="Train Loss")
    plt.plot(avg_val_losses, label="Train Loss")
    plt.title(f"Fold {fold + 1} - Best score {best_score:.4f}", size=18)
    plt.show(block=False)

    preds = torch.load(OUTPUT_DIR+f"fold{fold}_best.pth", map_location=torch.device('cpu'))['preds']
    oof_folds['preds'] = preds.flatten()

    torch.cuda.empty_cache()
    gc.collect()
    
    return oof_folds


In [41]:
# ====================================================
# main
# ====================================================
def main():
    
    """
    Prepare: 1.train 2.test
    """
    
    def get_result(result_df):
        result_df.reset_index(drop=True, inplace=True)
        preds = result_df['preds'].values
        labels = result_df['pressure'].values
        non_expiratory_phase_val_idx = result_df[result_df['u_out'] == 0].index # The expiratory phase is not scored
        score = get_score(labels[non_expiratory_phase_val_idx], preds[non_expiratory_phase_val_idx])
        LOGGER.info(f'Score (without expiratory phase): {score:<.6f}')
    
    if CFG.train:
        # train
        oof_df = pd.DataFrame()
        kfold = KFold(n_splits=CFG.n_fold, random_state=42, shuffle=True)
        
        for fold, (trn_idx, val_idx) in enumerate(kfold.split(X=X, y=y[:72432])):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(X, fold, trn_idx, val_idx)
                oof_df = pd.concat([oof_df, _oof_df])
                LOGGER.info(f"========== fold: {fold} result ==========")
                get_result(_oof_df)
        
        # CV result
        LOGGER.info(f"========== CV ==========")
        get_result(oof_df)
        
        # save result
        oof_df.to_csv(OUTPUT_DIR+'oof_df.csv', index=False)
    
    for i, breath_id in enumerate(oof_df["breath_id"].unique()):
        oof_df[oof_df["breath_id"]==breath_id].plot(
            x="time_step", 
            y=["preds", "pressure", "u_out"],
            figsize=(16, 5),
            title = f"Preds Plot for breath_id : {breath_id}"
        )
        plt.show(block=False)
        if i == 15:
            break
    
    if CFG.inference:
        test_loader = torch.utils.data.DataLoader(X_test, batch_size=512, shuffle=False, pin_memory=True)
        
        for fold in CFG.trn_fold:
            model = CustomModel(CFG)
            path = OUTPUT_DIR+f"fold{fold}_best.pth"
            state = torch.load(path, map_location=torch.device('cpu'))
            model.load_state_dict(state['model'])
            predictions = inference_fn(test_loader, model, device)
            test[f'fold{fold}'] = predictions
            del state, predictions; gc.collect()
            torch.cuda.empty_cache()
        
        # submission
        test['pressure'] = test[[f'fold{fold}' for fold in CFG.trn_fold]].mean(1)
        test[['id', 'pressure'] + [f'fold{fold}' for fold in CFG.trn_fold]].to_csv(OUTPUT_DIR+'raw_submission.csv', index=False)
        test[['id', 'pressure']].to_csv(OUTPUT_DIR+'submission.csv', index=False)

In [None]:
if __name__ == '__main__':
    main()



init LSTM(512, 512, batch_first=True, bidirectional=True)
CustomModel(
  (seq_emb): Sequential(
    (0): Linear(in_features=75, out_features=512, bias=True)
    (1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (2): SELU()
  )
  (lstm1): LSTM(512, 512, batch_first=True, bidirectional=True)
  (transformer): TransformerEncoder(
    (layers): ModuleList(
      (0): CustomTransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): _LinearWithBias(in_features=1024, out_features=1024, bias=True)
        )
        (linear1): Linear(in_features=1024, out_features=256, bias=True)
        (dropout): Dropout(p=0.01, inplace=False)
        (linear2): Linear(in_features=256, out_features=1024, bias=True)
        (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (dropout1): Dropout(p=0.01, inplace=False)
        (dropout2): Dropout(p=0.01, inplace=False)
        (act

Epoch 1 - avg_train_loss: 11.5479  avg_val_loss: 13.1464  time: 482s, lr: 0.00010, MAE Score: 13.1464, Best Score
Epoch 2 - avg_train_loss: 16.5329  avg_val_loss: 12.6171  time: 483s, lr: 0.00020, MAE Score: 12.6171, Best Score
Epoch 3 - avg_train_loss: 21.5620  avg_val_loss: 14.3590  time: 505s, lr: 0.00030, MAE Score: 14.3590, 
Epoch 4 - avg_train_loss: 103.2533  avg_val_loss: 52.6471  time: 499s, lr: 0.00040, MAE Score: 52.6471, 
Epoch 5 - avg_train_loss: 49.5979  avg_val_loss: 52.6471  time: 476s, lr: 0.00050, MAE Score: 52.6471, 
Epoch 6 - avg_train_loss: 5.5686  avg_val_loss: 52.6471  time: 474s, lr: 0.00060, MAE Score: 52.6471, 
Epoch 7 - avg_train_loss: 54.9781  avg_val_loss: 52.6471  time: 474s, lr: 0.00070, MAE Score: 52.6471, 
Epoch 8 - avg_train_loss: 132.5574  avg_val_loss: 52.6471  time: 474s, lr: 0.00080, MAE Score: 52.6471, 
Epoch 9 - avg_train_loss: 82.2664  avg_val_loss: 52.6471  time: 476s, lr: 0.00090, MAE Score: 52.6471, 
Epoch 10 - avg_train_loss: 70.5404  avg_val

Epoch 79 - avg_train_loss: 70.5741  avg_val_loss: 52.6471  time: 977s, lr: 0.00013, MAE Score: 52.6471, 
