## Get env

In [1]:
# Print the GPU / driver status of the current runtime (shell command via IPython `!`).
!nvidia-smi

Fri Oct  8 06:39:01 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 471.11       Driver Version: 471.11       CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:07:00.0  On |                  N/A |
|  0%   44C    P8    24W / 370W |   1460MiB / 24576MiB |      9%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# Flags used to switch behaviour depending on the runtime environment.
# Detection checks whether the platform-specific module is already loaded
# in `sys.modules`; anything that is neither Kaggle nor Colab counts as LOCAL.
import sys
import os
IN_COLAB = 'google.colab' in sys.modules
IN_KAGGLE = 'kaggle_web_client' in sys.modules
LOCAL = not (IN_KAGGLE or IN_COLAB)
print(f'IN_COLAB:{IN_COLAB}, IN_KAGGLE:{IN_KAGGLE}, LOCAL:{LOCAL}')

IN_COLAB:False, IN_KAGGLE:False, LOCAL:True


In [3]:
# For Colab Download some datasets
# ================================
# Runs only on Google Colab: mounts Google Drive, installs the Kaggle CLI,
# copies the API credentials from Drive, then downloads and unpacks the
# feature dataset and the competition data.
if IN_COLAB:
    # mount googledrive
    from google.colab import drive
    drive.mount('/content/drive')
    # copy kaggle.json from googledrive
    ! pip install --upgrade --force-reinstall --no-deps  kaggle > /dev/null
    ! mkdir ~/.kaggle
    ! cp "/content/drive/MyDrive/kaggle/kaggle.json" ~/.kaggle/
    ! chmod 600 ~/.kaggle/kaggle.json
    
    # Download only once per runtime.
    # NOTE(review): the existence check uses the absolute path /content/input/
    # while mkdir/unzip use paths relative to the working directory -- this
    # presumably relies on the working directory being /content; confirm.
    if not os.path.exists("/content/input/"):
        !mkdir input
        !mkdir input/features
        !kaggle datasets download -d teyosan1229/ventilator-pressure
        !unzip /content/ventilator-pressure.zip -d input/features
        !kaggle competitions download -c ventilator-pressure-prediction
        !unzip /content/ventilator-pressure-prediction.zip -d input

In [4]:
# On hosted runtimes (Kaggle / Colab) install the extra packages this notebook
# imports below; a local environment is expected to provide them already.
# NOTE(review): versions are not pinned, so results may differ between runs.
if IN_KAGGLE or IN_COLAB:
    !pip install --upgrade -q wandb
    !pip install -q pytorch-lightning
    !pip install torch_optimizer

## Import Libraries

In [5]:
# Hide Warning
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)

# Python Libraries
import os
import math
import random
import glob
import pickle
from collections import defaultdict
from pathlib import Path

# Third party
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

# Visualizations
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
%matplotlib inline
sns.set(style="whitegrid")

# Utilities and Metrics
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold
from sklearn.preprocessing import RobustScaler, normalize
from sklearn.metrics import mean_absolute_error #[roc_auc_score, accuracy_score]

# Pytorch 
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
from torch.optim.optimizer import Optimizer, required
import torch_optimizer as optim

# Pytorch Lightning
import pytorch_lightning as pl
from pytorch_lightning import Callback, seed_everything
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import WandbLogger, CSVLogger

# Weights and Biases Tool
import wandb
# SECURITY: never store the API key in the notebook. `wandb.login()` reads the
# WANDB_API_KEY environment variable (export it in the shell, or use the
# Colab / Kaggle secrets manager) and otherwise falls back to previously
# stored credentials or an interactive prompt. Any key that was ever committed
# with this notebook should be revoked and regenerated.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mteyosan1229[0m (use `wandb login --relogin` to force relogin)


True

## Config

In [6]:
class CFG:
    """Experiment configuration, read as class attributes throughout the notebook."""

    # --- experiment identity ---
    debug = False
    competition = 'ventilator'
    exp_name = "exp025"
    seed = 29
    # model
    # img_size = 384

    # --- data ---
    target_col = 'pressure'  # name of the column holding the target value
    target_size = 1

    # --- optimizer ---
    optimizer_name = 'RAdam'
    lr = 5e-3
    weight_decay = 1e-6
    amsgrad = False

    # --- scheduler ---
    epochs = 300
    scheduler = 'CosineAnnealingLR'
    T_max = 300
    min_lr = 1e-6

    # --- criterion ---
    # loss that ignores the time steps where u_out = 1
    criterion_name = 'CustomLoss1'

    # --- training ---
    train = True
    inference = True
    n_fold = 5
    trn_fold = [0]
    precision = 16  # one of [16, 32, 64]
    grad_acc = 1

    # --- DataLoader keyword arguments, per split ---
    loader = {
        "train": dict(batch_size=512, num_workers=0, shuffle=True,
                      pin_memory=True, drop_last=True),
        "valid": dict(batch_size=512, num_workers=0, shuffle=False,
                      pin_memory=True, drop_last=False),
    }

    # --- extra keyword arguments forwarded to pl.Trainer ---
    trainer = dict(
        gpus=1,
        progress_bar_refresh_rate=1,
        benchmark=False,
        deterministic=True,
    )

    # --- LSTM ---
    num_layers = 4
    # Input feature columns; the order defines the feature axis of the model
    # input ('u_out' sits at index 2).
    feature_cols = [
        # raw signals
        'time_step', 'u_in', 'u_out',
        # area
        'area',
        # cross terms
        'cross', 'cross2',
        # cumulative u_in statistics
        'u_in_cumsum', 'u_in_cummean',
        # u_in lags (backward and forward)
        'u_in_lag', 'u_in_lag2', 'u_in_lag3',
        'u_in_lag_back', 'u_in_lag_back2', 'u_in_lag_back3',
        # u_out lags (backward and forward)
        'u_out_lag', 'u_out_lag2', 'u_out_lag3',
        'u_out_lag_back', 'u_out_lag_back2', 'u_out_lag_back3',
        # one-hot encoded R, C and R-C combinations
        'R_20', 'R_5', 'R_50', 'C_10', 'C_20', 'C_50',
        'RC_2010', 'RC_2020', 'RC_2050',
        'RC_5010', 'RC_5020', 'RC_5050',
        'RC_510', 'RC_520', 'RC_550',
        # per-breath u_in statistics split by u_out
        'u_out0_mean', 'u_out0_max', 'u_out0_std',
        'u_out1_mean', 'u_out1_max', 'u_out1_std',
        # time deltas
        'breath_time', 'u_in_time',
    ]

    dense_dim = 512
    hidden_size = 512
    logit_dim = 512
    
# Fix the global random seed from the config (prints "Global seed set to ...").
seed_everything(CFG.seed)
# On hosted runtimes (not LOCAL) use 4 DataLoader worker processes instead of
# the config default of 0.
if not LOCAL:
    CFG.loader["train"]["num_workers"] = 4
    CFG.loader["valid"]["num_workers"] = 4

Global seed set to 29


In [7]:
# Sanity check: number of input features and the effective train num_workers.
len(CFG.feature_cols),CFG.loader["train"]["num_workers"]

(43, 0)

## Directory & LoadData

In [8]:
# Select input / feature / output locations for the detected environment.
# INPUT_DIR  : competition files (sample_submission.csv is read from here)
# FEAT_DIR   : pre-computed feature files read by load_datasets()
# OUTPUT_DIR : per-experiment directory for checkpoints and logs (a str, not a Path)
if IN_KAGGLE:
    INPUT_DIR = Path('../input/ventilator-pressure-prediction')
    FEAT_DIR = Path('../input/ventilator-pressure')
    OUTPUT_DIR = './'
elif IN_COLAB:
    INPUT_DIR = Path('/content/input/')
    FEAT_DIR = Path('/content/input/features/')
    OUTPUT_DIR = f'/content/drive/MyDrive/kaggle/Ventilator Pressure/{CFG.exp_name}/'
# NOTE(review): this is a separate `if`, not part of the chain above; it is
# still exclusive because LOCAL is defined as `not (IN_KAGGLE or IN_COLAB)`.
# The paths below are absolute and machine-specific.
if LOCAL:
    INPUT_DIR = Path("F:/Kaggle/ventilator-pressure-prediction/data/input/")
    FEAT_DIR = Path("F:/Kaggle/ventilator-pressure-prediction/data/input/features/")
    OUTPUT_DIR = f'F:/Kaggle/ventilator-pressure-prediction/data/output/{CFG.exp_name}/'
    
def load_datasets(feats):
    """Load the pre-computed feature tables and join them column-wise.

    For every name in ``feats`` the files ``<name>_train.ftr`` and
    ``<name>_test.ftr`` are read from the global ``FEAT_DIR``.

    Args:
        feats: iterable of feature-file name prefixes.

    Returns:
        ``(X_train, X_test)``: two DataFrames, each the column-wise
        concatenation (``axis=1``) of the corresponding feature files.
    """
    # All train files are read (and concatenated) before any test file.
    train_frames = []
    for f in feats:
        train_frames.append(pd.read_feather(FEAT_DIR / f'{f}_train.ftr'))
    X_train = pd.concat(train_frames, axis=1)

    test_frames = []
    for f in feats:
        test_frames.append(pd.read_feather(FEAT_DIR / f'{f}_test.ftr'))
    X_test = pd.concat(test_frames, axis=1)

    return X_train, X_test

# Feature-file prefixes to load; each has a `<name>_train.ftr` / `<name>_test.ftr` pair.
feats = ['Base', 'Area', 'Cross', 'U_in_cumsum_mean', 'U_in_Lag', 'U_out_Lag', 'RC_OHE', 'U_out_stat', 'Time']
df_train, df_test = load_datasets(feats)

# Earlier CSV-based loading, kept for reference.
#df_train = pd.read_csv(INPUT_DIR / "train_v2.csv")
#df_test = pd.read_csv(INPUT_DIR / "test_v2.csv")
submission = pd.read_csv(INPUT_DIR / "sample_submission.csv")
display(df_train.head())
display(df_test.head())

# Make sure the experiment output directory exists before anything is saved.
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# Debug mode: shorten training to a few epochs.
if CFG.debug:
    CFG.epochs = 5
    #CFG.inference = False
    #df_train = df_train.head(240000)

Unnamed: 0,id,breath_id,time_step,u_in,u_out,pressure,fold,area,cross,cross2,...,RC_520,RC_550,u_out0_mean,u_out0_max,u_out0_std,u_out1_mean,u_out1_max,u_out1_std,breath_time,u_in_time
0,1,1,0.0,0.080043,0,5.837492,4,0.0,0.0,0.0,...,0,0,22.119824,28.313036,5.829997,2.961716,4.987079,2.1621,0.0,0.0
1,2,1,0.033652,2.964399,0,5.907794,4,0.618632,0.0,0.0,...,0,0,22.119824,28.313036,5.829997,2.961716,4.987079,2.1621,0.033652,18.299707
2,3,1,0.067514,3.157395,0,7.876254,4,2.138333,0.0,0.0,...,0,0,22.119824,28.313036,5.829997,2.961716,4.987079,2.1621,0.033862,4.126236
3,4,1,0.101542,3.170056,0,11.742872,4,4.454391,0.0,0.0,...,0,0,22.119824,28.313036,5.829997,2.961716,4.987079,2.1621,0.034028,0.299544
4,5,1,0.135756,3.27169,0,12.234987,4,7.896588,0.0,0.0,...,0,0,22.119824,28.313036,5.829997,2.961716,4.987079,2.1621,0.034213,2.547028


Unnamed: 0,id,breath_id,time_step,u_in,u_out,area,cross,cross2,u_in_cumsum,u_in_cummean,...,RC_520,RC_550,u_out0_mean,u_out0_max,u_out0_std,u_out1_mean,u_out1_max,u_out1_std,breath_time,u_in_time
0,1,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,...,1,0,19.702022,37.542219,14.196737,2.763761,4.973375,2.166618,0.0,0.0
1,2,0,0.031904,2.141835,0,0.239758,0.0,0.0,7.515046,3.757523,...,1,0,19.702022,37.542219,14.196737,2.763761,4.973375,2.166618,0.031904,7.515046
2,3,0,0.063827,2.750578,0,1.174935,0.0,0.0,22.166721,7.388907,...,1,0,19.702022,37.542219,14.196737,2.763761,4.973375,2.166618,0.031924,7.13663
3,4,0,0.095751,3.10147,0,3.207788,0.0,0.0,43.397331,10.849333,...,1,0,19.702022,37.542219,14.196737,2.763761,4.973375,2.166618,0.031924,6.578935
4,5,0,0.127644,3.307654,0,6.567489,0.0,0.0,69.718287,13.943657,...,1,0,19.702022,37.542219,14.196737,2.763761,4.973375,2.166618,0.031893,5.090346


## Utils

In [9]:
# Send a notification through LINE Notify
import requests
def send_line_notification(message):
    """Post ``message`` to LINE Notify, prefixed with the runtime environment.

    The access token is read from the ``LINE_NOTIFY_TOKEN`` environment
    variable so that no credential is stored in the notebook. Notifications
    are best-effort: a missing token or a network failure is reported on
    stdout and never interrupts the calling code (e.g. a training run).

    Args:
        message: text to send; it is sent as ``"[<env>]<message>"`` where
            ``<env>`` is ``colab``, ``kaggle`` or ``local``.

    Returns:
        None.
    """
    line_token = os.environ.get('LINE_NOTIFY_TOKEN')
    if not line_token:
        print('LINE_NOTIFY_TOKEN is not set; skipping LINE notification.')
        return

    env = ""
    if IN_COLAB: env = "colab"
    elif IN_KAGGLE: env = "kaggle"
    elif LOCAL: env = "local"

    endpoint = 'https://notify-api.line.me/api/notify'
    message = f"[{env}]{message}"
    payload = {'message': message}
    headers = {'Authorization': 'Bearer {}'.format(line_token)}
    try:
        # A timeout keeps an unreachable endpoint from blocking indefinitely.
        requests.post(endpoint, data=payload, headers=headers, timeout=10)
    except requests.RequestException as exc:
        print(f'LINE notification failed: {exc}')

## CV Split

In [10]:
# The `fold` column is already present in the loaded data. The commented-out
# code below is a GroupKFold split grouped by breath_id -- presumably how that
# column was originally produced; confirm against the feature-generation code.
# df_train["fold"] = -1
# Fold = GroupKFold(n_splits=CFG.n_fold)
# for n, (train_index, val_index) in enumerate(Fold.split(df_train, df_train[CFG.target_col], groups=df_train.breath_id.values)):
#      df_train.loc[val_index, 'fold'] = int(n)
# df_train['fold'] = df_train['fold'].astype(int)
# Show the number of rows per (fold, breath_id) pair.
print(df_train.groupby(['fold', 'breath_id']).size())

fold  breath_id
0     4            80
      16           80
      18           80
      20           80
      23           80
                   ..
4     125721       80
      125723       80
      125726       80
      125734       80
      125739       80
Length: 75450, dtype: int64


## Transforms

## Dataset

In [11]:
"""
X[0]でbreath_id1つ分のシーケンス長80のデータを取得できる
"""
class TrainDataset(Dataset):
    """Dataset of whole sequences: ``ds[i]`` returns one sequence with its labels.

    Args:
        X: array indexed as ``X[sequence, step, feature]`` holding the inputs.
        y: array indexed as ``y[sequence, step, 0]`` holding the targets.
        u_out_idx: position of the ``u_out`` column on the feature axis of
            ``X``. Defaults to 2, the previously hard-coded value; pass the
            real index if the feature order changes.
    """
    def __init__(self, X, y, u_out_idx=2):
        self.X = X
        self.y = y
        self.u_out_idx = u_out_idx
        # Keep u_out separately so it can be returned as an integer tensor.
        self.u_out = self.X[:, :, u_out_idx]

    def __len__(self):
        """Number of sequences."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(x, u_out, label)`` for sequence ``idx``.

        ``x`` is a float tensor of features, ``u_out`` an integer (long)
        tensor with one value per step and ``label`` a float tensor with the
        trailing singleton axis of ``y`` removed.
        """
        x = torch.FloatTensor(self.X[idx])
        u_out = torch.LongTensor(self.u_out[idx])
        label = torch.FloatTensor(self.y[idx]).squeeze(1)
        return x, u_out, label
    
class TestDataset(Dataset):
    """Label-free counterpart of the training dataset, used for inference.

    ``X`` is indexed as ``X[sequence, step, feature]``; each item is the
    feature block of one sequence as a float tensor.
    """

    def __init__(self, X):
        self.X = X
        # Feature column 2 is stored as `u_out`; it is not returned by
        # __getitem__.
        self.u_out = X[:, :, 2]

    def __len__(self):
        """Number of sequences."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the features of sequence ``idx`` as a float tensor."""
        sequence = self.X[idx]
        return torch.FloatTensor(sequence)

## RobustScaler & Reshape
pandas.DataFrameからnumpyに変換、シーケンス分をまとめる

In [12]:
# Scale every feature column except the binary `u_out` flag.
cols = CFG.feature_cols.copy()
cols.remove('u_out')
for col in tqdm(cols):
    # One scaler per column: fitted on the training data only, then applied
    # unchanged to the test data.
    RS = RobustScaler()
    # NOTE: the scaled values overwrite the original columns, so re-running
    # this cell scales already-scaled data -- reload the data first.
    df_train[[col]] = RS.fit_transform(df_train[[col]])
    df_test[[col]] = RS.transform(df_test[[col]])
display(df_train.head())
display(df_test.head())

Unnamed: 0,id,breath_id,time_step,u_in,u_out,pressure,fold,area,cross,cross2,...,RC_520,RC_550,u_out0_mean,u_out0_max,u_out0_std,u_out1_mean,u_out1_max,u_out1_std,breath_time,u_in_time
0,1,1,-0.989052,-1.100643,0,5.837492,4,-0.516581,0.0,-0.665541,...,0.0,0.0,0.972385,0.112313,-0.006965,0.187703,0.085993,-0.250334,-15.475938,-0.044815
1,2,1,-0.963608,0.878825,0,5.907794,4,-0.514031,0.0,-0.665541,...,0.0,0.0,0.972385,0.112313,-0.006965,0.187703,0.085993,-0.250334,0.103311,124.171952
2,3,1,-0.938006,1.011274,0,7.876254,4,-0.507768,0.0,-0.665541,...,0.0,0.0,0.972385,0.112313,-0.006965,0.187703,0.085993,-0.250334,0.200442,27.963705
3,4,1,-0.912278,1.019963,0,11.742872,4,-0.498222,0.0,-0.665541,...,0.0,0.0,0.972385,0.112313,-0.006965,0.187703,0.085993,-0.250334,0.277152,1.988466
4,5,1,-0.886409,1.089713,0,12.234987,4,-0.484036,0.0,-0.665541,...,0.0,0.0,0.972385,0.112313,-0.006965,0.187703,0.085993,-0.250334,0.363024,17.244182


Unnamed: 0,id,breath_id,time_step,u_in,u_out,area,cross,cross2,u_in_cumsum,u_in_cummean,...,RC_520,RC_550,u_out0_mean,u_out0_max,u_out0_std,u_out1_mean,u_out1_max,u_out1_std,breath_time,u_in_time
0,1,0,-0.989052,-1.155575,0,-0.516581,0.0,-0.665541,-0.725447,-0.655828,...,1.0,0.0,0.785774,0.334769,0.835404,-0.659608,-0.705398,0.609389,-15.475938,-0.044815
1,2,0,-0.96493,0.314318,0,-0.515592,0.0,-0.665541,-0.705662,-0.322625,...,1.0,0.0,0.785774,0.334769,0.835404,-0.659608,-0.705398,0.609389,-0.706181,50.966638
2,3,0,-0.940793,0.732085,0,-0.511738,0.0,-0.665541,-0.667087,-0.000608,...,1.0,0.0,0.785774,0.334769,0.835404,-0.659608,-0.705398,0.609389,-0.69702,48.397986
3,4,0,-0.916656,0.972894,0,-0.50336,0.0,-0.665541,-0.611192,0.306249,...,1.0,0.0,0.785774,0.334769,0.835404,-0.659608,-0.705398,0.609389,-0.696909,44.612401
4,5,0,-0.892543,1.114394,0,-0.489513,0.0,-0.665541,-0.541895,0.580642,...,1.0,0.0,0.785774,0.334769,0.835404,-0.659608,-0.705398,0.609389,-0.711369,34.507996


In [13]:
# Convert the DataFrames into float32 arrays and fold the rows into blocks of
# 80 consecutive rows: (n_sequences, 80, n_features).
# NOTE(review): assumes rows are ordered so that each block of 80 rows is one
# breath_id -- confirm against the feature files.
X = np.float32(df_train[CFG.feature_cols]).reshape(-1, 80, len(CFG.feature_cols))
test_X = np.float32(df_test[CFG.feature_cols]).reshape(-1, 80, len(CFG.feature_cols))
y = np.float32(df_train["pressure"]).reshape(-1, 80, 1)
# Reduce the per-row fold id to one value per sequence by averaging over the
# 80 steps (equals the fold id as long as it is constant within a sequence).
Fold = np.int16(df_train["fold"]).reshape(-1, 80, 1)
Fold = Fold.mean(axis=1).flatten()
print(X.shape, y.shape, test_X.shape, Fold.shape)

(75450, 80, 43) (75450, 80, 1) (50300, 80, 43) (75450,)


In [14]:
# Inspect the feature order and confirm that feature index 2 of the first
# sequence holds `u_out` (the dataset classes read u_out from that index).
print(CFG.feature_cols)
print(len(CFG.feature_cols))
X[0,:,2]

['time_step', 'u_in', 'u_out', 'area', 'cross', 'cross2', 'u_in_cumsum', 'u_in_cummean', 'u_in_lag', 'u_in_lag2', 'u_in_lag3', 'u_in_lag_back', 'u_in_lag_back2', 'u_in_lag_back3', 'u_out_lag', 'u_out_lag2', 'u_out_lag3', 'u_out_lag_back', 'u_out_lag_back2', 'u_out_lag_back3', 'R_20', 'R_5', 'R_50', 'C_10', 'C_20', 'C_50', 'RC_2010', 'RC_2020', 'RC_2050', 'RC_5010', 'RC_5020', 'RC_5050', 'RC_510', 'RC_520', 'RC_550', 'u_out0_mean', 'u_out0_max', 'u_out0_std', 'u_out1_mean', 'u_out1_max', 'u_out1_std', 'breath_time', 'u_in_time']
43


array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], dtype=float32)

In [15]:
# Smoke test: build the dataset, print the first (x, u_out, label) item, then
# drop the temporary object.
ds = TrainDataset(X,y)
print(ds[0])
del ds

(tensor([[-9.8905e-01, -1.1006e+00,  0.0000e+00,  ..., -2.5033e-01,
         -1.5476e+01, -4.4815e-02],
        [-9.6361e-01,  8.7883e-01,  0.0000e+00,  ..., -2.5033e-01,
          1.0331e-01,  1.2417e+02],
        [-9.3801e-01,  1.0113e+00,  0.0000e+00,  ..., -2.5033e-01,
          2.0044e-01,  2.7964e+01],
        ...,
        [ 9.9323e-01,  7.1990e-02,  1.0000e+00,  ..., -2.5033e-01,
          2.7351e-01, -2.1968e-02],
        [ 1.0189e+00,  7.2315e-02,  1.0000e+00,  ..., -2.5033e-01,
          2.5143e-01, -2.5566e-02],
        [ 1.0446e+00,  7.2590e-02,  1.0000e+00,  ..., -2.5033e-01,
          2.7373e-01, -2.8552e-02]]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1]), tensor([ 5.8375,  5.9078,  7.8763, 11.7429, 12.2350, 12.8677, 14.6956, 15.8907,
        15.5

## DataModule

In [16]:
class DataModule(pl.LightningDataModule):
    """LightningDataModule over in-memory numpy arrays.

    Args:
        tr_X, tr_y: training inputs and targets.
        val_X, val_y: validation inputs and targets.
        test_X: test inputs (no targets).
        cfg: config object whose ``loader`` dict provides the DataLoader
            keyword arguments under the keys ``'train'`` and ``'valid'``.
    """

    def __init__(self, tr_X, tr_y, val_X, val_y, test_X, cfg):
        super().__init__()
        self.cfg = cfg
        self.train_data, self.train_label = tr_X, tr_y
        self.valid_data, self.valid_label = val_X, val_y
        self.test_data = test_X

    def setup(self, stage=None):
        """Wrap the stored arrays in Dataset objects (``stage`` is ignored)."""
        self.train_dataset = TrainDataset(self.train_data, self.train_label)
        self.valid_dataset = TrainDataset(self.valid_data, self.valid_label)
        self.test_dataset = TestDataset(self.test_data)

    def _build_loader(self, dataset, split):
        """Create a DataLoader using the ``cfg.loader[split]`` settings."""
        return DataLoader(dataset, **self.cfg.loader[split])

    def train_dataloader(self):
        """Training loader; called during Trainer.fit()."""
        return self._build_loader(self.train_dataset, 'train')

    def val_dataloader(self):
        """Validation loader; called during Trainer.fit()."""
        return self._build_loader(self.valid_dataset, 'valid')

    def test_dataloader(self):
        """Test loader; reuses the 'valid' loader settings."""
        return self._build_loader(self.test_dataset, 'valid')

In [17]:
# Smoke test of the DataModule: build it from three folds, fetch a single
# training batch, print it, then drop the temporary objects.
Data = DataModule(X[Fold==0],y[Fold==0],X[Fold==1],y[Fold==1],X[Fold==2], CFG)
Data.setup()
loader = Data.train_dataloader()
# Use the built-in iter()/next(): Python 3 iterators expose __next__, and a
# `.next()` method is not part of the iterator protocol.
tmp = iter(loader)
print(next(tmp))
del Data, loader, tmp

[tensor([[[-9.8905e-01,  6.9788e-01,  0.0000e+00,  ..., -1.7735e+00,
          -1.5476e+01, -4.4815e-02],
         [-9.6265e-01,  8.6767e-01,  0.0000e+00,  ..., -1.7735e+00,
           6.8863e-01,  2.8327e+01],
         [-9.3689e-01,  8.5706e-01,  0.0000e+00,  ..., -1.7735e+00,
           2.9989e-01, -2.0312e+00],
         ...,
         [ 1.0182e+00,  7.2502e-02,  1.0000e+00,  ..., -1.7735e+00,
           2.7539e-01, -2.7581e-02],
         [ 1.0439e+00,  7.2747e-02,  1.0000e+00,  ..., -1.7735e+00,
           2.8962e-01, -3.0265e-02],
         [ 1.0705e+00,  7.2960e-02,  1.0000e+00,  ..., -1.7735e+00,
           7.6490e-01, -3.2205e-02]],

        [[-9.8905e-01,  2.7076e-01,  0.0000e+00,  ..., -3.8604e-01,
          -1.5476e+01, -4.4815e-02],
         [-9.6320e-01,  9.7959e-02,  0.0000e+00,  ..., -3.8604e-01,
           3.5077e-01, -1.2120e+01],
         [-9.3742e-01,  3.1189e-01,  0.0000e+00,  ..., -3.8604e-01,
           3.1236e-01,  1.5380e+01],
         ...,
         [ 9.9281e-01,  

## Pytorch Lightning Module

In [18]:
# ====================================================
# model
# ====================================================
class CustomModel(nn.Module):
    """Per-time-step regressor: MLP embedding -> 3 bidirectional LSTMs -> linear head.

    Args:
        cfg: config object providing ``feature_cols`` (its length is the
            input feature size), ``dense_dim`` (width of the MLP embedding,
            from which all LSTM widths are derived), plus ``hidden_size``,
            ``num_layers`` and ``logit_dim``, which are stored on the module
            but do not influence the architecture.

    Input ``x`` is ``(batch, seq_len, n_features)``; the output is
    ``(batch, seq_len)`` with one prediction per time step.
    """
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.dense_dim = cfg.dense_dim  # embedding feature dimension
        self.hidden_size = cfg.hidden_size
        self.num_layers = cfg.num_layers
        self.logit_dim = cfg.logit_dim
        # nn.Embedding(vocab_size, emb_dim)
        # (behaves somewhat like a single-layer perceptron)
        #self.r_emb = nn.Embedding(3, 2, padding_idx=0)
        #self.c_emb = nn.Embedding(3, 2, padding_idx=0)
        self.mlp = nn.Sequential(
            nn.Linear(len(cfg.feature_cols), self.dense_dim // 2),
            nn.ReLU(),
            nn.Linear(self.dense_dim // 2, self.dense_dim),
            #nn.Dropout(0.2),
            nn.ReLU(),
        )
        # Each bidirectional LSTM outputs 2 * hidden features, so the width
        # halves from one LSTM to the next.
        # NOTE: nn.LSTM applies `dropout` only between stacked layers; these
        # LSTMs use the default single layer, so dropout=0.1 has no effect.
        self.lstm1 = nn.LSTM(self.dense_dim, self.dense_dim//2,
                            dropout=0.1, batch_first=True, bidirectional=True)
        self.lstm2 = nn.LSTM(self.dense_dim//2 * 2, self.dense_dim//4,
                            dropout=0.1, batch_first=True, bidirectional=True)
        self.lstm3 = nn.LSTM(self.dense_dim//4 * 2, self.dense_dim//8,
                            dropout=0.1, batch_first=True, bidirectional=True)
        # The head consumes lstm3's output, so its width must follow dense_dim.
        # Sizing it from cfg.hidden_size only works when hidden_size == dense_dim.
        lstm_out_dim = self.dense_dim//8 * 2
        self.head = nn.Sequential(
            nn.LayerNorm(lstm_out_dim),
            nn.GELU(),
            #nn.Dropout(0.),
            nn.Linear(lstm_out_dim, 1),
        )
        # Recurrent layers (LSTM / GRU): orthogonal init for weight matrices,
        # normal init for 1-D parameters (biases).
        for m in self.modules():
            if isinstance(m, (nn.LSTM, nn.GRU)):
                print(f'init {m}')
                for param in m.parameters():
                    if len(param.shape) >= 2:
                        nn.init.orthogonal_(param.data)
                    else:
                        nn.init.normal_(param.data)

    def forward(self, x):
        """Map ``(batch, seq_len, n_features)`` to ``(batch, seq_len)`` predictions."""
        bs = x.size(0)
        features = self.mlp(x)
        features, _ = self.lstm1(features)
        features, _ = self.lstm2(features)
        features, _ = self.lstm3(features)
        # The head emits one value per step; drop the trailing singleton axis.
        output = self.head(features).view(bs, -1)
        return output
    
def get_model(cfg):
    """Build the network used for training and inference from ``cfg``."""
    return CustomModel(cfg)

# ====================================================
# criterion
# ====================================================
def compute_metric(df, preds):
    """Mean absolute error restricted to rows where ``u_out == 0``.

    Args:
        df: DataFrame with the columns ``'pressure'`` (targets) and
            ``'u_out'`` (0/1 flag).
        preds: numpy array of predictions with the same shape as the targets.

    Returns:
        Sum of ``|pressure - preds|`` over the rows with ``u_out == 0``,
        divided by the number of such rows.

    Raises:
        AssertionError: if targets, predictions and weights differ in shape.
    """
    targets = np.array(df['pressure'].values.tolist())
    # Weight 1 where u_out == 0, weight 0 where u_out == 1.
    weights = 1 - np.array(df['u_out'].values.tolist())

    shapes = (targets.shape, preds.shape, weights.shape)
    assert targets.shape == preds.shape and weights.shape == targets.shape, shapes

    weighted_abs_error = weights * np.abs(targets - preds)
    return weighted_abs_error.sum() / weights.sum()

class VentilatorLoss(nn.Module):
    """Masked MAE loss: only the steps with ``u_out == 0`` contribute.

    The absolute errors of all unmasked steps in the batch are summed and
    divided by the total number of unmasked steps, giving a scalar tensor.
    """

    def forward(self, preds, y, u_out):
        """Return the masked mean absolute error as a scalar tensor.

        Args:
            preds: predicted values.
            y: target values, same shape as ``preds``.
            u_out: 0/1 tensor of the same shape; steps with 1 are ignored.
        """
        keep = 1 - u_out
        masked_abs_error = (keep * (y - preds)).abs()
        return masked_abs_error.sum() / keep.sum()

def get_criterion():
    """Create the loss module selected by ``CFG.criterion_name``.

    Supported names: ``'BCEWithLogitsLoss'``, ``'CrossEntropyLoss'`` and
    ``'CustomLoss1'`` (the masked MAE ``VentilatorLoss``).

    Returns:
        The instantiated ``nn.Module`` loss.

    Raises:
        NotImplementedError: if ``CFG.criterion_name`` is none of the above.
    """
    # A single if/elif chain: with independent `if` statements the final
    # `else` would reject every name except the last one.
    if CFG.criterion_name == 'BCEWithLogitsLoss':
        # With PyTorch Lightning no explicit .to(device) is needed.
        criterion = nn.BCEWithLogitsLoss(reduction="mean")
    elif CFG.criterion_name == 'CrossEntropyLoss':
        criterion = nn.CrossEntropyLoss()
    elif CFG.criterion_name == 'CustomLoss1':
        # [reference]https://www.kaggle.com/theoviel/deep-learning-starter-simple-lstm
        criterion = VentilatorLoss()
    else:
        raise NotImplementedError
    return criterion
# ====================================================
# optimizer
# ====================================================
def get_optimizer(model: nn.Module, config: dict):
    """Create the optimizer named by ``config.optimizer_name``.

    Args:
        model: module whose ``parameters()`` are optimised.
        config: settings object read by attribute (despite the ``dict``
            annotation): ``optimizer_name``, ``lr`` and ``weight_decay``,
            plus ``amsgrad`` for Adam.

    Returns:
        ``Adam``, ``RAdam`` (from ``torch_optimizer``) or ``SGD`` (momentum
        0.9, Nesterov) for the names ``'Adam'``, ``'RAdam'`` and ``'sgd'``.

    Raises:
        NotImplementedError: for any other optimizer name.
    """
    # Factories are lazy so that only the selected optimizer touches the
    # model parameters and the config attributes it needs.
    factories = {
        'Adam': lambda: Adam(model.parameters(),
                             lr=config.lr,
                             weight_decay=config.weight_decay,
                             amsgrad=config.amsgrad),
        'RAdam': lambda: optim.RAdam(model.parameters(),
                                     lr=config.lr,
                                     weight_decay=config.weight_decay),
        'sgd': lambda: SGD(model.parameters(),
                           lr=config.lr,
                           momentum=0.9,
                           nesterov=True,
                           weight_decay=config.weight_decay),
    }
    build = factories.get(config.optimizer_name)
    if build is None:
        raise NotImplementedError
    return build()

# ====================================================
# scheduler
# ====================================================
def get_scheduler(optimizer):
    """Create the learning-rate scheduler selected by ``CFG.scheduler``.

    Config attributes read per scheduler:
        * ``'ReduceLROnPlateau'``: ``CFG.factor``, ``CFG.patience``, ``CFG.eps``
        * ``'CosineAnnealingLR'``: ``CFG.T_max``, ``CFG.min_lr``
        * ``'CosineAnnealingWarmRestarts'``: ``CFG.T_0``, ``CFG.min_lr``

    Raises:
        NotImplementedError: for any other scheduler name.
    """
    if CFG.scheduler == 'ReduceLROnPlateau':
        # factor   : multiplicative learning-rate decay
        # patience : number of non-improving steps tolerated before decaying
        # eps      : minimal decay applied to the lr; smaller changes are ignored
        return ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)

    if CFG.scheduler == 'CosineAnnealingLR':
        # T_max   : number of scheduler steps in one half cosine period
        # eta_min : minimum learning rate (bottom of the curve)
        return CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)

    if CFG.scheduler == 'CosineAnnealingWarmRestarts':
        # T_0    : number of scheduler steps until the first restart
        # T_mult : factor by which the cycle length grows after each restart
        return CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)

    raise NotImplementedError

In [19]:
# # modelの動作確認
# model = get_model(CFG)
# Data = DataModule(df_train,df_train,df_train, CFG)
# Data.setup()
# loader = Data.train_dataloader()
# tmp = loader.__iter__()
# x, u_out, label = tmp.next()
# print(x.shape, u_out.shape, label.shape)
# output = model(x)
# print(output.shape)
# del model, Data, loader, tmp,x, u_out, label, output

In [20]:
# #schedulerの確認
# model = get_model(CFG)
# optimizer = get_optimizer(model, CFG)
# scheduler = get_scheduler(optimizer)
# from pylab import rcParams
# lrs = []
# for epoch in range(1, CFG.epochs+1):
#     scheduler.step(epoch-1)
#     lrs.append(optimizer.param_groups[0]["lr"])
# rcParams['figure.figsize'] = 20,3
# print(lrs)
# plt.plot(lrs)

In [21]:
class Trainer(pl.LightningModule):
    """LightningModule tying together the model, the loss and the optimizer setup.

    Args:
        cfg: config object forwarded to ``get_model`` and ``get_optimizer``.
            The loss and the scheduler are selected by ``get_criterion`` /
            ``get_scheduler``, which read the global ``CFG``.

    Train / validation batches are ``(x, u_out, y)`` tuples; predict / test
    batches are the feature tensor alone.
    """
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.model = get_model(cfg)
        self.criterion = get_criterion()

    def forward(self, x):
        """Run the wrapped model on a feature batch."""
        output = self.model(x)
        return output

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss for one batch."""
        x, u_out, y = batch
        # Augmentations such as mixup would be inserted here.
        output = self.forward(x)
        labels = y#.unsqueeze(1)
        loss = self.criterion(output, labels ,u_out).mean()
        #self.log_dict(dict(train_loss=loss))
        self.log('train_loss', loss, on_step=True, prog_bar=True, logger=True)
        return {"loss": loss, "predictions": output, "labels": labels}

    def training_epoch_end(self, outputs):
        """Log the current learning rate of the first parameter group."""
        # `outputs` holds the collected training_step results (unused here).
        self.log("lr", self.optimizer.param_groups[0]['lr'], prog_bar=True, logger=True)

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss for one batch."""
        x, u_out, y = batch
        output = self.forward(x)
        labels = y#.unsqueeze(1)
        loss = self.criterion(output,labels ,u_out).mean()
        self.log('val_loss', loss, on_step= True, prog_bar=True, logger=True)
        return {"predictions": output,
                "labels": labels,
                "loss": loss.item()}

    def validation_epoch_end(self, outputs):
        """Log the unweighted mean of the per-batch validation losses.

        Only the per-batch ``'loss'`` values are needed; predictions and
        labels are deliberately not gathered, since collecting them row by
        row is expensive and the result was never used.
        """
        if not outputs:
            # Nothing was validated; there is no mean to log.
            return
        loss = sum(output['loss'] for output in outputs) / len(outputs)

        self.log("val_loss_epoch", loss, prog_bar=True, logger=True)

    def predict_step(self, batch, batch_idx):
        """Return the model output for a feature-only batch."""
        x = batch
        output = self.forward(x)
        return output

    def test_step(self, batch, batch_idx):
        """Return the model output for a feature-only batch."""
        x = batch
        output = self.forward(x)
        return output

    def configure_optimizers(self):
        """Build the optimizer and a scheduler that is stepped every training step."""
        self.optimizer = get_optimizer(self, self.cfg)
        self.scheduler = {'scheduler': get_scheduler(self.optimizer),
                          'interval': 'step', # or 'epoch'
                          'frequency': 1}
        return {'optimizer': self.optimizer, 'lr_scheduler': self.scheduler}

## Train

In [22]:
def train() -> None:
    """Train one model per selected fold and optionally predict the test set.

    For every fold index in ``range(CFG.n_fold)`` that is listed in
    ``CFG.trn_fold`` this function:

    1. creates a ``ModelCheckpoint`` monitoring ``val_loss`` and a W&B logger,
    2. builds a ``DataModule`` that trains on ``Fold != fold`` and validates
       on ``Fold == fold``,
    3. sets ``CFG.T_max`` to the number of optimizer steps over all epochs
       (batches per epoch divided by ``CFG.grad_acc``, rounded up, times
       ``CFG.epochs``) -- presumably read by the scheduler factory defined
       elsewhere in the notebook; confirm,
    4. fits the model and saves the final ``model.model`` weights,
    5. if ``CFG.inference`` is truthy, reloads the best checkpoint, predicts
       ``test_X`` and writes a per-fold submission CSV.

    The W&B run of a fold is always finished, even when the fold raises.

    Relies on notebook globals: ``CFG``, ``OUTPUT_DIR``, ``X``, ``y``,
    ``Fold``, ``test_X`` and ``submission`` (whose ``pressure`` column is
    overwritten by each predicted fold).
    """
    for fold in range(CFG.n_fold):
        if fold not in CFG.trn_fold:
            continue
        print(f"{'='*38} Fold: {fold} {'='*38}")
        # Callbacks and logger
        #======================================================
        # Needed to save the trained weights: keeps the checkpoint with the
        # lowest val_loss, which is reloaded for inference below.
        loss_checkpoint = ModelCheckpoint(
            dirpath=OUTPUT_DIR,
            filename=f"best_loss_fold{fold}",
            monitor="val_loss",
            save_last=True,
            save_top_k=1,
            save_weights_only=True,
            mode="min",
        )

        wandb_logger = WandbLogger(
            project=f'{CFG.competition}',
            group= f'{CFG.exp_name}',
            name = f'Fold{fold}',
            save_dir=OUTPUT_DIR
        )
        try:
            data_module = DataModule(X[Fold!=fold], y[Fold!=fold],
                                     X[Fold==fold], y[Fold==fold],
                                     test_X,
                                     CFG
                                    )
            # Set up explicitly so the train dataloader length is available
            # before the trainer is created.
            data_module.setup()

            CFG.T_max = int(math.ceil(len(data_module.train_dataloader())/CFG.grad_acc)*CFG.epochs)
            print(f"set schedular T_max {CFG.T_max}")

            trainer = pl.Trainer(
                logger=wandb_logger,
                callbacks=[loss_checkpoint],
                default_root_dir=OUTPUT_DIR,
                accumulate_grad_batches=CFG.grad_acc,
                max_epochs=CFG.epochs,
                precision=CFG.precision,
                **CFG.trainer
            )
            # Training
            model = Trainer(CFG)
            trainer.fit(model, data_module)
            # Weights as they are at the end of training (not the best checkpoint).
            torch.save(model.model.state_dict(),OUTPUT_DIR + '/' + f'{CFG.exp_name}_fold{fold}.pth')

            del model, data_module
            # Predict the test data and save the result.
            if CFG.inference:
                # Only the test dataloader is used here, so the train/valid
                # arguments are one-row placeholders.
                data_module = DataModule(X[0:1], y[0:1], X[0:1], y[0:1], test_X, CFG)
                data_module.setup()
                # Load the model with the best validation loss.
                best_model = Trainer.load_from_checkpoint(cfg=CFG,checkpoint_path=loss_checkpoint.best_model_path)
                predictions = trainer.predict(best_model, data_module.test_dataloader())
                # Join the per-batch outputs along the batch axis in a single
                # call instead of unpacking every sample in Python.
                preds = torch.cat(predictions).flatten()
                submission['pressure'] = preds.to('cpu').detach().numpy()
                submission.to_csv(OUTPUT_DIR + '/' + f'{CFG.exp_name}sub_fold{fold}.csv',index=False)
        finally:
            # Close the W&B run even if this fold failed, so a crashed fold
            # does not leave its run open in the notebook kernel.
            wandb.finish()

        
        

In [23]:
# Run the per-fold training (and optional test inference) loop defined in train().
train()
# send_line_notification is defined elsewhere in the notebook; presumably it
# posts a completion message -- confirm against its definition.
send_line_notification("finished")
# train() already calls wandb.finish() for each fold it runs; this trailing
# call looks like a final safeguard for any run that is still open.
wandb.finish()



Using native 16bit precision.
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"DataModule.{name} has already been called, so it will not be called again. "
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


set schedular T_max 35100
init LSTM(512, 256, batch_first=True, dropout=0.1, bidirectional=True)
init LSTM(512, 128, batch_first=True, dropout=0.1, bidirectional=True)
init LSTM(256, 64, batch_first=True, dropout=0.1, bidirectional=True)


[34m[1mwandb[0m: wandb version 0.12.4 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade



  | Name      | Type           | Params
---------------------------------------------
0 | model     | CustomModel    | 2.5 M 
1 | criterion | VentilatorLoss | 0     
---------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
10.170    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Global seed set to 29


Training: -1it [00:00, ?it/s]

  f"One of the returned values {set(extra.keys())} has a `grad_fn`. We will detach it automatically"


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

  f"DataModule.{name} has already been called, so it will not be called again. "
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


init LSTM(512, 256, batch_first=True, dropout=0.1, bidirectional=True)
init LSTM(512, 128, batch_first=True, dropout=0.1, bidirectional=True)
init LSTM(256, 64, batch_first=True, dropout=0.1, bidirectional=True)


Predicting: 67it [00:00, ?it/s]

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_loss,
epoch,127.0
trainer/global_step,14899.0
_step,4361.0
_runtime,997.0
_timestamp,1633643803.0
val_loss_step,
val_loss_epoch,
lr,0.0031


0,1
train_loss,█▃▂▁▁▁▂▁▁▁▁▁▁▁▂
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
trainer/global_step,▁▁▁▁▂▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▅▂▂▂▂▂▂▂▂▂▂▂▃▇▃▃▃█▃
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_loss_step,█▃▃▂▂▁▂▂▁▂▁▂▁▁▂
val_loss_epoch,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,██████████▇▇▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▁▁


+ TestDatasetでなぜu_outを使っていない？
+ hidden_sizeの指す場所がよくわかってない