### Introduction

In this notebook, I port a public PyTorch notebook for the Ventilator Pressure Prediction competition to fastai. The goal is to leverage fastai's high-level API to avoid repetitive boilerplate (for example, fitting with a learning-rate schedule, or adding callbacks such as ReduceLROnPlateau).


In [1]:
!pip install -Uqq fastai

In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from torch.utils.data import Dataset
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
from fastai.data.core import DataLoaders
from fastai.learner import Learner
from fastai.callback.progress import ProgressCallback
from fastai.optimizer import OptimWrapper
from torch import optim
from fastai.losses import MSELossFlat, L1LossFlat
from fastai.callback.schedule import Learner
from fastai.callback.tracker import EarlyStoppingCallback, ReduceLROnPlateau, SaveModelCallback
from fastai.data.transforms import IndexSplitter
from sklearn.preprocessing import RobustScaler, normalize
from sklearn.model_selection import KFold
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import random
import gc
import os
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
# Train and test CSVs live in the same competition folder; build both paths from
# one root (the same relative root the sample submission is read from) so the
# locations cannot drift apart.
DATA_DIR = '../input/ventilator-pressure-prediction'
df = pd.read_csv(f'{DATA_DIR}/train.csv')
df_test = pd.read_csv(f'{DATA_DIR}/test.csv')

In [4]:
# max_size = 100

In [5]:
# df = df[df.breath_id < max_size]

In [6]:
def add_features(df):
    """Add per-breath engineered features and one-hot encode R, C and R+C.

    The new columns are written onto ``df`` itself (the caller's frame is
    modified, and its ``R``/``C`` columns become strings); the returned frame
    is the one-hot encoded result of ``pd.get_dummies``.

    Besides the model features, several bookkeeping columns are left in the
    output (``one``, ``count``, ``breath_id_lag*``); the caller is expected to
    drop the ones it does not want.

    Args:
        df: frame with at least ``breath_id``, ``time_step``, ``u_in``,
            ``u_out``, ``R`` and ``C`` columns, rows ordered within each breath.

    Returns:
        A new DataFrame with the engineered columns and dummy columns for
        ``R``, ``C`` and ``RC``.
    """
    breath = df['breath_id']
    time_step, u_in, u_out = df['time_step'], df['u_in'], df['u_out']

    # Running sum of time_step * u_in within each breath.
    df['area'] = (time_step * u_in).groupby(breath).cumsum()
    df['cross'] = u_in * u_out
    df['cross2'] = time_step * u_out

    # Cumulative sum / running mean of u_in within each breath.
    df['u_in_cumsum'] = df.groupby('breath_id')['u_in'].cumsum()
    df['one'] = 1
    df['count'] = df.groupby('breath_id')['one'].cumsum()
    df['u_in_cummean'] = df['u_in_cumsum'] / df['count']

    # The lags are taken over the whole frame, so the first rows of a breath
    # would pick up values from the previous breath. The *same flags are 1
    # only where the lagged row belongs to the same breath, and are used
    # below to zero out those leaked values.
    df['breath_id_lag'] = breath.shift(1).fillna(0)
    df['breath_id_lag2'] = breath.shift(2).fillna(0)
    df['breath_id_lagsame'] = np.where(df['breath_id_lag'] == breath, 1, 0)
    df['breath_id_lag2same'] = np.where(df['breath_id_lag2'] == breath, 1, 0)

    df['u_in_lag'] = u_in.shift(1).fillna(0) * df['breath_id_lagsame']
    df['u_in_lag2'] = u_in.shift(2).fillna(0) * df['breath_id_lag2same']
    df['u_out_lag2'] = u_out.shift(2).fillna(0) * df['breath_id_lag2same']

    # Treat R, C and their concatenation as categories for one-hot encoding.
    for col in ('R', 'C'):
        df[col] = df[col].astype(str)
    df['RC'] = df['R'] + df['C']

    return pd.get_dummies(df)


# Same feature pipeline for both splits; train is processed first, then test.
train, test = add_features(df), add_features(df_test)

In [7]:
# One row of pressures per breath; the reshape assumes every breath has exactly
# 80 consecutive rows.
targets = train[['pressure']].to_numpy().reshape(-1, 80)

# Identifier / bookkeeping columns left behind by add_features that must not be
# fed to the model. Kept in one list so train and test always drop the same set
# (and therefore end up with the same feature columns).
helper_cols = ['id', 'breath_id', 'one', 'count', 'breath_id_lag', 'breath_id_lag2',
               'breath_id_lagsame', 'breath_id_lag2same', 'u_out_lag2']
train = train.drop(columns=['pressure'] + helper_cols)
test = test.drop(columns=helper_cols)

In [8]:
# Learn the scaling statistics from the training features only, then apply the
# same fitted scaler to both splits.
RS = RobustScaler().fit(train)
train = RS.transform(train)
test = RS.transform(test)

In [9]:
# Fold the flat (rows, features) matrices into (breaths, 80, features); both
# splits share the training feature count.
n_features = train.shape[-1]
train = train.reshape(-1, 80, n_features)
test = test.reshape(-1, 80, n_features)

In [10]:
# Positional index of every training sequence (first axis of `train`).
idx = list(range(train.shape[0]))

In [11]:
# train_input, valid_input = train[:3000], train[3000:4000]
# train_targets, valid_targets = targets[:3000], targets[3000:4000]

In [12]:
# Shape of a single sample after the reshape above: (rows per breath, features).
train.shape[-2:]

(80, 25)

In [13]:
class VentilatorDataset(Dataset):
    """Map-style dataset over NumPy arrays of inputs and (optionally) targets.

    Both arrays are converted to float32 tensors once, at construction time.

    Args:
        data: NumPy array of inputs, indexed along its first axis.
        target: NumPy array of targets aligned with ``data`` along the first
            axis, or ``None`` (the default) for an unlabeled/inference dataset.

    Raises:
        ValueError: if ``target`` is given but its length differs from ``data``.
    """

    def __init__(self, data, target=None):
        self.data = torch.from_numpy(data).float()
        # Always define the attribute (None when unlabeled) so that
        # __getitem__ can test it directly instead of probing with hasattr.
        self.targets = None if target is None else torch.from_numpy(target).float()
        if self.targets is not None and len(self.targets) != len(self.data):
            raise ValueError(
                f"data and target lengths differ: {len(self.data)} != {len(self.targets)}"
            )

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(input, target)`` when targets exist, otherwise just ``input``."""
        if self.targets is None:
            return self.data[idx]
        return self.data[idx], self.targets[idx]

In [14]:
class RNNModel(nn.Module):
    """Four stacked bidirectional LSTMs followed by a two-layer regression head.

    Args:
        input_size: number of features per time step.
        hidden: hidden sizes of the four LSTM layers, in order. Exactly four
            values are required because the layers are stored under the fixed
            names ``lstm1`` .. ``lstm4``.
        fc_hidden: width of the intermediate fully connected layer.

    Raises:
        ValueError: if ``hidden`` does not contain exactly four sizes.
    """

    def __init__(self, input_size=25, hidden=(512, 256, 128, 64), fc_hidden=50):
        super().__init__()
        hidden = list(hidden)
        if len(hidden) != 4:
            raise ValueError(
                f"hidden must contain exactly 4 layer sizes, got {len(hidden)}"
            )
        # Every LSTM is bidirectional, so each following layer receives
        # 2 * (previous hidden size) features.
        self.lstm1 = nn.LSTM(input_size, hidden[0],
                             batch_first=True, bidirectional=True)
        self.lstm2 = nn.LSTM(2 * hidden[0], hidden[1],
                             batch_first=True, bidirectional=True)
        self.lstm3 = nn.LSTM(2 * hidden[1], hidden[2],
                             batch_first=True, bidirectional=True)
        self.lstm4 = nn.LSTM(2 * hidden[2], hidden[3],
                             batch_first=True, bidirectional=True)
        self.fc1 = nn.Linear(2 * hidden[3], fc_hidden)
        self.selu = nn.SELU()
        self.fc2 = nn.Linear(fc_hidden, 1)
        self._reinitialize()

    def _reinitialize(self):
        """
        Tensorflow/Keras-like initialization.

        LSTM input weights get Xavier-uniform init, recurrent weights get
        orthogonal init, and all biases are zeroed except the forget-gate
        slice of ``bias_ih``, which is set to 1. Linear layers get
        Xavier-uniform weights and zero biases.
        """
        for name, p in self.named_parameters():
            if 'lstm' in name:
                if 'weight_ih' in name:
                    nn.init.xavier_uniform_(p.data)
                elif 'weight_hh' in name:
                    nn.init.orthogonal_(p.data)
                elif 'bias_ih' in name:
                    p.data.fill_(0)
                    # Set forget-gate bias to 1: the bias vector stacks the
                    # four gates, and the forget gate is the second quarter.
                    n = p.size(0)
                    p.data[(n // 4):(n // 2)].fill_(1)
                elif 'bias_hh' in name:
                    p.data.fill_(0)
            elif 'fc' in name:
                if 'weight' in name:
                    nn.init.xavier_uniform_(p.data)
                elif 'bias' in name:
                    p.data.fill_(0)

    def forward(self, x):
        """Map a batch-first ``(batch, seq_len, input_size)`` tensor to
        ``(batch, seq_len, 1)`` predictions (one value per time step)."""
        x, _ = self.lstm1(x)
        x, _ = self.lstm2(x)
        x, _ = self.lstm3(x)
        x, _ = self.lstm4(x)
        x = self.fc1(x)
        x = self.selu(x)
        x = self.fc2(x)

        return x

In [15]:
# next(model.parameters())

In [16]:
# Mini-batch size used for every DataLoader built in this notebook section.
batch_size = 512
# NOTE(review): `submission` is rebound from df_test in a later cell (In [25]),
# so this read looks unused in the single-fold path shown here — confirm
# before removing.
submission = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')
# No targets for the test split: the dataset then yields inputs only.
test_dataset = VentilatorDataset(test, None)
# shuffle=False keeps predictions in the same order as the rows of the test data.
test_loader = DataLoader(test_dataset, batch_size = batch_size, shuffle=False)

In [17]:
########################## Experimenting with one fold

In [18]:
from sklearn.model_selection import KFold

In [19]:
# 5-fold split over breaths, shuffled with a fixed seed so folds are reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=2021)

In [20]:
# Pick a single fold for this experiment. The loop stops once the split at
# index 1 has been unpacked, so train_index / valid_index hold the SECOND of
# the five folds (and cnt ends at 1).
for cnt, (train_index, valid_index) in enumerate(kf.split(train)):
    if cnt == 1:
        break

In [21]:
#train_index=list(range(int(0.95*len(train)))) ## Change to have reasonable train/valid dataset
#valid_index=list(range(int(0.95*len(train)), len(train)))

# Slice inputs and targets with the selected fold's indices.
train_input, valid_input = train[train_index], train[valid_index]
train_targets, valid_targets = targets[train_index], targets[valid_index]

train_dataset = VentilatorDataset(train_input, train_targets)
valid_dataset = VentilatorDataset(valid_input, valid_targets)

# Only the training loader shuffles; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

dls = DataLoaders(train_loader, valid_loader)
# Size the first LSTM from the data instead of relying on the model's default,
# so a change in the engineered feature set cannot silently mismatch the model.
model = RNNModel(input_size=train.shape[-1])


In [22]:
# Bundle the loaders and the model into a fastai Learner trained against L1LossFlat.
learn = Learner(dls, model, loss_func=L1LossFlat())
#learn.lr_find()

In [23]:
# `df` is not read again in the cells that follow (only df_test is), so release
# it and trigger a collection before the long training run.
del df
gc.collect()

189

In [24]:
# NOTE(review): the "reducing lr to ..." values logged by this run rise and then
# fall over the epochs, which suggests the one-cycle schedule overrides whatever
# ReduceLROnPlateau sets — confirm whether the callback has any effect here.
# min_delta=0.5 is also large compared with the logged valid_loss (< 1 after a
# few epochs). SaveModelCallback(every_epoch=True) writes a checkpoint per epoch.
learn.fit_one_cycle(450, lr_max=2e-3, cbs=[ReduceLROnPlateau(monitor='valid_loss', min_delta=0.5, patience=10),
                                           SaveModelCallback(every_epoch=True)])

# Run inference on whichever device the trained model lives on (instead of a
# hard-coded 'cuda'), with the model explicitly in eval mode.
device = next(model.parameters()).device
model.eval()
batch_preds = []
with torch.no_grad():
    for data in test_loader:
        # (batch, seq_len, 1) -> flat vector of per-row pressures, in loader order.
        pred = model(data.to(device)).squeeze(-1).flatten()
        batch_preds.append(pred.cpu().numpy())
# One concatenation instead of extending a Python list element by element.
preds = np.concatenate(batch_preds)
# preds_fold.append(preds)
df_test['pressure'] = preds
#df_test[['id', 'pressure']].to_csv('submission.csv', index=False)

epoch,train_loss,valid_loss,time
0,3.845939,2.867484,00:28
1,1.94549,1.460278,00:28
2,1.271769,1.127784,00:28
3,1.054097,1.000002,00:28
4,0.952267,0.917107,00:28
5,0.896054,0.877944,00:28
6,0.855361,0.853497,00:28
7,0.81831,0.817318,00:28
8,0.792882,0.796448,00:28
9,0.772187,0.786152,00:28


Epoch 14: reducing lr to 1.6290397468735295e-05
Epoch 24: reducing lr to 3.044513325124867e-05
Epoch 34: reducing lr to 5.029864811855678e-05
Epoch 52: reducing lr to 9.527664627694891e-05
Epoch 62: reducing lr to 0.00012196628678318329
Epoch 72: reducing lr to 0.00014726396049374068
Epoch 82: reducing lr to 0.0001692096896882428
Epoch 92: reducing lr to 0.00018610319390727608
Epoch 102: reducing lr to 0.00019663562189964416
Epoch 112: reducing lr to 0.00019999895331982982
Epoch 122: reducing lr to 0.00019952351405548872
Epoch 132: reducing lr to 0.0001981863673330767
Epoch 142: reducing lr to 0.00019599909056305194
Epoch 152: reducing lr to 0.0001929806225906278
Epoch 162: reducing lr to 0.00018915709834597117
Epoch 172: reducing lr to 0.00018456162224399195
Epoch 182: reducing lr to 0.00017923398471097712
Epoch 192: reducing lr to 0.00017322031786170774
Epoch 202: reducing lr to 0.00016657268450858065
Epoch 212: reducing lr to 0.0001593486492806587
Epoch 222: reducing lr to 0.0001516

In [25]:
# Independent copy of just the two submission columns, so the post-processing
# below does not touch df_test.
submission = df_test.loc[:, ['id', 'pressure']].copy()

In [26]:
# Bounds and grid spacing used to snap predictions onto a discrete pressure grid.
# NOTE(review): presumably derived from the training pressures — confirm the source.
PRESSURE_MAX = 64.82099173863948
PRESSURE_MIN = -1.8957442945646408
PRESSURE_STEP = 0.07030214545120961

In [27]:
# Snap each prediction to the nearest grid point (PRESSURE_MIN + k * PRESSURE_STEP),
# then clamp the result to the [PRESSURE_MIN, PRESSURE_MAX] range.
grid_steps = np.round((submission["pressure"] - PRESSURE_MIN) / PRESSURE_STEP)
submission["pressure"] = grid_steps * PRESSURE_STEP + PRESSURE_MIN
submission["pressure"] = np.clip(submission["pressure"], PRESSURE_MIN, PRESSURE_MAX)

In [28]:
# Write the post-processed predictions to the working directory; index=False
# keeps the DataFrame index out of the file.
submission.to_csv('submission_new_fasat_1nov_fastai.csv', index=False)

In [29]:
########################################################################## Uncomment code below KFold Prediction