In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm

import os

from sklearn.model_selection import GroupKFold

import torch
import torch.optim as optim 
import torch.nn.init as init 
import torch.nn.functional as F 
from torch.optim.lr_scheduler import CosineAnnealingLR

from transformers import AdamW 
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup

In [None]:
#Bunch of useful links: 
# https://www.kaggle.com/theoviel/deep-learning-starter-simple-lstm
# https://www.kaggle.com/yasufuminakama/ventilator-pressure-lstm-starter
# https://www.kaggle.com/shujun717/1-solution-lstm-cnn-transformer-1-fold
# https://www.kaggle.com/junkoda/pytorch-lstm-with-tensorflow-like-initialization
# https://www.kaggle.com/sagarikajadon/simple-lstm-pytorch

In [None]:
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

In [None]:
class CFG:
    """Run settings for the notebook, read as class attributes (e.g. ``CFG.lr``)."""

    # --- general / bookkeeping ---
    competition = 'ventilator'
    print_freq = 100
    apex = False
    # one of: ['linear', 'cosine', 'ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts']
    scheduler = 'CosineAnnealingLR'

    batch_scheduler = False
    epochs = 10
    num_workers = 4

    # --- layer widths ---
    dense_dim = 512
    lstm_dim = 512
    logit_dim = 512
    num_classes = 1

    input_dim = 4
    matrix_dim = 4

    # --- optimisation ---
    hidden_size = 64
    lr = 5e-3
    min_lr = 1e-6
    batch_size = 64

    # --- cross-validation ---
    n_fold = 5
    trn_fold = [0, 1, 2, 3, 4]

    # --- run-mode switches ---
    train = True
    inference = True

    # Resolved once, when the class body executes.
    device = 'cuda' if torch.cuda.is_available() else "cpu"
    save_weights = True

    optimizer = "Adam"
    loss = 'L1Loss'

    # --- column groups ---
    cate_seq_cols = ['R', 'C']
    cont_seq_cols = ['u_in', 'u_out']

# Data

In [None]:
# Read the competition CSVs; the test row ids are kept aside as a NumPy array.
train = pd.read_csv('../input/ventilator-pressure-prediction/train.csv')
test = pd.read_csv('../input/ventilator-pressure-prediction/test.csv')
test_ids = test.loc[:, 'id'].to_numpy()

In [None]:
# Rows are folded 80 at a time, i.e. the code treats every breath as a block
# of 80 consecutive rows. One breath id per block is kept for GroupKFold.
groups = train.breath_id.values.reshape(-1, 80)[:, 0]

# Regression target, one row of 80 values per breath.
targets = train['pressure'].to_numpy().reshape(-1, 80)

# Keep only genuine model inputs. Previously just `breath_id` was dropped, so
# the `pressure` target itself (and the `id` row index) were scaled and fed to
# the network as features -- the model could read the label from its input.
feature_frame = train.drop(columns=['id', 'breath_id', 'pressure'])

from sklearn.preprocessing import RobustScaler
RS = RobustScaler()
train = RS.fit_transform(feature_frame)

# Fold the scaled rows into (n_breaths, 80, num_features).
num_features = train.shape[-1]
train = train.reshape(-1, 80, num_features)
train.shape

In [None]:
# load dataset 
# should not be used, error in dataloading
# Inspired by https://www.kaggle.com/theoviel/deep-learning-starter-simple-lstm

import torch
from torch.utils.data import Dataset

class VentilatorDataset(Dataset):
    """Dataset that groups a long-format frame by ``breath_id``.

    The frame must provide the columns ``breath_id``, ``R``, ``C``, ``u_in``
    and ``u_out``; ``pressure`` is optional and is filled with zeros when
    absent. Every breath must contain the same number of rows so the grouped
    lists form rectangular arrays.

    Each item is a dict with:
        'input'    -- float tensor (seq_len, 4), columns ordered R, C, u_in, u_out
        'u_out'    -- float tensor (seq_len,)
        'pressure' -- float tensor (seq_len,)
    """

    def __init__(self, df):
        if 'pressure' not in df.columns:
            # assign() returns a new frame, so the caller's DataFrame is left
            # untouched (the original wrote the column into it in place).
            df = df.assign(pressure=0)
        # One row per breath; every other column becomes a Python list.
        self.df = df.groupby('breath_id').agg(list).reset_index()
        self.R = np.array(self.df['R'].values.tolist())
        self.C = np.array(self.df['C'].values.tolist())
        self.u_in = np.array(self.df['u_in'].values.tolist())
        self.pressure = np.array(self.df['pressure'].values.tolist())
        self.u_out = np.array(self.df['u_out'].values.tolist())
        # Stack to (n_breaths, 4, seq_len), then swap to (n_breaths, seq_len, 4).
        self.inputs = np.concatenate(
            [self.R[:, None], self.C[:, None], self.u_in[:, None], self.u_out[:, None]], 1).transpose(0, 2, 1)

    def __len__(self):
        # Number of breaths. The original called len() on the integer
        # `self.df.shape[0]`, which raises TypeError.
        return len(self.df)

    def __getitem__(self, idx):
        data = {
            'input': torch.tensor(self.inputs[idx], dtype=torch.float),
            'u_out': torch.tensor(self.u_out[idx], dtype=torch.float),
            'pressure': torch.tensor(self.pressure[idx], dtype=torch.float)
        }
        return data

In [None]:
class CusDataset(Dataset):
    """Pairs a 3-D sample array with a 2-D target array, indexed on the first axis."""

    def __init__(self, data, target):
        self.data = data
        self.target = target

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(sample, target)`` for position ``idx`` as float32 tensors."""
        features = torch.tensor(self.data[idx, :, :], dtype=torch.float)
        labels = torch.tensor(self.target[idx, :], dtype=torch.float)
        return features, labels
    

In [None]:
# Sanity check: wrap the full arrays and display the first (sample, target) pair.
data = CusDataset(data=train, target=targets)
data[0]

In [None]:
#td = TrainDataset(train)
#td[69]

In [None]:
#dataset = VentilatorDataset(main_df)
#dataset[0]

In [None]:
import torch.nn as nn

class RNNModel(nn.Module):
    """Two-layer MLP -> bidirectional LSTM -> two-layer output head.

    NOTE: a later cell defines a second class that is also named ``RNNModel``;
    once that cell runs, this definition is no longer reachable by name.

    Args:
        CFG: configuration object; stored on the instance as ``self.CFG``.
        input_dim: size of the last axis of the input.
        lstm_dim: hidden size of each LSTM direction.
        dense_dim: output width of the input MLP (its first layer is half that).
        logit_dim: hidden width of the output head.
        num_classes: size of the last axis of the prediction.

    The default widths are read from ``CFG`` once, when the class is defined.
    """

    def __init__(
        self, CFG,
        input_dim=CFG.input_dim,
        lstm_dim=CFG.matrix_dim,
        dense_dim=CFG.matrix_dim,
        logit_dim=CFG.matrix_dim,
        num_classes=1,
    ):

        super().__init__()
        self.CFG = CFG
        self.mlp = nn.Sequential(
            nn.Linear(input_dim, dense_dim // 2),
            nn.ReLU(),
            nn.Linear(dense_dim // 2, dense_dim),
            nn.ReLU(),
        )

        self.lstm = nn.LSTM(dense_dim, lstm_dim, batch_first=True, bidirectional=True)

        # Input width is doubled because the bidirectional LSTM concatenates
        # the forward and backward hidden states.
        self.output = nn.Sequential(
            nn.Linear(lstm_dim * 2, logit_dim),
            nn.ReLU(),
            nn.Linear(logit_dim, num_classes),
        )

    def forward(self, x):
        """Map ``x`` (batch-first, last axis ``input_dim``) to per-step predictions."""
        features = self.mlp(x)

        features, _ = self.lstm(features)

        # The head is registered as `self.output`; the original called the
        # undefined attribute `self.logits`, which raised AttributeError.
        pred = self.output(features)
        return pred


In [None]:
class RNNModel(nn.Module):
    """Four stacked bidirectional LSTMs, each followed by LayerNorm, then a two-layer head.

    LSTM hidden sizes are 400, 300, 200 and 100. The head is
    Linear(200 -> 100) + SELU + Linear(100 -> output_dim), and axis 2 of the
    result is squeezed before returning.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()

        widths = (400, 300, 200, 100)
        in_features = input_dim
        # Register bilstm1/norm1 ... bilstm4/norm4 in that order; every stage
        # consumes the doubled (bidirectional) width of the previous one.
        for stage, width in enumerate(widths, start=1):
            setattr(self, f"bilstm{stage}",
                    nn.LSTM(in_features, width, batch_first=True, bidirectional=True))
            setattr(self, f"norm{stage}", nn.LayerNorm(width * 2))
            in_features = width * 2

        self.fc1 = nn.Linear(in_features, 100)
        self.fc2 = nn.Linear(100, output_dim)

    def forward(self, X):
        stages = (
            (self.bilstm1, self.norm1),
            (self.bilstm2, self.norm2),
            (self.bilstm3, self.norm3),
            (self.bilstm4, self.norm4),
        )
        hidden = X
        for recurrent, normalise in stages:
            hidden, _ = recurrent(hidden)
            hidden = normalise(hidden)

        hidden = F.selu(self.fc1(hidden))
        return self.fc2(hidden).squeeze(dim=2)

In [None]:
from torch.utils.data import DataLoader

# Single source of truth for the batch size (CFG.batch_size is 64, the value
# that used to be hard-coded here).
batch_size = CFG.batch_size

# Folds are split by breath id so a breath never lands in both train and validation.
group_kfold = GroupKFold(n_splits=CFG.n_fold)

for fold, (train_idx, valid_idx) in enumerate(group_kfold.split(train, targets, groups=groups)):

    print("Fold %d" % fold)

    train_fold = train[train_idx]
    val_fold = train[valid_idx]
    train_target = targets[train_idx]
    val_target = targets[valid_idx]

    train_dataset = CusDataset(train_fold, train_target)
    # Training batches are reshuffled every epoch (the original iterated them
    # in the same fixed order each time).
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=CFG.num_workers,
                              drop_last=False)

    valid_dataset = CusDataset(val_fold, val_target)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=CFG.num_workers,
                              drop_last=False)

    # A fresh model and optimizer per fold.
    model = RNNModel(num_features, 1)
    model.to(CFG.device)

    criterion = nn.L1Loss()
    criterion.to(CFG.device)
    # NOTE(review): AdamW is the name imported from `transformers` at the top of
    # the notebook; newer transformers releases reportedly drop it in favour of
    # torch.optim.AdamW -- confirm against the installed version.
    optimizer = AdamW(model.parameters(), lr=CFG.lr)

    for epoch in range(CFG.epochs):
        # Back to training mode; the validation pass below switches to eval.
        model.train()
        running_loss = 0.0

        for i, (x, y) in enumerate(train_loader):
            inputs = x.to(CFG.device)
            labels = y.to(CFG.device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Report the mean loss over each full window of CFG.print_freq
            # batches. The original test `i % 2000 == 0` fired on the very first
            # batch and divided that single loss by 2000.
            running_loss += loss.item()
            if (i + 1) % CFG.print_freq == 0:
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / CFG.print_freq:.3f}')
                running_loss = 0.0

        # Validation pass: valid_loader was previously built but never consumed.
        model.eval()
        valid_abs_error = 0.0
        with torch.no_grad():
            for x, y in valid_loader:
                preds = model(x.to(CFG.device))
                valid_abs_error += F.l1_loss(preds, y.to(CFG.device), reduction='sum').item()
        # Summed absolute error divided by the number of target values = MAE.
        print(f'[{epoch + 1}] valid MAE: {valid_abs_error / val_target.size:.3f}')

    print("Finished")
    