# Purpose

This kernel was influenced by the kernels below.

Reference:
[Ventilator Pressure / LSTM starter](https://www.kaggle.com/yasufuminakama/ventilator-pressure-lstm-starter)

# Import

In [None]:
import os
import sys

import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

from tqdm.notebook import tqdm

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

from transformers import AdamW
from transformers import get_linear_schedule_with_warmup

import warnings
warnings.filterwarnings("ignore")

# Config

In [None]:
# Global run configuration for this notebook.

# When True, `from apex import amp` is executed in the next cell.
APEX=False
# Maximum number of epochs per fold (early stopping may end a fold sooner).
EPOCHS=50
# Width of the hidden layers; read by BreathModel.
HIDDEN_SIZE=64
# Mini-batch sizes intended for the training / validation DataLoaders.
TRAIN_BATCH_SIZE=64
VALID_BATCH_SIZE=16
# Directory intended for output artifacts (the current working directory).
OUTPUT_DIR = "./"

In [None]:
# apex is an optional dependency: only import it when the APEX flag is set.
if APEX:
    from apex import amp

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Utility

In [None]:
class AverageMeter:
    """Keep the latest value and a running weighted mean of a metric.

    Attributes:
        val: the value passed to the most recent ``update`` call.
        sum: weighted sum of all values seen since the last ``reset``.
        count: total weight seen since the last ``reset``.
        avg: ``sum / count`` after the most recent ``update``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [None]:
class EarlyStopping:
    """Stop training once a monitored score has stopped improving.

    Call the instance once per epoch with the epoch's score. Whenever the
    score improves by at least ``delta`` the model's ``state_dict`` is written
    to ``model_path``; otherwise an internal counter is incremented and
    ``early_stop`` becomes True after ``patience`` consecutive epochs
    without improvement.

    Args:
        patience: number of consecutive non-improving epochs tolerated.
        mode: ``"min"`` if a lower score is better; any other value is
            treated as "higher is better".
        delta: minimum change that counts as an improvement.
    """

    def __init__(self, patience=7, mode="max", delta=0.01):
        self.patience = patience
        self.counter = 0
        self.mode = mode
        self.best_score = None
        self.early_stop = False
        self.delta = delta
        # Use the lowercase ``np.inf``: the ``np.Inf`` alias was removed in
        # NumPy 2.0 and raises AttributeError there.
        self.val_score = np.inf if self.mode == "min" else -np.inf

    def __call__(self, epoch_score, model, model_path):
        """Register this epoch's score and checkpoint the model on improvement."""
        # Internally "higher is better": negate the score when minimising.
        if self.mode == "min":
            score = -1.0 * epoch_score
        else:
            score = np.copy(epoch_score)

        # A NaN/inf score must never become the reference value: every later
        # comparison against NaN is False, which would look like a permanent
        # "improvement" and disable early stopping altogether.
        is_finite = bool(np.isfinite(score))
        improved = is_finite and (
            self.best_score is None or score >= self.best_score + self.delta
        )

        if improved:
            self.best_score = score
            self.save_checkpoint(epoch_score, model, model_path)
            self.counter = 0
        else:
            self.counter += 1
            print('EarlyStopping counter: {} out of {}'.format(self.counter, self.patience))
            if self.counter >= self.patience:
                self.early_stop = True

    def save_checkpoint(self, epoch_score, model, model_path):
        """Write ``model.state_dict()`` to ``model_path`` if the score is finite.

        ``val_score`` is updated to ``epoch_score`` whether or not a file was
        written.
        """
        # ``x in [np.nan]`` only matches the very same NaN object (NaN != NaN),
        # so a membership test cannot detect a computed NaN; use isfinite.
        if np.isfinite(epoch_score):
            print('Validation score improved ({} --> {}). Saving model!'.format(self.val_score, epoch_score))
            torch.save(model.state_dict(), model_path)
        self.val_score = epoch_score

# Dataset

In [None]:
class BreathDataset(Dataset):
    """Map-style dataset that yields one breath (one ``breath_id`` group) per item.

    Args:
        df: DataFrame containing the columns ``breath_id``, ``R``, ``C``,
            ``time_step``, ``u_in``, ``u_out``, ``breath_time``,
            ``u_in_time`` and ``pressure``.
    """

    def __init__(self, df):
        self.df = df
        # Maps each breath_id to the index *labels* of its rows.
        self.groups = df.groupby('breath_id').groups
        self.keys = list(self.groups.keys())

    def __len__(self):
        """Return the number of distinct breaths."""
        return len(self.keys)

    def __getitem__(self, idx):
        """Return the tensors of the ``idx``-th breath as a dict.

        ``r``, ``c`` and ``u_out`` are integer tensors (they are used as
        embedding indices / a binary flag); every other entry is float.
        """
        indexes = self.groups[self.keys[idx]]
        # ``groups`` stores labels, so select with .loc; .iloc is only correct
        # when the frame happens to carry a default RangeIndex.
        df = self.df.loc[indexes]

        # The continuous columns must stay floating point: casting them to
        # long truncates them to whole numbers (u_in, for example, is the
        # output of np.log1p in add_feature).
        float_columns = ("time_step", "u_in", "breath_time", "u_in_time", "pressure")

        item = {
            "r": torch.tensor(df.R.values, dtype=torch.long),
            "c": torch.tensor(df.C.values, dtype=torch.long),
            "u_out": torch.tensor(df.u_out.values, dtype=torch.long),
        }
        for column in float_columns:
            item[column] = torch.tensor(df[column].values, dtype=torch.float)
        return item

# Model

In [None]:
class BreathModel(nn.Module):
    """Bidirectional LSTM that predicts one value per time step of a breath.

    Args:
        hidden_size: width of the hidden layers. Defaults to the notebook's
            global ``HIDDEN_SIZE`` when omitted, which keeps ``BreathModel()``
            working exactly as before.
    """

    def __init__(self, hidden_size=None):
        super().__init__()
        self.hidden_size = HIDDEN_SIZE if hidden_size is None else hidden_size
        # No padding_idx: add_feature maps R=5 and C=10 to index 0, so index 0
        # is a real category. With padding_idx=0 its embedding row is pinned
        # to zeros and never receives gradient.
        self.r_emb = nn.Embedding(3, 2)
        self.c_emb = nn.Embedding(3, 2)
        self.seq_emb = nn.Sequential(
            # 9 inputs = 2 (r_emb) + 2 (c_emb) + 5 scalar features.
            nn.Linear(9, self.hidden_size),
            nn.LayerNorm(self.hidden_size),
            nn.ReLU(),
            nn.Dropout(0.3),
        )
        self.lstm = nn.LSTM(
            self.hidden_size,
            self.hidden_size,
            # NOTE: with the default single layer this dropout is inactive
            # (PyTorch only applies it between stacked layers).
            dropout=0.3,
            batch_first=True,
            bidirectional=True
        )
        self.head = nn.Sequential(
            # The bidirectional LSTM emits 2 * hidden_size features per step.
            nn.Linear(
                self.hidden_size * 2,
                self.hidden_size * 2),
            nn.LayerNorm(self.hidden_size * 2),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(self.hidden_size * 2, 1),
        )

    def forward(self, r, c, time_step, u_in, u_out, breath_time, u_in_time):
        """Return per-time-step predictions.

        Args:
            r, c: integer index tensors for the two embeddings (values 0..2).
            time_step, u_in, u_out, breath_time, u_in_time: scalar features
                carrying a trailing feature axis so that they can be
                concatenated with the embeddings along dim 2
                (callers in this file pass ``x.unsqueeze(2)``).

        Returns:
            Tensor with the same leading dims as the inputs and a last dim of 1.
        """
        r_emb = self.r_emb(r)
        c_emb = self.c_emb(c)
        # Bring every scalar feature to the embedding dtype so the
        # concatenation and the following Linear never see integer tensors.
        scalars = [
            feature.to(r_emb.dtype)
            for feature in (time_step, u_in, u_out, breath_time, u_in_time)
        ]
        seq_x = torch.cat((r_emb, c_emb, *scalars), 2)
        seq_emb = self.seq_emb(seq_x)
        seq_emb, _ = self.lstm(seq_emb)
        output = self.head(seq_emb)
        return output

# Loss

In [None]:
class VentilatorLoss(nn.Module):
    """Mean absolute error restricted to the positions selected by a mask.

    Whether this equals the competition metric depends on the mask the
    caller passes in.
    """

    # Implement ``forward`` rather than overriding ``__call__``: nn.Module's own
    # ``__call__`` dispatches to ``forward`` and runs the registered hooks.
    def forward(self, outputs, targets, loss_mask):
        """Return ``mean(|targets - outputs|)`` over entries where ``loss_mask`` is True.

        Args:
            outputs: predicted values.
            targets: reference values, indexable with the same mask as ``outputs``.
            loss_mask: boolean mask selecting the entries that contribute.
        """
        x = outputs[loss_mask]
        y = targets[loss_mask]

        mae = (y - x).abs().mean()

        return mae

# Engine

In [None]:
def train_fn(data_loader, model, optimizer, device, scheduler=None):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        data_loader: yields dict batches with the keys ``r``, ``c``,
            ``time_step``, ``u_in``, ``u_out``, ``breath_time``,
            ``u_in_time`` and ``pressure``.
        model: network called with the seven input tensors.
        optimizer: optimizer stepped once per batch.
        device: device the batch tensors are moved to.
        scheduler: optional learning-rate scheduler stepped once per batch.

    Prints the mean training loss; returns nothing.
    """
    model.train()

    losses = AverageMeter()
    loss_fnc = VentilatorLoss()  # stateless, so build it once
    tk0 = tqdm(data_loader, total=len(data_loader))

    for data in tk0:
        # r / c index the embeddings and u_out is a binary flag: keep them integer.
        r = data["r"].to(device, dtype=torch.long)
        c = data["c"].to(device, dtype=torch.long)
        u_out = data["u_out"].unsqueeze(2).to(device, dtype=torch.long)

        # Continuous features and the regression target must stay floating
        # point; casting them to long truncates them to whole numbers.
        # unsqueeze(2) appends the feature axis the model concatenates on.
        time_step = data["time_step"].unsqueeze(2).to(device, dtype=torch.float)
        u_in = data["u_in"].unsqueeze(2).to(device, dtype=torch.float)
        breath_time = data["breath_time"].unsqueeze(2).to(device, dtype=torch.float)
        u_in_time = data["u_in_time"].unsqueeze(2).to(device, dtype=torch.float)
        targets = data["pressure"].unsqueeze(2).to(device, dtype=torch.float)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        outputs = model(
            r,
            c,
            time_step,
            u_in,
            u_out,
            breath_time,
            u_in_time,
        )

        # The competition metric scores only the inspiratory phase, which the
        # data flags with u_out == 0 (u_out == 1 is the unscored expiration).
        loss_mask = u_out == 0

        loss = loss_fnc(
            outputs,
            targets,
            loss_mask,
        )

        # Weight the running mean by the number of breaths in the batch.
        losses.update(loss.item(), len(loss_mask))

        loss.backward()
        optimizer.step()
        # The scheduler is optional (default None).
        if scheduler is not None:
            scheduler.step()

        # Refresh the progress bar while it is still running.
        tk0.set_postfix(loss=losses.avg)

    print(f"train loss : {losses.avg}")

In [None]:
def valid_fn(data_loader, model, device):
    """Evaluate ``model`` on ``data_loader`` and return the mean masked MAE.

    Args:
        data_loader: yields dict batches with the keys ``r``, ``c``,
            ``time_step``, ``u_in``, ``u_out``, ``breath_time``,
            ``u_in_time`` and ``pressure``.
        model: network called with the seven input tensors.
        device: device the batch tensors are moved to.

    Returns:
        The batch-size-weighted mean of the per-batch losses.
    """
    losses = AverageMeter()
    loss_fnc = VentilatorLoss()  # stateless, so build it once

    model.eval()

    tk0 = tqdm(data_loader, total=len(data_loader))

    # No gradients are needed for evaluation; skipping the autograd graph
    # saves memory and time.
    with torch.no_grad():
        for data in tk0:
            # r / c index the embeddings and u_out is a binary flag: keep them integer.
            r = data["r"].to(device, dtype=torch.long)
            c = data["c"].to(device, dtype=torch.long)
            u_out = data["u_out"].unsqueeze(2).to(device, dtype=torch.long)

            # Continuous features and the regression target must stay
            # floating point; casting them to long truncates them.
            time_step = data["time_step"].unsqueeze(2).to(device, dtype=torch.float)
            u_in = data["u_in"].unsqueeze(2).to(device, dtype=torch.float)
            breath_time = data["breath_time"].unsqueeze(2).to(device, dtype=torch.float)
            u_in_time = data["u_in_time"].unsqueeze(2).to(device, dtype=torch.float)
            targets = data["pressure"].unsqueeze(2).to(device, dtype=torch.float)

            outputs = model(
                r,
                c,
                time_step,
                u_in,
                u_out,
                breath_time,
                u_in_time,
            )

            # The competition metric scores only the inspiratory phase, which
            # the data flags with u_out == 0.
            loss_mask = u_out == 0

            loss = loss_fnc(
                outputs,
                targets,
                loss_mask,
            )

            # Weight the running mean by the number of breaths in the batch.
            losses.update(loss.item(), len(loss_mask))

            # Refresh the progress bar while it is still running.
            tk0.set_postfix(loss=losses.avg)

    print(f"valid loss : {losses.avg}")

    return losses.avg

# Train

In [None]:
# 5-fold cross-validation training: one model per fold, checkpointed by
# EarlyStopping whenever the validation loss improves.
df = pd.read_csv("../input/ventilator-pressure-fold/ventilator_pressure_fold.csv")

for fold in tqdm(range(5)):

    model = BreathModel()
    model.to(device)

    # Rows whose kfold equals the current fold are held out for validation.
    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    train_dataset = BreathDataset(
        df=df_train
    )

    # Train with TRAIN_BATCH_SIZE (the two batch sizes used to be swapped)
    # and shuffle so every epoch sees the breaths in a different order.
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=TRAIN_BATCH_SIZE,
        shuffle=True,
        num_workers=2
    )

    valid_dataset = BreathDataset(
        df=df_valid
    )

    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=VALID_BATCH_SIZE,
        num_workers=2
    )

    es = EarlyStopping(patience=3, mode="min")

    optimizer = AdamW(model.parameters(), lr=1e-5, weight_decay=1e-6)

    # The scheduler is stepped once per batch, so the schedule length must be
    # the real number of batches; otherwise the learning rate reaches zero
    # before training ends.
    num_train_steps = len(train_data_loader) * EPOCHS

    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=num_train_steps
    )

    # Honour the configured output directory for the per-fold checkpoints.
    model_path = os.path.join(OUTPUT_DIR, f"model_{fold}.bin")

    for epoch in tqdm(range(EPOCHS)):
        print(f"Training is Starting for epoch={epoch}")
        train_fn(train_data_loader, model, optimizer, device, scheduler=scheduler)
        print(f"Validating is Starting for epoch={epoch}")
        valid_loss = valid_fn(valid_data_loader, model, device)

        es(valid_loss, model, model_path=model_path)
        if es.early_stop:
            print("Early stopping")
            break

# Predict

In [None]:
class TestBreathDataset(Dataset):
    """Map-style dataset that yields one breath per item, without a target.

    Args:
        df: DataFrame containing the columns ``breath_id``, ``R``, ``C``,
            ``time_step``, ``u_in``, ``u_out``, ``breath_time`` and
            ``u_in_time``.
    """

    def __init__(self, df):
        self.df = df
        # Maps each breath_id to the index *labels* of its rows.
        self.groups = df.groupby('breath_id').groups
        self.keys = list(self.groups.keys())

    def __len__(self):
        """Return the number of distinct breaths."""
        return len(self.keys)

    def __getitem__(self, idx):
        """Return the feature tensors of the ``idx``-th breath as a dict.

        ``r``, ``c`` and ``u_out`` are integer tensors; every other entry is
        float.
        """
        indexes = self.groups[self.keys[idx]]
        # ``groups`` stores labels, so select with .loc; .iloc is only correct
        # when the frame happens to carry a default RangeIndex.
        df = self.df.loc[indexes]

        # The continuous columns must stay floating point: casting them to
        # long truncates them to whole numbers (u_in, for example, is the
        # output of np.log1p in add_feature).
        # NOTE(review): the dtypes used here must match those the loaded
        # checkpoints were trained with.
        float_columns = ("time_step", "u_in", "breath_time", "u_in_time")

        item = {
            "r": torch.tensor(df.R.values, dtype=torch.long),
            "c": torch.tensor(df.C.values, dtype=torch.long),
            "u_out": torch.tensor(df.u_out.values, dtype=torch.long),
        }
        for column in float_columns:
            item[column] = torch.tensor(df[column].values, dtype=torch.float)
        return item

In [None]:
def add_feature(df):
    """Add the model's engineered columns to ``df`` in place and return it.

    The frame is modified in place:
      * ``u_in`` is replaced by ``log1p(u_in)``.
      * ``R`` (5 / 20 / 50) and ``C`` (10 / 20 / 50) are replaced by the
        category indices 0 / 1 / 2; any other value becomes NaN.
      * ``breath_time`` is the difference between consecutive ``time_step``
        values.
      * ``u_in_time`` is the difference between consecutive (log-scaled)
        ``u_in`` values.

    Both difference columns are set to 0 on rows where ``time_step == 0``,
    so that a difference never reaches back into the preceding rows.

    Args:
        df: DataFrame with the columns ``R``, ``C``, ``time_step`` and ``u_in``.

    Returns:
        The same DataFrame object, with the columns above added / replaced.
    """
    df["u_in"] = np.log1p(df["u_in"])

    r_map = {5: 0, 20: 1, 50: 2}
    c_map = {10: 0, 20: 1, 50: 2}
    df["R"] = df["R"].map(r_map)
    df["C"] = df["C"].map(c_map)

    starts_breath = df["time_step"] == 0

    df["breath_time"] = df["time_step"] - df["time_step"].shift(1)
    df.loc[starts_breath, "breath_time"] = 0

    # First difference of u_in. The previous expression
    # ``u_in - u_in - u_in.shift(1)`` cancelled to ``-u_in.shift(1)``.
    # NOTE(review): the training CSV is produced outside this file; confirm
    # its u_in_time column is generated with the same formula.
    df["u_in_time"] = df["u_in"] - df["u_in"].shift(1)
    df.loc[starts_breath, "u_in_time"] = 0

    return df

In [None]:
def predict_fn(model, data_loader):
    """Run ``model`` over ``data_loader`` and return all predictions flattened.

    Args:
        model: network called with the seven input tensors.
        data_loader: yields dict batches with the keys ``r``, ``c``,
            ``time_step``, ``u_in``, ``u_out``, ``breath_time`` and
            ``u_in_time``.

    Returns:
        A flat list with one prediction per input row, in loader order
        (empty when the loader yields no batch).
    """
    preds = []

    # Inference only: disable dropout and skip the autograd graph.
    model.eval()

    tk0 = tqdm(data_loader, total=len(data_loader))

    with torch.no_grad():
        for data in tk0:
            # r / c index the embeddings and u_out is a binary flag: keep them integer.
            r = data["r"].to(device, dtype=torch.long)
            c = data["c"].to(device, dtype=torch.long)
            u_out = data["u_out"].unsqueeze(2).to(device, dtype=torch.long)

            # Continuous features must stay floating point; casting them to
            # long truncates them to whole numbers.
            # NOTE(review): the dtypes used here must match those the loaded
            # checkpoints were trained with.
            time_step = data["time_step"].unsqueeze(2).to(device, dtype=torch.float)
            u_in = data["u_in"].unsqueeze(2).to(device, dtype=torch.float)
            breath_time = data["breath_time"].unsqueeze(2).to(device, dtype=torch.float)
            u_in_time = data["u_in_time"].unsqueeze(2).to(device, dtype=torch.float)

            outputs = model(
                r,
                c,
                time_step,
                u_in,
                u_out,
                breath_time,
                u_in_time,
            )

            preds.append(outputs.cpu().numpy().flatten())

    # np.concatenate rejects an empty sequence, so handle that case here.
    if not preds:
        return []

    # One vectorised concatenation instead of a nested Python loop; the
    # elements remain NumPy scalars, as before.
    return list(np.concatenate(preds))

In [None]:
# Load the five per-fold checkpoints for inference.
# Fall back to the CPU when no GPU is available instead of failing outright.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

fold_models = []
for fold_index in range(5):
    fold_model = BreathModel()
    fold_model.to(device)
    # map_location lets checkpoints that were saved on a GPU load on the
    # currently selected device.
    fold_model.load_state_dict(
        torch.load(
            f"../input/lstm-ventilator-pressure/model_{fold_index}.bin",
            map_location=device,
        )
    )
    fold_model.eval()
    fold_models.append(fold_model)

# Keep the individual names that the following cells refer to.
model_0, model_1, model_2, model_3, model_4 = fold_models

In [None]:
# Build the test-set features, run each fold model over them and average the
# five prediction vectors element-wise.
df_test = add_feature(
    pd.read_csv("../input/ventilator-pressure-prediction/test.csv")
)

test_dataset = TestBreathDataset(df=df_test)

data_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=VALID_BATCH_SIZE,
    num_workers=1,
)

# One full pass over the test loader per fold model, in fold order.
preds_0, preds_1, preds_2, preds_3, preds_4 = [
    predict_fn(fold_model, data_loader)
    for fold_model in (model_0, model_1, model_2, model_3, model_4)
]

np_preds_0, np_preds_1, np_preds_2, np_preds_3, np_preds_4 = [
    np.array(fold_preds)
    for fold_preds in (preds_0, preds_1, preds_2, preds_3, preds_4)
]

# Unweighted ensemble mean of the five folds.
predictions = (np_preds_0 + np_preds_1 + np_preds_2 + np_preds_3 + np_preds_4) / 5

In [None]:
# Put the averaged predictions into the sample submission's `pressure`
# column and write the result without the index.
df_sub = pd.read_csv(
    "../input/ventilator-pressure-prediction/sample_submission.csv"
).assign(pressure=predictions)
df_sub.to_csv('submission.csv', index=False)