In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily walk the Kaggle input tree and print the full path of every file found.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
from torch import nn
import torch

import pytorch_lightning as pl

# Show the installed PyTorch version in the cell output.
print("TORCH:", torch.__version__)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

from collections import OrderedDict, Counter
from tqdm import tqdm
import random

In [None]:
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch.
    When CUDA is available the GPU generators are seeded as well.

    Args:
        seed: Integer seed handed to each generator.
    """
    # `random` is imported by the notebook, so it has to be seeded too;
    # otherwise anything drawing from it changes between runs.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():  # GPU operations have separate seeds
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

set_seed(42)

In [None]:
# Directory holding the competition CSV files.
datapath = '../input/ventilator-pressure-prediction'

train = pd.read_csv(datapath + '/train.csv')
test = pd.read_csv(datapath + '/test.csv')

# Shape and first rows of each frame.
print('Train Shape -> ', train.shape, sep='')
print(train.head())

print('\nTest Shape -> ', test.shape, sep='')
print(test.head())

# Number of rows recorded for each breath_id in the training file.
count = list(Counter(train.breath_id).values())

# Every breath must have the same number of rows; the reshapes further down rely on it.
N_TIME_STEPS_PER_EXAMPLE = max(count)
assert min(count) == N_TIME_STEPS_PER_EXAMPLE

In [None]:
def add_diff(array: np.ndarray, index: int = 0) -> np.ndarray:
    """Replace one feature of a 3-D array by its first difference along axis 1.

    For the selected feature, every step but the first becomes
    ``array[:, t, index] - array[:, t - 1, index]``; the first step keeps its
    original value. All other features are copied unchanged.

    Args:
        array: Array indexed as ``[example, step, feature]``.
        index: Position of the feature (last axis) to difference.

    Returns:
        A new array with the same shape and dtype as ``array``; the input is
        left untouched.
    """
    data = array.copy()
    # Read the previous steps from the untouched input so the in-place update
    # never sees already-modified values. Staying in the input's dtype also
    # keeps integer arrays working (a float scratch buffer made `-=` fail).
    data[:, 1:, index] -= array[:, :-1, index]
    return data

def pressure_log(array, index: int = 0):
    """Return feature ``index`` delayed by one step along axis 1.

    The result has the shape and dtype of ``array[:, :, index]``. Its first
    step is zero and step ``t`` holds the input's value at step ``t - 1``.
    """
    column = array[:, :, index]
    lagged = np.zeros_like(column)
    lagged[:, 1:] = column[:, :-1]
    return lagged
    

def featurize(dataframe: pd.DataFrame):
    """Build the model input features from a frame of raw ventilator rows.

    The input is not modified. ``id`` and ``breath_id`` are dropped, ``R`` and
    ``C`` are min-max scaled with the extremes of the frame passed in (so each
    frame is scaled by its own range), ``time_step`` and ``u_in`` are
    differenced inside every block of ``N_TIME_STEPS_PER_EXAMPLE`` consecutive
    rows, and several products of those columns are added. ``R``, ``C``,
    ``time_step`` and the two scaled columns are removed before returning.

    NOTE(review): the block-wise differencing presumably expects the rows of
    each breath to be contiguous and in time order; confirm for new data.
    """
    def min_max(column):
        # Rescale to [0, 1] using the extremes found in this frame.
        lowest, highest = column.min(), column.max()
        return (column - lowest) / (highest - lowest)

    def step_diff(column):
        # First difference within each block of N_TIME_STEPS_PER_EXAMPLE rows.
        per_example = column.to_numpy().reshape(-1, N_TIME_STEPS_PER_EXAMPLE, 1)
        return add_diff(per_example).flatten()

    # Identifier columns are not features.
    data = dataframe.copy().drop(columns=['id', 'breath_id'])

    # Scaled lung attributes.
    data['norm_R'] = min_max(data.R)
    data['norm_C'] = min_max(data.C)

    # Step-to-step changes.
    data['time_step_diff'] = step_diff(data.time_step)
    data['u_in_diff'] = step_diff(data.u_in)

    # Interaction features; the R*C product is shared by several of them.
    norm_rc = data.norm_R * data.norm_C
    data['time_cross_var'] = data.time_step * data.time_step_diff
    data['u_in_cross_var_in_time'] = data.u_in * data.u_in_diff * data.time_cross_var
    data['norm_R_C_time_cross_var'] = norm_rc * data.time_cross_var
    data['u_in_norm_R_C'] = norm_rc * data.u_in
    data['u_in_norm_R_C_time_cross_var'] = data.u_in_norm_R_C * data.time_cross_var
    data['norm_R_C_u_in_cross_var'] = norm_rc * data.u_in_diff

    # Raw and intermediate columns that are not fed to the model.
    unused_columns = ['R', 'C', 'time_step', 'norm_C', 'norm_R']
    return data.drop(columns=unused_columns)

In [None]:
# The target column is kept apart; every other column goes through featurize.
training_targets_dataframe = train['pressure']
training_features_dataframe = featurize(train.drop(columns='pressure'))
training_features_dataframe.head(7)

In [None]:
# Preview the first target values (pressure) next to the features shown above.
training_targets_dataframe.head(7)

In [None]:
# Width of the feature frame, i.e. the number of model inputs per time step.
N_FEATURES = training_features_dataframe.shape[1]

# Fold the flat rows into one sequence per example:
# features -> (examples, steps, features), targets -> (examples, steps).
training_features = training_features_dataframe.to_numpy().reshape(
    -1, N_TIME_STEPS_PER_EXAMPLE, N_FEATURES
)
training_targets = training_targets_dataframe.to_numpy().reshape(
    -1, N_TIME_STEPS_PER_EXAMPLE
)

print('Features Shape:', training_features.shape, 'Targets Shape:', training_targets.shape)

In [None]:
# 80 % of the examples for training; the remaining 20 % is halved into dev and test.
# A fixed random_state makes this cell idempotent: re-running it can no longer
# reshuffle the splits and move already-trained-on examples into dev/test.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    training_features, training_targets, test_size=0.2, random_state=42
)
X_dev, X_test, y_dev, y_test = train_test_split(
    X_holdout, y_holdout, test_size=0.5, random_state=42
)

print('Train Shape:', X_train.shape, '\nDev Shape:', X_dev.shape, '\nTest shape:', X_test.shape)

In [None]:
# Number of examples per mini-batch.
BATCH_SIZE = 50

# Wrap each split as (features, targets) tensor pairs.
trainset = TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train))
devset = TensorDataset(torch.from_numpy(X_dev), torch.from_numpy(y_dev))
testset = TensorDataset(torch.from_numpy(X_test), torch.from_numpy(y_test))

# Reshuffle the training examples every epoch so the optimizer does not see the
# same batches in the same order each time. The evaluation loaders keep a fixed order.
trainLoader = DataLoader(trainset, BATCH_SIZE, shuffle=True)
devLoader = DataLoader(devset, BATCH_SIZE, shuffle=False)
testLoader = DataLoader(testset, BATCH_SIZE, shuffle=False)

In [None]:
def hidden_block(x, y, activation: str = 'relu', drop: float = 0.05):
    """Build a ``Linear(x, y) -> Dropout(drop) -> activation`` block.

    ``activation`` selects the last layer: ``'selu'``, ``'relu'``, ``'lrelu'``
    (LeakyReLU) or ``'none'`` (identity). Any other key raises ``KeyError``.
    """
    activation_factories = {
        'selu': nn.SELU,
        'relu': nn.ReLU,
        'lrelu': nn.LeakyReLU,
        'none': nn.Identity,
    }
    layers = [nn.Linear(x, y), nn.Dropout(drop), activation_factories[activation]()]
    return nn.Sequential(*layers)


class LongShortTermNetwork(pl.LightningModule):
    """LSTM sequence regressor: a stacked LSTM followed by a per-step dense head.

    The LSTM runs batch-first over ``(batch, steps, input_size)`` inputs. Its
    per-step output goes through a chain of ``hidden_block`` layers and a final
    linear layer that produces ``output_size`` values per step.

    Training minimises the L1 loss (MAE); the MSE is logged alongside it.
    Optimisation uses Adam (lr 3e-3) with a ``ReduceLROnPlateau`` scheduler
    (factor 0.5, patience 4) monitored on ``val_loss``. Lightning steps that
    scheduler itself, so the module does not step it manually.
    """

    # Dropout probability used inside every dense block of the head.
    HEAD_DROPOUT = 0.2

    def __init__(self, input_size: int, output_size: int, lstm_hidden_size: int, num_layers: int,
        bidirectional: bool, lstm_drop: float = 0, linear_hidden_sizes: list = [256, 64], hidden_activation: str = 'selu') -> None:
        """Create the LSTM, the dense head and the output layer.

        Args:
            input_size: Number of features per time step.
            output_size: Number of values predicted per time step.
            lstm_hidden_size: Hidden size of each LSTM layer (per direction).
            num_layers: Number of stacked LSTM layers.
            bidirectional: Whether the LSTM also reads the sequence backwards.
            lstm_drop: Dropout between stacked LSTM layers.
            linear_hidden_sizes: Output widths of the dense blocks, in order.
                The default list is only read, never mutated.
            hidden_activation: Activation key understood by ``hidden_block``.
        """
        super().__init__()
        self.save_hyperparameters()

        self.in_size = input_size
        self.out_size = output_size
        self.lstm_hidden_size = lstm_hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        # 2 directions when bidirectional, 1 otherwise.
        self.n_stacks = 1 + int(self.bidirectional)

        self.lstm_layer = nn.LSTM(
            input_size=self.in_size,
            hidden_size=self.lstm_hidden_size,
            num_layers=self.num_layers,
            batch_first=True,
            bidirectional=bidirectional,
            dropout=lstm_drop
        )

        # The LSTM emits `n_stacks * hidden` features per step. A hard-coded
        # factor of 2 only matched the bidirectional case and made the first
        # dense layer reject the output of a unidirectional LSTM.
        self.linear_sizes = [self.n_stacks * self.lstm_hidden_size, *linear_hidden_sizes]
        self.hidden_layer = nn.Sequential(OrderedDict([
            (f'block_{i}', hidden_block(x, y, hidden_activation, self.HEAD_DROPOUT))
                for i, (x, y) in enumerate(zip(self.linear_sizes, self.linear_sizes[1:]), 1)
        ]))

        self.output = nn.Linear(self.linear_sizes[-1], self.out_size)

    def init_hidden(self, n_samples: int):
        """Return zero ``(h_0, c_0)`` states for a batch of ``n_samples``.

        Each tensor has shape ``(n_stacks * num_layers, n_samples,
        lstm_hidden_size)`` and is created with the dtype and device of the
        LSTM weights. The states therefore follow the model wherever it lives,
        instead of jumping to CUDA whenever a GPU merely exists.
        """
        total_layers = self.n_stacks * self.num_layers
        reference = next(self.lstm_layer.parameters())
        shape = (total_layers, n_samples, self.lstm_hidden_size)
        return reference.new_zeros(shape), reference.new_zeros(shape)

    def forward(self, input_t: torch.Tensor, hidden=None, prev_pred=None):
        """Predict ``output_size`` values for every time step.

        Args:
            input_t: Batch-first input of shape ``(batch, steps, input_size)``.
            hidden: Optional ``(h_0, c_0)`` tuple; zero states are used when omitted.
            prev_pred: Unused; kept so existing callers keep working.

        Returns:
            The predictions of shape ``(batch, steps, output_size)``. When
            ``hidden`` was supplied, the tuple ``(predictions, (h_n, c_n))``.
        """
        if hidden is not None:
            h_t, c_t = hidden
        else:
            h_t, c_t = self.init_hidden(input_t.size(0))

        out, (h_t, c_t) = self.lstm_layer(input_t, (h_t, c_t))
        out = self.hidden_layer(out)
        out = self.output(out)

        if hidden is not None:
            return out, (h_t, c_t)
        return out

    def configure_optimizers(self):
        """Adam plus a ``ReduceLROnPlateau`` scheduler driven by ``val_loss``.

        Because ``monitor`` is supplied, Lightning calls
        ``scheduler.step(val_loss)`` by itself once per epoch.
        """
        optimizer = torch.optim.Adam(self.parameters(), 3e-3)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=4)
        return {"optimizer": optimizer, "lr_scheduler": scheduler, "monitor": "val_loss"}

    def _batch_losses(self, batch):
        """Return ``(mae, mse)`` for one ``(features, targets)`` batch; only the MAE carries gradients."""
        x, y = batch
        y = y.float()
        out = self(x.float())
        mae = F.l1_loss(out.flatten(), y.flatten())
        with torch.no_grad():
            mse = F.mse_loss(out.flatten(), y.flatten())
        return mae, mse

    def training_step(self, train_batch, batch_idx):
        """Compute and log the training MAE (``loss``) and MSE (``mse_loss``); return the MAE."""
        # No self.train() here: Lightning puts the module in training mode itself.
        loss, mse = self._batch_losses(train_batch)
        self.log('loss', loss)
        self.log('mse_loss', mse, prog_bar=True)
        return loss

    def validation_step(self, val_batch, batch_idx):
        """Compute and log the validation MAE (``val_loss``) and MSE (``val_mse_loss``)."""
        # No self.eval()/no_grad here: Lightning runs validation in eval mode
        # with gradients disabled and restores training mode afterwards.
        loss, mse = self._batch_losses(val_batch)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_mse_loss', mse, prog_bar=True)

    # NOTE: there is intentionally no `training_epoch_end` override. Stepping the
    # scheduler there as well made ReduceLROnPlateau advance twice per epoch
    # (once by hand, once by Lightning), which halved the effective patience.
    

In [None]:
# 4-layer bidirectional LSTM (256 units, dropout 0.2) with a 256 -> 128 dense head
# that uses the identity activation.
model = LongShortTermNetwork(
    input_size=N_FEATURES,
    output_size=1,
    lstm_hidden_size=256,
    num_layers=4,
    bidirectional=True,
    lstm_drop=0.2,
    linear_hidden_sizes=[256, 128],
    hidden_activation='none',
)
print(model)

In [None]:
# Keep the three checkpoints with the lowest validation loss under ./checkpoints.
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath="./checkpoints",
    filename="sample-pressure-model-{epoch:02d}-{val_loss:.3f}",
    monitor="val_loss",
    mode="min",
    save_top_k=3,
)

# Log the learning rate once per epoch.
lr_callback = pl.callbacks.LearningRateMonitor(logging_interval='epoch')

In [None]:
# Use one GPU when CUDA is available and fall back to the CPU otherwise, like the
# rest of the notebook does. A hard-coded gpus=1 aborts on CPU-only machines.
trainer = pl.Trainer(
    gpus=int(torch.cuda.is_available()),
    max_epochs=100,
    callbacks=[checkpoint_callback, lr_callback],
)
trainer.fit(model, trainLoader, devLoader)

In [None]:
# Best monitored value (val_loss, mode="min") recorded by the checkpoint callback.
print('Best Score:', checkpoint_callback.best_model_score)

In [None]:
# Load the best checkpoint on the CPU: it is only inspected here, so there is no
# reason to put a second copy of the weights on the GPU. map_location also lets
# the file open on a machine without CUDA.
best_model_states = torch.load(checkpoint_callback.best_model_path, map_location='cpu')
best_model_states.keys()

### Testing the model with my test set

In [None]:
# Device used for evaluation: the GPU when CUDA is available, the CPU otherwise.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# `pressure_net` is not assigned anywhere earlier in this notebook, so the cell
# failed on a fresh kernel. Restore the best checkpoint explicitly; the
# hyperparameters saved by the module let Lightning rebuild the network.
pressure_net = LongShortTermNetwork.load_from_checkpoint(checkpoint_callback.best_model_path)
pressure_net = pressure_net.to(DEVICE)
pressure_net.eval()

losses = []       # per-batch MAE
mse_losses = []   # per-batch MSE
batch_sizes = []  # number of examples behind each per-batch value

with torch.no_grad():
    for x, y in testLoader:
        x, y = x.to(DEVICE).float(), y.to(DEVICE).float()

        out = pressure_net(x)

        losses.append(F.l1_loss(out.flatten(), y.flatten()).item())
        mse_losses.append(F.mse_loss(out.flatten(), y.flatten()).item())
        batch_sizes.append(x.size(0))

# Weight every batch by its size: the last batch may be smaller, and a plain mean
# of the batch means would give its examples too much influence.
mean_loss = np.average(losses, weights=batch_sizes)
mean_mse_loss = np.average(mse_losses, weights=batch_sizes)

In [None]:
# Report the held-out test metrics computed in the previous cell.
print(f"Mean Testing Loss (MAE): {mean_loss:.4f}...")
print(f"Mean Testing MSE Loss: {mean_mse_loss:.4f}...")

Test losses from the second-to-last run:

 > Mean Testing Loss (MAE): 0.2141...  
 > Mean Testing MSE Loss: 0.1239...
 


In [None]:
def predict(net: nn.Module, features, device: str = 'cpu', eval_batch: int = 200) -> np.array:
    """Run ``net`` over ``features`` in chunks and return the stacked outputs.

    The network is switched to eval mode and moved to ``device``. Each chunk of
    at most ``eval_batch`` rows is moved to ``device``, cast to float and fed
    forward with gradients disabled. The outputs are gathered on the CPU,
    concatenated along the first axis and returned as a NumPy array.
    """
    outputs = []
    with torch.no_grad():
        net.eval()
        net = net.to(device)
        n_rows = features.size(0)
        for start in range(0, n_rows, eval_batch):
            # Slicing clamps at the end, so the last chunk may simply be shorter.
            chunk = features[start:start + eval_batch].to(device)
            outputs.append(net(chunk.float()).cpu())
    return torch.cat(outputs, dim=0).numpy()

In [None]:
# Index of the held-out example that is always shown.
K = 1122

# `predictions` is not assigned anywhere earlier in this notebook, so the plots
# failed on a fresh kernel. Compute it here with the same `model` that is later
# used for the submission, and drop the trailing output axis so it lines up with y_test.
predictions = predict(model, torch.from_numpy(X_test), device=DEVICE).reshape(y_test.shape)

y = y_test[K]
y_hat = predictions[K]

plt.plot(y)
plt.plot(y_hat)
plt.title(f'Constant showcase example n={K}')
plt.legend(['Targets', 'Predictions'])
plt.ylabel('Pressure')
plt.show();

print('MSE: %.4f...' % mean_squared_error(y, y_hat))
print('MAE: %.4f...' % mean_absolute_error(y, y_hat))

In [None]:
# Pick one held-out example at random and compare targets with predictions.
k = np.random.randint(0, predictions.shape[0])

y, y_hat = y_test[k], predictions[k]

fig, ax = plt.subplots()
ax.plot(y)
ax.plot(y_hat)
ax.set_title(f'Random Showcase example n={k}')
ax.legend(['Targets', 'Predictions'])
ax.set_ylabel('Pressure')
plt.show();

# Per-example error metrics.
print('MSE: %.4f...' % mean_squared_error(y, y_hat))
print('MAE: %.4f...' % mean_absolute_error(y, y_hat))

# Now the competition test data

In [None]:
# Apply the same feature pipeline to the competition test file.
# Note that featurize scales R and C with the extremes of the frame it receives.
testing_set_dataframe = featurize(test)
testing_set_dataframe.head()

In [None]:
# Fold the flat test rows into (examples, steps, features) sequences and run the model on them.
testing_set = testing_set_dataframe.to_numpy().reshape(
    -1, N_TIME_STEPS_PER_EXAMPLE, N_FEATURES
)
test_predictions = predict(model, torch.from_numpy(testing_set), device=DEVICE)

In [None]:
# Take the ids from the test file instead of fabricating 1..N: the rows then stay
# tied to the ids they were predicted for, and pandas raises if the number of
# predictions does not match the number of test rows.
submission = pd.DataFrame({
    'id': test['id'].to_numpy(),
    'pressure': test_predictions.flatten()
})
submission.head()

In [None]:
# Write the submission file to the working directory, without the index column.
submission.to_csv('submission.csv', index=False)