In [2]:
import os
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
from tqdm.notebook import tqdm
from collections import Counter

warnings.filterwarnings("ignore")
NUM_WORKERS = 4

## Data

### Load

In [30]:
DATA_PATH = "/kaggle/input/ventilator-pressure-prediction/"

# Read the sample submission and the train / test tables from DATA_PATH, in that order.
sub, df_train, df_test = (
    pd.read_csv(f"{DATA_PATH}{csv_name}")
    for csv_name in ("sample_submission.csv", "train.csv", "test.csv")
)

# Keep only the rows whose breath_id is below 500 and renumber the index from 0.
df_train = df_train.loc[df_train['breath_id'] < 500].reset_index(drop=True)

In [31]:
df_train.shape  # (rows, columns) of the training frame after the breath_id filter above

### Dataset

In [4]:
# df.sort_values(by='time_step').groupby('breath_id').agg(list)

In [5]:
import torch
from torch.utils.data import Dataset

class VentilatorDataset(Dataset):
    """
    Per-breath dataset built from a long-format dataframe (one row per time step).

    Rows are grouped by "breath_id" and every other column is collected into a
    per-breath list, keeping the row order of ``df``. Each item is a dict with:
        - "input": float tensor (seq_len, 4) = R, C, u_in and the running sum of u_in
        - "p": float tensor (seq_len,) with the pressure values

    Args:
        df (pandas dataframe): Needs "breath_id", "R", "C" and "u_in" columns.
            When "pressure" is missing, a zero placeholder is used instead.
            Assumes every breath has the same number of rows so the per-breath
            lists stack into dense arrays -- TODO confirm against the data.
    """
    def __init__(self, df):
        if "pressure" not in df.columns:
            # assign() returns a new frame: the caller's dataframe must not
            # silently gain a fake "pressure" column.
            df = df.assign(pressure=0)

        self.df = df.groupby('breath_id').agg(list).reset_index()

        self.prepare_data()

    def __len__(self):
        """Number of breaths."""
        return self.df.shape[0]

    def prepare_data(self):
        """Stacks the per-breath lists into ``self.pressures`` and ``self.inputs``."""
        self.pressures = np.array(self.df['pressure'].values.tolist())

        rs = np.array(self.df['R'].values.tolist())
        cs = np.array(self.df['C'].values.tolist())
        u_ins = np.array(self.df['u_in'].values.tolist())

        # (n_breaths, seq_len, 4): the features sit on the last axis.
        self.inputs = np.stack([rs, cs, u_ins, np.cumsum(u_ins, 1)], axis=-1)

    def __getitem__(self, idx):
        """Returns the "input" / "p" tensors of breath number ``idx``."""
        return {
            "input": torch.tensor(self.inputs[idx], dtype=torch.float),
            "p": torch.tensor(self.pressures[idx], dtype=torch.float),
        }

In [6]:
# Smoke test: build the dataset from the training frame and look at the first breath.
# (`df` is not defined anywhere in this notebook, so use `df_train`.)
dataset = VentilatorDataset(df_train)
dataset[0]

## Model
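- Linear + ReLU projections of the input features and of the shifted pressures
- Encoder-decoder Transformer
- Prediction dense layer
- Previous baseline (kept commented out below): 2 Layer MLP + Bidirectional LSTM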

In [311]:
from torch import Tensor
import torch
import torch.nn as nn
from torch.nn import Transformer
import math
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# helper Module that adds positional encoding to the token embedding to introduce a notion of word order.
class PositionalEncoding(nn.Module):
    """
    Adds fixed sinusoidal position signals to a sequence-first embedding, then
    applies dropout.

    Args:
        emb_size (int): Size of the embedding (last) dimension. Odd sizes are supported.
        dropout (float): Dropout probability applied to the sum.
        maxlen (int, optional): Number of positions precomputed. Defaults to 150.
    """
    def __init__(self,
                 emb_size: int,
                 dropout: float,
                 maxlen: int = 150):
        super(PositionalEncoding, self).__init__()
        den = torch.exp(- torch.arange(0, emb_size, 2) * math.log(10000) / emb_size)
        pos = torch.arange(0, maxlen).reshape(maxlen, 1)
        pos_embedding = torch.zeros((maxlen, emb_size))
        pos_embedding[:, 0::2] = torch.sin(pos * den)
        # With an odd emb_size there is one fewer odd-indexed column than
        # even-indexed ones, so trim the frequency vector to match.
        pos_embedding[:, 1::2] = torch.cos(pos * den[: emb_size // 2])
        # (maxlen, 1, emb_size): the middle axis broadcasts over the batch.
        pos_embedding = pos_embedding.unsqueeze(-2)

        self.dropout = nn.Dropout(dropout)
        # Buffer: follows .to(device) and is saved in the state dict, but is not trained.
        self.register_buffer('pos_embedding', pos_embedding)

    def forward(self, token_embedding: Tensor):
        """
        Args:
            token_embedding (Tensor): Embedding whose first axis is the sequence
                position and whose last axis has size emb_size.

        Returns:
            Tensor: Same shape as the input.
        """
        return self.dropout(token_embedding + self.pos_embedding[:token_embedding.size(0), :])

# helper Module to convert tensor of input indices into corresponding tensor of token embeddings
class TokenEmbedding(nn.Module):
    """Embedding lookup for integer token indices, scaled by sqrt(emb_size)."""

    def __init__(self, vocab_size: int, emb_size):
        super().__init__()
        self.emb_size = emb_size
        self.embedding = nn.Embedding(vocab_size, emb_size)

    def forward(self, tokens: Tensor):
        """Casts ``tokens`` to long, looks them up and rescales the vectors."""
        token_ids = tokens.long()
        scale = math.sqrt(self.emb_size)
        return self.embedding(token_ids) * scale

# Seq2Seq Network
class Seq2SeqTransformer(nn.Module):
    """
    Encoder-decoder Transformer that regresses one value per decoder step.

    The 4 input features and the scalar decoder inputs are each projected to
    ``emb_size`` with Linear + ReLU, passed through ``nn.Transformer`` and mapped
    to one output per step by a linear head.

    Args:
        num_encoder_layers (int): Number of encoder layers.
        num_decoder_layers (int): Number of decoder layers.
        emb_size (int): Model width (d_model).
        nhead (int): Number of attention heads.
        dim_feedforward (int, optional): Feed-forward width. Defaults to 128.
        dropout (float, optional): Dropout probability. Defaults to 0.1.
    """
    def __init__(self,
                 num_encoder_layers: int,
                 num_decoder_layers: int,
                 emb_size: int,
                 nhead: int,
                 dim_feedforward: int = 128,
                 dropout: float = 0.1):
        super(Seq2SeqTransformer, self).__init__()
        self.transformer = Transformer(d_model=emb_size,
                                       nhead=nhead,
                                       num_encoder_layers=num_encoder_layers,
                                       num_decoder_layers=num_decoder_layers,
                                       dim_feedforward=dim_feedforward,
                                       dropout=dropout)
        self.generator = nn.Linear(emb_size, 1)
        self.mlp_inp = nn.Sequential(
            nn.Linear(4, emb_size),
            nn.ReLU()
        )
        self.mlp_out = nn.Sequential(
            nn.Linear(1, emb_size),
            nn.ReLU()
        )

    def forward(self,
                src: Tensor,
                tgt: Tensor,
                src_mask: Tensor,
                tgt_mask: Tensor,
                src_padding_mask: Tensor,
                tgt_padding_mask: Tensor,
                memory_key_padding_mask: Tensor):
        """
        Args:
            src (Tensor): Batch-first inputs, (batch, src_len, 4).
            tgt (Tensor): Batch-first decoder inputs, (batch, tgt_len, 1).
            src_mask (Tensor): Accepted for interface compatibility; not used.
            tgt_mask (Tensor): Attention mask over decoder positions.
            src_padding_mask (Tensor): Accepted for interface compatibility; not used.
            tgt_padding_mask (Tensor): Key padding mask for the decoder inputs.
            memory_key_padding_mask (Tensor): Key padding mask for the encoder output.

        Returns:
            Tensor: Sequence-first predictions, (tgt_len, batch, 1).
        """
        # nn.Transformer is sequence-first here, so swap the batch and time axes.
        # A plain .view(seq, batch, emb) would only reinterpret memory and mix
        # samples together, besides hard-coding the sizes.
        src_emb = self.mlp_inp(src).transpose(0, 1)
        tgt_emb = self.mlp_out(tgt).transpose(0, 1)

        outs = self.transformer(src_emb, tgt_emb, None, tgt_mask, None,
                                None, tgt_padding_mask, memory_key_padding_mask)
        return self.generator(outs)

    def encode(self, src: Tensor, src_mask: Tensor):
        """Runs only the encoder stack on ``src`` (passed to it unchanged)."""
        return self.transformer.encoder(src, src_mask)

    def decode(self, tgt: Tensor, memory: Tensor, tgt_mask: Tensor):
        """Runs only the decoder stack on ``tgt`` against the encoder ``memory``."""
        return self.transformer.decoder(tgt, memory,
                          tgt_mask)
# During training, we need a subsequent-position mask that prevents the model from looking at future positions when making predictions. We also need masks to hide source and target padding positions. The functions below take care of both.

def generate_square_subsequent_mask(sz):
    """
    Builds a (sz, sz) float mask on DEVICE: 0.0 on and below the diagonal,
    -inf strictly above it.
    """
    visible = torch.tril(torch.ones((sz, sz), device=DEVICE)) == 1
    causal = torch.zeros((sz, sz), dtype=torch.float, device=DEVICE)
    return causal.masked_fill(~visible, float('-inf'))


def _padding_mask(seq, pad_value=-100):
    """
    Returns a (batch, seq_len) bool mask for a sequence-first tensor, True where
    a step equals ``pad_value``.

    For tensors with trailing feature axes, a step counts as padding only when
    every feature equals ``pad_value`` -- NOTE(review): confirm this is the
    intended padding convention for continuous features.
    """
    is_pad = seq == pad_value
    if is_pad.dim() > 2:
        # Collapse the feature axes: key padding masks must be 2-D.
        is_pad = is_pad.flatten(2).all(dim=-1)
    return is_pad.transpose(0, 1)


def create_mask(src, tgt):
    """
    Builds the attention and padding masks for a source / target pair.

    Args:
        src (Tensor): Sequence-first source, (src_len, batch) or (src_len, batch, features).
        tgt (Tensor): Sequence-first target, (tgt_len, batch) or (tgt_len, batch, features).

    Returns:
        tuple: (src_mask, tgt_mask, src_padding_mask, tgt_padding_mask) where
            src_mask is an all-False (src_len, src_len) bool tensor on DEVICE,
            tgt_mask comes from generate_square_subsequent_mask(tgt_len), and
            the padding masks are (batch, seq_len) bool tensors marking -100 steps.
    """
    src_seq_len = src.shape[0]
    tgt_seq_len = tgt.shape[0]

    tgt_mask = generate_square_subsequent_mask(tgt_seq_len)
    src_mask = torch.zeros((src_seq_len, src_seq_len), device=DEVICE).type(torch.bool)

    src_padding_mask = _padding_mask(src)
    tgt_padding_mask = _padding_mask(tgt)

    return src_mask, tgt_mask, src_padding_mask, tgt_padding_mask

In [116]:
# import torch
# import torch.nn as nn


# class RNNModel(nn.Module):
#     def __init__(
#         self,
#         input_dim=4,
#         lstm_dim=256,
#         dense_dim=256,
#         logit_dim=256,
#         num_classes=1,
#     ):
#         super().__init__()

#         self.mlp = nn.Sequential(
#             nn.Linear(input_dim, dense_dim),
#             nn.ReLU()
#         )

#         self.rnn = nn.LSTM(dense_dim, lstm_dim, batch_first=True)
#         self.rnn_back = nn.LSTM(dense_dim, lstm_dim, batch_first=True)

#         self.logits = nn.Sequential(
# #             nn.Linear(lstm_dim * 2, logit_dim),
# #             nn.ReLU(),
# #             nn.Linear(logit_dim, num_classes),
#             nn.Dropout(0.2),
#             nn.Linear(lstm_dim * 2, num_classes),
#         )

#     def forward(self, x):
#         features = self.mlp(x)
#         features_backward = torch.flip(features, (1, ))
        
#         rnn_output, (h, *_) = self.rnn(features)
#         rnn_output_backward, (h_backward, *_) = self.rnn_back(features_backward)
        
# #         h, h_backward = h.squeeze(0), h_backward.squeeze(0)
        
#         rnn_output_bidirectional = torch.cat([rnn_output, rnn_output_backward], 2)
        
        
# #         features, _ = self.lstm(features)
#         pred = self.logits(rnn_output_bidirectional)
#         return pred

## Training

### Utils

In [117]:
import os
import torch
import random
import numpy as np


def seed_everything(seed):
    """
    Puts the random number generators used here into a known state.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and CUDA), exports
    ``PYTHONHASHSEED`` and sets cuDNN to deterministic, non-benchmark mode.

    Args:
        seed (int): Seed value shared by all generators.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
    
    
def count_parameters(model, all=False):
    """
    Sums the number of elements over a model's parameters.

    Args:
        model (torch model): Model whose parameters are counted.
        all (bool, optional): If True, frozen (requires_grad=False) parameters
            are counted too. Defaults to False.

    Returns:
        int: Number of parameters.
    """
    return sum(
        param.numel()
        for param in model.parameters()
        if all or param.requires_grad
    )

    
def worker_init_fn(worker_id):
    """
    Re-seeds NumPy inside a DataLoader worker.

    The new seed is the first word of the current NumPy state plus the worker
    id, so each worker gets a different seed.

    Args:
        worker_id (int): Id of the worker.
    """
    base_seed = np.random.get_state()[1][0]
    np.random.seed(base_seed + worker_id)
    

def save_model_weights(model, filename, verbose=1, cp_folder=""):
    """
    Writes a model's state dict to ``cp_folder/filename``.

    Args:
        model (torch model): Model whose weights are saved.
        filename (str): Name of the checkpoint file.
        verbose (int, optional): If truthy, prints the destination path. Defaults to 1.
        cp_folder (str, optional): Destination folder. Defaults to "".
    """
    target = os.path.join(cp_folder, filename)
    if verbose:
        print(f"\n -> Saving weights to {target}\n")
    torch.save(model.state_dict(), target)

### Metric & Loss
> The competition will be scored as the mean absolute error between the predicted and actual pressures during the inspiratory phase of each breath. The expiratory phase is not scored.

### Fit

In [307]:
# Scratch cell: builds the masks for one batch by hand.
# NOTE(review): `a` is not defined anywhere in this notebook (its only assignment is
# commented out further down), so this cell relies on leftover kernel state.
inps = a['input'] # original note: "so that the batch is last"
ps = a['p'].unsqueeze(2) # original note: "so that the batch is last"; unsqueeze(2) appends a trailing axis

# Decoder input: every step except the last one.
ps_input = ps[:, :-1]

# NOTE(review): the reshape sizes (80 / 79 steps, batch of 64) are hard-coded.
(inps_mask, ps_input_mask, 
inps_padding_mask, ps_input_padding_mask) = create_mask(inps.reshape(80, 64, 4), 
                                                        ps_input.reshape(79, 64, 1))
# pred = model(inps, ps_input, inps_mask, ps_input_mask,
#            inps_padding_mask, ps_input_padding_mask, inps_padding_mask)

In [308]:
# Shape check on two of the masks returned by create_mask in the previous cell.
inps_mask.size(), inps_padding_mask.size()

In [270]:
# NOTE(review): `mlp_out` is only defined in a cell further down this file, so this
# cell fails when the notebook is run top to bottom on a fresh kernel.
mlp_out(ps_input).reshape(79, 64, 32).size()

In [257]:
# Stand-alone copies of the projection layers and the Transformer, used by the
# shape experiments in the neighbouring cells.
mlp_inp = nn.Sequential(nn.Linear(4, 32), nn.ReLU())
mlp_out = nn.Sequential(nn.Linear(1, 32), nn.ReLU())

transformer = Transformer(
    d_model=32,
    nhead=8,
    num_encoder_layers=2,
    num_decoder_layers=2,
    dim_feedforward=128,
    dropout=0.2,
)

In [277]:
# Scratch cells from here on: shape inspections of the tensors built in the cells above.
# NOTE(review): the execution counts are out of order, so these cells depend on hidden kernel state.
inps_mask.size()

In [309]:
# Encoder pass on the projected inputs, without a source mask.
m = transformer.encoder(mlp_inp(inps).reshape(80, 64, 32), None)

In [310]:
# Decoder pass against the encoder output `m`; the target mask is moved to CPU first.
transformer.decoder(mlp_out(ps_input).reshape(79, 64, 32), m, ps_input_mask.to(torch.device('cpu'))).size()

In [242]:
inps.size(), ps_input.size()

In [247]:
ps_input.reshape(79, 64, 1).size()

In [255]:
ps_input_padding_mask.size()

In [292]:
mlp_out(ps_input).size()

In [130]:
mlp_inp(inps.T).T[:-1, :].size()

In [64]:
inps.size(), ps.size(), ps_input.size()

In [61]:
ps[:, :-1].size()

In [50]:
inps_mask

In [314]:
import gc
import time
import torch
import numpy as np
from torch.utils.data import DataLoader
from transformers import get_linear_schedule_with_warmup


def fit(
    model,
    train_dataset,
    val_dataset,
    loss_name="L1Loss",
    optimizer="Adam",
    epochs=50,
    batch_size=32,
    val_bs=32,
    warmup_prop=0.1,
    lr=1e-3,
    num_classes=1,
    verbose=1,
    first_epoch_eval=0,
    device="cuda"
):
    """
    Trains a model with Adam and an L1 loss, evaluating on the validation set
    after every epoch.

    Args:
        model (torch model): Model to train.
        train_dataset (torch dataset): Items are dicts with "input" and "p" tensors.
        val_dataset (torch dataset): Same item format; must expose ``df['pressure']``.
        loss_name (str, optional): Not used by the body. Defaults to "L1Loss".
        optimizer (str, optional): Not used by the body (Adam is always built). Defaults to "Adam".
        epochs (int, optional): Number of epochs. Defaults to 50.
        batch_size (int, optional): Training batch size. Defaults to 32.
        val_bs (int, optional): Validation batch size. Defaults to 32.
        warmup_prop (float, optional): Not used by the body (scheduler is commented out). Defaults to 0.1.
        lr (float, optional): Not used by the body (learning rate is hard-coded). Defaults to 1e-3.
        num_classes (int, optional): Not used by the body. Defaults to 1.
        verbose (int, optional): Print a summary every ``verbose`` epochs. Defaults to 1.
        first_epoch_eval (int, optional): First epoch from which validation scores are printed. Defaults to 0.
        device (str, optional): Device for torch. Defaults to "cuda".

    Returns:
        numpy array: Flattened validation predictions of the last epoch.
    """
    avg_val_loss = 0.

    # Optimizer
    # NOTE(review): this rebinds the `optimizer` argument and ignores `lr`;
    # the learning rate is fixed at 1e-4 here.
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9)
    criterion = torch.nn.L1Loss()
    criterion = criterion.to(device)
    # Data loaders
    
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=NUM_WORKERS,
        pin_memory=True,
        worker_init_fn=worker_init_fn
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=val_bs,
        shuffle=False,
        num_workers=NUM_WORKERS,
        pin_memory=True,
    )

    # Loss
#     loss_fct = getattr(torch.nn, loss_name)(reduction="none")
#     loss_fct = VentilatorLoss()

    # Scheduler
#     num_warmup_steps = int(warmup_prop * epochs * len(train_loader))
#     num_training_steps = int(epochs * len(train_loader))
#     scheduler = get_linear_schedule_with_warmup(
#         optimizer, num_warmup_steps, num_training_steps
#     )

    for epoch in range(epochs):
        model.train()
        model.zero_grad()
        start_time = time.time()

        avg_loss = 0
        avg_metric = []
        for i, data in enumerate(train_loader):
            
            
            optimizer.zero_grad()
            # --- transformer-specific forward pass: start ---
            inps = data['input'].to(device) # original note: "so that the batch is last"
            ps = data['p'].unsqueeze(2).to(device) # original note: "so that the batch is last"; unsqueeze(2) appends a trailing axis

            # Decoder input: every pressure step except the last one.
            ps_input = ps[:, :-1]
            
            
            # NOTE(review): the reshape sizes (80 / 79 steps, batch of 64) are hard-coded,
            # while the default batch_size of this function is 32 -- confirm.
            (inps_mask, ps_input_mask, 
            inps_padding_mask, ps_input_padding_mask) = create_mask(inps.reshape(80, 64, 4), 
                                                                    ps_input.reshape(79, 64, 1))
            pred = model(inps, ps_input, inps_mask, ps_input_mask,
                           inps_padding_mask, ps_input_padding_mask, inps_padding_mask)

            # --- transformer-specific forward pass: end ---
            
#             pred = model(data['input'].to(device)).squeeze(-1)

            # NOTE(review): `pred` is computed from `ps_input`, which is one step shorter
            # than `ps` -- confirm that the shapes of `pred` and `ps` actually line up.
            loss = criterion(
                pred,
                ps
            )
            metric = mean_absolute_error(data['p'], pred.detach().to('cpu').numpy())
            avg_metric.append(metric)
            loss.backward()
            avg_loss += loss.item() / len(train_loader)
            # Running mean of the per-batch metric, printed every 10 batches.
            if not (i+1) % 10:
                print(f'Metriccc: {np.mean(avg_metric)};')
            
            optimizer.step()

        model.eval()
        mae, avg_val_loss = 0, 0
        preds = []

        with torch.no_grad():
            for data in val_loader:
                # NOTE(review): the model is called with a single tensor here, unlike the
                # seven-argument call in the training loop above -- confirm which model
                # interface this function is meant to support.
                pred = model(data['input'].to(device)).squeeze(-1)

                loss = criterion(
                    pred.detach(), 
                    data['p'].to(device)
                )
                avg_val_loss += loss.item() / len(val_loader)

                preds.append(pred.detach().cpu().numpy())
        
        preds = np.concatenate(preds, 0).flatten()
#         print(preds.shape, np.array(val_dataset.df['pressure'].values.tolist()).flatten().shape)
        # MAE over every validation time step (no masking is applied here).
        mae = mean_absolute_error(np.array(val_dataset.df['pressure'].values.tolist()).flatten(), preds)

        elapsed_time = time.time() - start_time
        if (epoch + 1) % verbose == 0:
            # Scale the last epoch's duration by the reporting interval.
            elapsed_time = elapsed_time * verbose
#             lr = scheduler.get_last_lr()[0]
            print(
                f"Epoch {epoch + 1:02d}/{epochs:02d} \t t={elapsed_time:.0f}s \t"
                f"loss={avg_loss:.3f}",
                f"metric={np.mean(avg_metric):.3f}",
                end="\t",
            )

            if (epoch + 1 >= first_epoch_eval) or (epoch + 1 == epochs):
                print(f"val_loss={avg_val_loss:.3f}\tmae={mae:.3f}")
            else:
                print("")

    # Drop the loaders and last-batch tensors, then release cached GPU memory.
    del (val_loader, train_loader, loss, data, pred)
    gc.collect()
    torch.cuda.empty_cache()

    return preds


### Predict

In [192]:
def predict(
    model,
    dataset,
    batch_size=64,
    device="cuda"
):
    """
    Runs a model over a dataset in eval mode, without gradients.

    Each batch's "input" tensor is moved to ``device`` and passed to the model as
    its only argument; the last axis of the output is squeezed.

    Args:
        model (torch model): Model to predict with.
        dataset (torch dataset): Dataset whose items are dicts with an "input" tensor.
        batch_size (int, optional): Batch size. Defaults to 64.
        device (str, optional): Device for torch. Defaults to "cuda".

    Returns:
        numpy array: Per-batch predictions concatenated along the first axis.
    """
    model.eval()

    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=NUM_WORKERS,
    )

    with torch.no_grad():
        batch_outputs = [
            model(batch['input'].to(device)).squeeze(-1).detach().cpu().numpy()
            for batch in loader
        ]

    return np.concatenate(batch_outputs, 0)

## Train

In [228]:
def train(config, df_train, df_val, df_test, fold):
    """
    Trains and validates a Seq2SeqTransformer on one fold, then predicts on the test set.

    Args:
        config (Config): Parameters.
        df_train (pandas dataframe): Training rows.
        df_val (pandas dataframe): Validation rows.
        df_test (pandas dataframe): Test rows.
        fold (int): Selected fold; used in the checkpoint file name.

    Returns:
        np array: Validation predictions returned by fit.
        np array: Test predictions returned by predict.
    """

    seed_everything(config.seed)

    NHEAD = 8
    FFN_HID_DIM = 128
    NUM_ENCODER_LAYERS = 2
    NUM_DECODER_LAYERS = 2
    EMB_SIZE = 32

    # Bound to `model` because predict, save_model_weights and the final `del`
    # below all refer to the network by that name.
    model = Seq2SeqTransformer(NUM_ENCODER_LAYERS, NUM_DECODER_LAYERS, EMB_SIZE,
                               NHEAD, FFN_HID_DIM)

    # Xavier-initialise every weight matrix; 1-D parameters keep their defaults.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)

    model = model.to(DEVICE)

    train_dataset = VentilatorDataset(df_train)
    val_dataset = VentilatorDataset(df_val)
    test_dataset = VentilatorDataset(df_test)

    n_parameters = count_parameters(model)

    print(f"    -> {len(train_dataset)} training breathes")
    print(f"    -> {len(val_dataset)} validation breathes")
    print(f"    -> {n_parameters} trainable parameters\n")

    pred_val = fit(
        model,
        train_dataset,
        val_dataset,
        loss_name=config.loss,
        optimizer=config.optimizer,
        epochs=config.epochs,
        batch_size=config.batch_size,
        val_bs=config.val_bs,
        lr=config.lr,
        warmup_prop=config.warmup_prop,
        verbose=config.verbose,
        first_epoch_eval=config.first_epoch_eval,
        device=config.device,
    )

    pred_test = predict(
        model,
        test_dataset,
        batch_size=config.val_bs,
        device=config.device
    )

    if config.save_weights:
        save_model_weights(
            model,
            f"{config.selected_model}_{fold}.pt",
            cp_folder="",
        )

    # Release the model and datasets before the next fold.
    del (model, train_dataset, val_dataset, test_dataset)
    gc.collect()
    torch.cuda.empty_cache()

    return pred_val, pred_test

### $k$-fold

In [229]:
from sklearn.model_selection import GroupKFold

def k_fold(config, df, df_test):
    """
    Runs a k-fold cross validation with folds grouped by "breath_id".

    Only the folds listed in ``config.selected_folds`` are trained; the
    out-of-fold entries of the other folds stay at zero.

    Args:
        config (Config): Parameters (uses ``k`` and ``selected_folds``).
        df (pandas dataframe): Training rows.
        df_test (pandas dataframe): Test rows.

    Returns:
        np array: Out-of-fold predictions, one per row of ``df``.
        np array: Test predictions averaged over the trained folds.
    """
    oof_predictions = np.zeros(len(df))
    test_predictions = []

    splitter = GroupKFold(n_splits=config.k)
    folds = list(splitter.split(X=df, y=df, groups=df["breath_id"]))

    for fold_number, (train_idx, val_idx) in enumerate(folds):
        if fold_number not in config.selected_folds:
            continue

        print(f"\n-------------   Fold {fold_number + 1} / {config.k}  -------------\n")

        fold_train = df.iloc[train_idx].copy().reset_index(drop=True)
        fold_val = df.iloc[val_idx].copy().reset_index(drop=True)
        pred_val, pred_test = train(config, fold_train, fold_val, df_test, fold_number)

        oof_predictions[val_idx] = pred_val.flatten()
        test_predictions.append(pred_test.flatten())

    return oof_predictions, np.mean(test_predictions, 0)

## Main

In [230]:
class Config:
    """
    Run options and hyper-parameters read by k_fold and train.
    """
    # --- General ---
    seed = 42
    verbose = 1
    save_weights = True
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # --- Cross validation: train every one of the k folds ---
    k = 5
    selected_folds = list(range(k))

    # --- Model ---
    # selected_model is only used as the checkpoint file-name prefix in train.
    selected_model = 'rnn'
    # The sizes below are only referenced by the commented-out RNNModel constructor.
    input_dim = 4
    dense_dim = 32
    lstm_dim = 256
    logit_dim = 32
    num_classes = 1

    # --- Training ---
    loss = "L1Loss"  # not used
    optimizer = "Adam"
    epochs = 150
    batch_size = 64
    val_bs = 64

    lr = 1e-3
    warmup_prop = 0
    first_epoch_eval = 0

In [231]:
# np.array(dataset.df['pressure'].values.tolist()).flatten()

In [232]:
# a = k_fold(
#     Config, 
#     df_train,
#     df_test,
# )

In [233]:
# a['p'].unsqueeze(2).T[:, :-1].size()

In [315]:
# Run the cross validation: out-of-fold predictions for train, fold-averaged ones for test.
pred_oof, pred_test = k_fold(Config, df_train, df_test)

In [39]:
# Scratch check: unsqueeze(2) turns this (2, 3) tensor into a (2, 3, 1) one.
torch.Tensor([[1,2,3], [1,2,4]]).unsqueeze(2)

### Predictions

In [None]:
def plot_prediction(sample_id, df):
    """
    Plots the prediction of one breath over time, together with u_out and,
    when the frame has it, the true pressure.

    Args:
        sample_id (int): breath_id of the breath to plot.
        df (pandas dataframe): Needs "breath_id", "time_step", "u_out" and "pred"
            columns; "pressure" is optional.
    """
    df_breath = df[df['breath_id'] == sample_id]

    has_pressure = 'pressure' in df.columns
    cols = ['pred', 'pressure', 'u_out'] if has_pressure else ['pred', 'u_out']

    plt.figure(figsize=(12, 4))
    for col in cols:
        plt.plot(df_breath['time_step'], df_breath[col], label=col)

    title = f'Sample {sample_id}'
    if has_pressure:
        # MAE restricted to the scored (inspiratory) phase, assumed here to be the
        # rows with u_out == 0 -- TODO confirm against the competition rules.
        inspiratory = df_breath['u_out'].values == 0
        abs_err = np.abs(df_breath['pressure'].values - df_breath['pred'].values)
        if inspiratory.any():
            metric = abs_err[inspiratory].mean()
            title = f'Sample {sample_id} - MAE={metric:.3f}'

    plt.legend()
    plt.title(title)

In [None]:
# Attach the out-of-fold predictions to the training frame (modifies df_train in place).
df_train["pred"] = pred_oof

In [None]:
# Plot the first five training breaths.
for i in df_train['breath_id'].unique()[:5]:
    plot_prediction(i, df_train)

## Sub

In [None]:
# Attach the fold-averaged predictions to the test frame (modifies df_test in place).
df_test['pred'] = pred_test

# Plot the first five test breaths.
for i in df_test['breath_id'].unique()[:5]:
    plot_prediction(i, df_test)

In [None]:
# Fill the sample submission with the test predictions and write it out without the index.
sub['pressure'] = pred_test
sub.to_csv('submission.csv', index=False)

**Thanks for reading !**