# Ventilator 1dCNN LSTM

In most shared notebooks, the features fed into the LSTM are hand-engineered.
Here I use a 1D-CNN to extract the features instead.

Using only the 4 basic features (R, C, u_in, u_out), I got an OOF score of 0.387 and an LB score of 0.559.

## Update
### 2021.10.27 (Version 6)
Multi-kernel (kernel sizes 1, 2, 3, 4) CNN and classification training.

I got an OOF score of 0.4126 and an LB score of 0.5946.
That is a huge overfit.

<img src="https://raw.githubusercontent.com/trtd56/RFCX/main/tmp/%E3%82%B9%E3%82%AF%E3%83%AA%E3%83%BC%E3%83%B3%E3%82%B7%E3%83%A7%E3%83%83%E3%83%88%202021-10-27%2015.39.51.png" alt="drawing" width="400"/>


In [None]:
# Fetch the Kaggle user secret stored under the label "wandb" and pass it to
# the `wandb login` command line (presumably the W&B API key — confirm).
from kaggle_secrets import UserSecretsClient
secret_label = "wandb"
secret_value = UserSecretsClient().get_secret(secret_label)
# IPython shell escape: `$secret_value` is interpolated from the Python variable.
!wandb login $secret_value

In [None]:
import gc
import os
import random
import wandb

import numpy as np
import pandas as pd

from sklearn.model_selection import GroupKFold
from tqdm.notebook import tqdm

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

from transformers import AdamW
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup


# Prefer the GPU, but fall back to the CPU so the notebook still runs (slowly)
# on a machine without CUDA instead of failing at the first `.to(device)`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
class config:
    """Static experiment settings, read as class attributes throughout the notebook."""
    # Run identifier: names the output sub-directory and the W&B run/group.
    EXP_NAME = "exp138_classifiy"
    
    # Directory holding train.csv / test.csv / sample_submission.csv.
    INPUT = "/kaggle/input/ventilator-pressure-prediction"
    # Root directory for checkpoints, per-fold submissions and OOF output.
    OUTPUT = "/kaggle/working"
    # Number of GroupKFold splits (grouped by breath_id in main()).
    N_FOLD = 5
    # Default seed used by set_seed().
    SEED = 0
    
    # Peak learning rate handed to AdamW.
    LR = 5e-3
    # Epochs per fold; also determines the total scheduler steps.
    N_EPOCHS = 50
    # Output width of the per-timestep input embedding.
    EMBED_SIZE = 64
    # Channel count of each CNN branch and hidden size of each LSTM direction.
    HIDDEN_SIZE = 256
    # Batch size (in breaths) for every DataLoader.
    BS = 512
    # Weight decay handed to AdamW.
    WEIGHT_DECAY = 1e-5
    # NOTE(review): T_MAX and MIN_LR are not referenced by the visible code;
    # they look like leftovers from a CosineAnnealing* scheduler — confirm.
    T_MAX = 50
    MIN_LR = 1e-6
    
    # Attribute names that main() skips when copying settings into wandb.config.
    NOT_WATCH_PARAM = ['INPUT']

In [None]:
def set_seed(seed=config.SEED):
    """Seed every random number generator the notebook relies on.

    Seeds Python's `random`, NumPy and PyTorch (CPU and all CUDA devices),
    exports `PYTHONHASHSEED`, and switches cuDNN to deterministic mode with
    autotuning (benchmark) turned off.

    Args:
        seed: Integer seed; defaults to `config.SEED`.
    """
    # cuDNN flags: deterministic algorithms, no benchmark-driven selection.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    os.environ["PYTHONHASHSEED"] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

In [None]:
class VentilatorDataset(Dataset):
    """Dataset that yields one sample per `breath_id` group of a DataFrame.

    Each sample is a dict with
      * "X": float tensor built from the columns R_cate, C_cate, u_in, u_out
        (one row per row of the breath), and
      * "y": long tensor of class indices looked up in `label_dic` from the
        `pressure` column, or the single placeholder `[-1]` when no
        `label_dic` was given.
    """

    FEATURE_COLUMNS = ['R_cate', 'C_cate', 'u_in', 'u_out']

    def __init__(self, df, label_dic=None):
        """Split `df` into per-breath frames (groupby order) and keep the label map."""
        self.label_dic = label_dic
        self.dfs = []
        for _, breath_df in df.groupby("breath_id"):
            self.dfs.append(breath_df)

    def __len__(self):
        """Return the number of breaths."""
        return len(self.dfs)

    def __getitem__(self, item):
        """Return the feature/label tensors of the `item`-th breath."""
        breath_df = self.dfs[item]

        features = breath_df[self.FEATURE_COLUMNS].values
        pressures = breath_df['pressure'].values

        if self.label_dic is not None:
            # Map every raw pressure value to its class index.
            classes = [self.label_dic[p] for p in pressures]
        else:
            # Unlabelled data: a single dummy target.
            classes = [-1]

        return {
            "X": torch.tensor(features).float(),
            "y": torch.tensor(classes).long(),
        }

In [None]:
class VentilatorModel(nn.Module):
    """Per-timestep classifier: linear embedding -> multi-kernel 1D-CNN -> 4 BiLSTMs -> head.

    The input is a `(batch, seq, 4)` float tensor whose last column (index 3)
    is used to split the loss into two partitions (`== 0` and `!= 0`); in this
    notebook that column is `u_out`. The output is one logit vector of size
    `n_classes` per timestep.

    Args:
        n_classes: Number of output classes (default 950, the value that was
            previously hard-coded).
    """

    # (lag, weight) pairs for the auxiliary "neighbouring class" loss terms.
    NEIGHBOUR_WEIGHTS = ((1, 0.4), (2, 0.2), (3, 0.1), (4, 0.1))

    def __init__(self, n_classes=950):
        super().__init__()
        self.n_classes = n_classes

        # NOTE: layers are created in a fixed order on purpose — the order in
        # which parameters are initialised determines the seeded weights.
        self.seq_emb = nn.Sequential(
            nn.Linear(4, config.EMBED_SIZE),
            nn.LayerNorm(config.EMBED_SIZE),
            nn.ReLU(),
            nn.Dropout(0.2),
        )

        # Four CNN branches with kernel sizes 1..4. The first conv of a branch
        # pads by (k - 1) on both sides and the second conv uses no padding,
        # so every branch returns the original sequence length.
        self.cnn_1_1 = nn.Conv1d(config.EMBED_SIZE, config.HIDDEN_SIZE, kernel_size=1, padding=0)
        self.cnn_1_2 = nn.Conv1d(config.HIDDEN_SIZE, config.HIDDEN_SIZE, kernel_size=1, padding=0)
        self.cnn_2_1 = nn.Conv1d(config.EMBED_SIZE, config.HIDDEN_SIZE, kernel_size=2, padding=1)
        self.cnn_2_2 = nn.Conv1d(config.HIDDEN_SIZE, config.HIDDEN_SIZE, kernel_size=2, padding=0)
        self.cnn_3_1 = nn.Conv1d(config.EMBED_SIZE, config.HIDDEN_SIZE, kernel_size=3, padding=2)
        self.cnn_3_2 = nn.Conv1d(config.HIDDEN_SIZE, config.HIDDEN_SIZE, kernel_size=3, padding=0)
        self.cnn_4_1 = nn.Conv1d(config.EMBED_SIZE, config.HIDDEN_SIZE, kernel_size=4, padding=3)
        self.cnn_4_2 = nn.Conv1d(config.HIDDEN_SIZE, config.HIDDEN_SIZE, kernel_size=4, padding=0)

        # The first LSTM consumes the 4 concatenated CNN branches; the later
        # ones consume the previous bidirectional output (2 * hidden).
        self.lstm1 = nn.LSTM(config.HIDDEN_SIZE * 4, config.HIDDEN_SIZE, batch_first=True, bidirectional=True)
        self.lstm2 = nn.LSTM(config.HIDDEN_SIZE * 2, config.HIDDEN_SIZE, batch_first=True, bidirectional=True)
        self.lstm3 = nn.LSTM(config.HIDDEN_SIZE * 2, config.HIDDEN_SIZE, batch_first=True, bidirectional=True)
        self.lstm4 = nn.LSTM(config.HIDDEN_SIZE * 2, config.HIDDEN_SIZE, batch_first=True, bidirectional=True)
        self.head = nn.Sequential(
            nn.Linear(config.HIDDEN_SIZE * 2, config.HIDDEN_SIZE * 2),
            nn.LayerNorm(config.HIDDEN_SIZE * 2),
            nn.ReLU(),
            nn.Dropout(0.),
            nn.Linear(config.HIDDEN_SIZE * 2, n_classes),
        )

        # Custom LSTM init: orthogonal for tensors with >= 2 dims (weights),
        # normal for 1-D tensors (biases).
        for module in self.modules():
            if isinstance(module, nn.LSTM):
                print(f'init {module}')
                for param in module.parameters():
                    if len(param.shape) >= 2:
                        nn.init.orthogonal_(param.data)
                    else:
                        nn.init.normal_(param.data)

    def forward(self, X, y=None):
        """Run the network and, if targets are given, compute the loss.

        Args:
            X: Float tensor of shape `(batch, seq, 4)`.
            y: Optional long tensor of class indices, shape `(batch, seq)`.

        Returns:
            `(logits, loss)` where `logits` has shape `(batch, seq, n_classes)`
            and `loss` is `None` when `y` is `None`.
        """
        h = self.seq_emb(X)
        # CNN: Conv1d expects (batch, channels, seq).
        h = h.permute(0, 2, 1)

        h1 = F.relu(self.cnn_1_1(h))
        h1 = F.relu(self.cnn_1_2(h1))
        h2 = F.relu(self.cnn_2_1(h))
        h2 = F.relu(self.cnn_2_2(h2))
        h3 = F.relu(self.cnn_3_1(h))
        h3 = F.relu(self.cnn_3_2(h3))
        h4 = F.relu(self.cnn_4_1(h))
        h4 = F.relu(self.cnn_4_2(h4))
        h = torch.cat((h1, h2, h3, h4), 1)

        # Back to (batch, seq, channels) for the batch_first LSTMs.
        h = h.permute(0, 2, 1)
        # LSTM: each layer is initialised with the final (h, c) state of the
        # previous layer.
        out, (hn, cn) = self.lstm1(h, None)
        out, (hn, cn) = self.lstm2(out, (hn, cn))
        out, (hn, cn) = self.lstm3(out, (hn, cn))
        h, _ = self.lstm4(out, (hn, cn))
        # Head
        logits = self.head(h)

        if y is None:
            loss = None
        else:
            # True where input column 3 equals 0.
            mask = X[:, :, 3] == 0
            loss = self.loss_fn(logits, y, mask)

        return logits, loss

    def loss_fn(self, y_pred, y_true, mask):
        """Cross-entropy with extra, down-weighted terms on neighbouring classes.

        For each partition (`mask` and `mask == 0`) the loss is the mean
        cross-entropy against the true class plus, for every `(lag, w)` in
        `NEIGHBOUR_WEIGHTS`, `w` times the cross-entropy against the class
        `lag` below and `lag` above (clamped to the valid class range). The
        `mask == 0` partition is weighted by 0.5.

        A partition that selects no timestep contributes zero; averaging the
        cross-entropy over an empty selection would otherwise yield NaN.

        Args:
            y_pred: Logits, shape `(batch, seq, n_classes)`.
            y_true: Long class indices, shape `(batch, seq)`.
            mask: Boolean tensor, shape `(batch, seq)`.

        Returns:
            Scalar loss tensor.
        """
        # Read the class count from the logits rather than hard-coding it.
        n_classes = y_pred.shape[-1]
        top_class = n_classes - 1
        criterion = nn.CrossEntropyLoss()

        def partition_loss(selector):
            # Gather the selected timesteps once and reuse them for all terms.
            logits = y_pred[selector].reshape(-1, n_classes)
            target = y_true[selector].reshape(-1)
            if target.numel() == 0:
                # Zero that stays on the right device and in the autograd graph.
                return logits.sum() * 0.0

            total = criterion(logits, target)
            for lag, w in self.NEIGHBOUR_WEIGHTS if self is not None else VentilatorModel.NEIGHBOUR_WEIGHTS:
                # negative neighbour, floored at class 0
                total = total + criterion(logits, (target - lag).clamp(min=0).long()) * w
                # positive neighbour, capped at the last class
                total = total + criterion(logits, (target + lag).clamp(max=top_class).long()) * w
            return total

        loss_u_out_0 = partition_loss(mask)
        loss_u_out_1 = partition_loss(mask == 0)
        return loss_u_out_0 + loss_u_out_1 * 0.5

In [None]:
def train_loop(model, optimizer, scheduler, loader):
    """Train `model` for one pass over `loader`.

    The optimizer and the scheduler are both stepped once per batch.

    Args:
        model: Module returning `(output, loss)` when called with `(X, y)`.
        optimizer: Optimizer over the model parameters.
        scheduler: Learning-rate scheduler, stepped after every batch.
        loader: Iterable of dicts with tensors under the keys 'X' and 'y'.

    Returns:
        `(mean batch loss, mean learning rate)` over the pass; the learning
        rate of a step is the mean over the optimizer's param groups, read
        before the optimizer/scheduler step.
    """
    model.train()
    optimizer.zero_grad()

    batch_losses = []
    batch_lrs = []
    for batch in loader:
        features = batch['X'].to(device)
        targets = batch['y'].to(device)
        _, loss = model(features, targets)

        batch_losses.append(loss.item())
        group_lrs = [group["lr"] for group in optimizer.param_groups]
        batch_lrs.append(np.array(group_lrs).mean())

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        scheduler.step()

    mean_loss = np.array(batch_losses).mean()
    mean_lr = np.array(batch_lrs).mean()
    return mean_loss, mean_lr

def valid_loop(model, loader, target_dic_inv):
    """Evaluate `model` on labelled data without tracking gradients.

    Args:
        model: Module returning `(logits, loss)` when called with `(X, y)`.
        loader: Iterable of dicts with tensors under the keys 'X' and 'y'.
        target_dic_inv: 1-D tensor indexed by class id, giving the value each
            class stands for.

    Returns:
        `(mean batch loss, predictions)` where predictions is a flat NumPy
        array holding `target_dic_inv[argmax class]` for every timestep, in
        loader order.
    """
    model.eval()

    batch_losses = []
    batch_classes = []
    with torch.no_grad():
        for batch in loader:
            logits, loss = model(batch['X'].to(device), batch['y'].to(device))
            batch_losses.append(loss.item())
            # Arg-max over the class axis (last of the 3 dims).
            batch_classes.append(logits.argmax(2).cpu())

    class_ids = torch.vstack(batch_classes).reshape(-1)
    predictions = target_dic_inv[class_ids].numpy()
    return np.array(batch_losses).mean(), predictions

def test_loop(model, loader, target_dic_inv):
    """Predict on unlabelled data without tracking gradients.

    Args:
        model: Module returning `(logits, loss)` when called with `X` only.
        loader: Iterable of dicts with a tensor under the key 'X'.
        target_dic_inv: 1-D tensor indexed by class id, giving the value each
            class stands for.

    Returns:
        Flat NumPy array holding `target_dic_inv[argmax class]` for every
        timestep, in loader order.
    """
    model.eval()

    batch_classes = []
    with torch.no_grad():
        for batch in loader:
            logits, _ = model(batch['X'].to(device))
            # Arg-max over the class axis (last of the 3 dims).
            batch_classes.append(logits.argmax(2).cpu())

    class_ids = torch.vstack(batch_classes).reshape(-1)
    return target_dic_inv[class_ids].numpy()

In [None]:
def main():
    """Run the full cross-validated training / inference pipeline.

    Steps:
      1. Load train/test/sample-submission CSVs from `config.INPUT`.
      2. Build the pressure -> class-index map from the unique training
         pressures and its inverse lookup tensor.
      3. Assign GroupKFold folds (grouped by `breath_id`) and encode R and C
         as categorical indices.
      4. For each fold: train for `config.N_EPOCHS`, keep the checkpoint with
         the best validation MAE, store its out-of-fold predictions, predict
         the test set and write a per-fold submission.
      5. Write the OOF frame and the fold-averaged submission under
         `config.OUTPUT/config.EXP_NAME`, then print the CV MAE.
    """
    train_df = pd.read_csv(f"{config.INPUT}/train.csv")
    test_df = pd.read_csv(f"{config.INPUT}/test.csv")
    sub_df = pd.read_csv(f"{config.INPUT}/sample_submission.csv")
    oof = np.zeros(len(train_df))
    test_preds_lst = []

    # Sorted unique pressures -> class index, plus a tensor for the reverse
    # lookup (class index -> pressure value).
    target_dic = {v:i for i, v in enumerate(sorted(train_df['pressure'].unique().tolist()))}
    target_dic_inv = torch.tensor(list(target_dic.keys()))

    gkf = GroupKFold(n_splits=config.N_FOLD).split(train_df, train_df.pressure, groups=train_df.breath_id)
    for fold, (_, valid_idx) in enumerate(gkf):
        train_df.loc[valid_idx, 'fold'] = fold

    train_df['C_cate'] = train_df['C'].map({10: 0, 20: 1, 50:2})
    train_df['R_cate'] = train_df['R'].map({5: 0, 20: 1, 50:2})
    test_df['C_cate'] = test_df['C'].map({10: 0, 20: 1, 50:2})
    test_df['R_cate'] = test_df['R'].map({5: 0, 20: 1, 50:2})

    # The dataset reads a 'pressure' column unconditionally; give test a dummy.
    test_df['pressure'] = -1
    test_dset = VentilatorDataset(test_df)
    test_loader = DataLoader(test_dset, batch_size=config.BS,
                             pin_memory=True, shuffle=False, drop_last=False, num_workers=os.cpu_count())

    for fold in range(config.N_FOLD):
        print(f'Fold-{fold}')
        # The fold split is fixed for the whole fold, so select it once instead
        # of re-running the query inside every epoch.
        train_fold_df = train_df.query(f"fold!={fold}")
        valid_fold_df = train_df.query(f"fold=={fold}")
        valid_rows = valid_fold_df.index.values
        valid_target = valid_fold_df['pressure'].values

        train_dset = VentilatorDataset(train_fold_df, target_dic)
        valid_dset = VentilatorDataset(valid_fold_df, target_dic)

        set_seed()
        train_loader = DataLoader(train_dset, batch_size=config.BS,
                                  pin_memory=True, shuffle=True, drop_last=True, num_workers=os.cpu_count(),
                                  worker_init_fn=lambda x: set_seed())
        valid_loader = DataLoader(valid_dset, batch_size=config.BS,
                                  pin_memory=True, shuffle=False, drop_last=False, num_workers=os.cpu_count())

        model = VentilatorModel()
        model.to(device)

        optimizer = AdamW(model.parameters(), lr=config.LR, weight_decay=config.WEIGHT_DECAY)
        # Cosine schedule stepped per batch, with the first 10% as warm-up.
        num_train_steps = int(len(train_loader) * config.N_EPOCHS)
        num_warmup_steps = int(num_train_steps / 10)
        scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_train_steps)

        uniqe_exp_name = f"{config.EXP_NAME}_f{fold}"
        wandb.init(project='Ventilator', entity='trtd56', name=uniqe_exp_name, group=config.EXP_NAME)
        wandb_config = wandb.config
        wandb_config.fold = fold
        # Mirror every public config attribute, except the excluded ones.
        for k, v in dict(vars(config)).items():
            if k[:2] == "__" or k in config.NOT_WATCH_PARAM:
                continue
            wandb_config[k] = v
        wandb.watch(model)

        os.makedirs(f'{config.OUTPUT}/{config.EXP_NAME}', exist_ok=True)
        model_path = f"{config.OUTPUT}/{config.EXP_NAME}/ventilator_f{fold}_best_model.bin"

        valid_best_score = float('inf')
        for epoch in tqdm(range(config.N_EPOCHS)):
            train_loss, lrs = train_loop(model, optimizer, scheduler, train_loader)
            valid_loss, valid_predict = valid_loop(model, valid_loader, target_dic_inv)
            # MAE over every validation row.
            # NOTE(review): predictions come back in groupby("breath_id") order
            # while the targets are in file order; this assumes the rows are
            # already sorted/grouped by breath_id — TODO confirm.
            # NOTE(review): the score covers all rows regardless of u_out —
            # confirm this matches the metric you want to track.
            valid_score = np.abs(valid_predict - valid_target).mean()

            if valid_score < valid_best_score:
                valid_best_score = valid_score
                torch.save(model.state_dict(), model_path)
                oof[valid_rows] = valid_predict

            wandb.log({
                "train_loss": train_loss,
                "valid_loss": valid_loss,
                "valid_score": valid_score,
                "valid_best_score": valid_best_score,
                "learning_rate": lrs,
            })

            torch.cuda.empty_cache()
            gc.collect()

        # Predict the test set with the best checkpoint of this fold.
        model.load_state_dict(torch.load(model_path))
        test_preds = test_loop(model, test_loader, target_dic_inv)
        test_preds_lst.append(test_preds)

        sub_df['pressure'] = test_preds
        sub_df.to_csv(f"{config.OUTPUT}/{config.EXP_NAME}/sub_f{fold}.csv", index=False)

    train_df['oof'] = oof
    train_df.to_csv(f"{config.OUTPUT}/{config.EXP_NAME}/oof.csv", index=False)

    # Final submission: plain mean of the per-fold test predictions.
    sub_df['pressure'] = np.stack(test_preds_lst).mean(0)
    sub_df.to_csv(f"{config.OUTPUT}/{config.EXP_NAME}/submission.csv", index=False)

    # Vectorised MAE; a row-wise DataFrame.apply over the full training frame
    # computes the same value far more slowly.
    cv_score = (train_df['oof'] - train_df['pressure']).abs().mean()
    print("CV:", cv_score)

In [None]:
# Entry point: run the whole pipeline when the notebook/script is executed.
if __name__ == "__main__":
    main()

In [None]:
# main() starts a W&B run per fold via wandb.init() and never finishes it,
# so finish the run that is still open here.
wandb.finish()