In [1]:
import sys
from time import time
import numpy as np
import pandas as pd
from pathlib import Path
import lightgbm as lgb
import matplotlib.pyplot as plt 
import seaborn as sns
from tqdm import tqdm
import copy
import wandb
from collections import OrderedDict

from sklearn.metrics import mean_absolute_error
from sklearn import model_selection

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as torchdata

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.loggers import WandbLogger


In [2]:
sys.path.append('../../src/')
import utils as utils
from utils import Timer

In [3]:
class CFG:
    seed = 42
    exp_num = 2
    local = True
    n_folds = 5
    folds = [0, 1, 2, 3, 4]
    debug = False
    bias = 1000
    epochs = 500

    
    ######################
    # Dataset #
    ######################
    transforms = {
        "train": [{"name": ""}],
        "valid": [{"name": ""}],
        "test": [{"name": ""}]
    }

    ######################
    # Loaders #
    ######################
    loader_params = {
        "train": {
            'batch_size': 128,
            'shuffle': True,
            'num_workers': 8,
            'pin_memory': True,
            'drop_last': True,
        },
        "valid": {
            'batch_size': 32,
            'shuffle': False,
            'num_workers': 8,
            'pin_memory': True,
            'drop_last': False,
        },
        "test": {
            'batch_size': 32,
            'shuffle': False,
            'num_workers': 8,
            'pin_memory': True,
            'drop_last': False,
        }
    }

    ######################
    # Split #
    ######################
    split = "GroupKFold"
    split_params = {
        "n_splits": 5,
    }

    ######################
    # Model #
    ######################
    input_dim = 5

    dense_dim = 512
    lstm_dim = 512
    logit_dim = 512
    num_classes = 1

    ######################
    # Criterion #
    ######################
#     loss_name = "rmspe_loss"
#     loss_params: dict = {}

    ######################
    # Optimizer #
    ######################
    optimizer_name = "Adam"
    optimizer_params = {
        "lr": 0.001
    }

    ######################
    # Scheduler #
    ######################
    scheduler_name = "ReduceLROnPlateau"
    scheduler_params = {
        'factor': 0.2, 
        'patience': 7
    }

In [4]:
utils.set_seed(CFG.seed)

In [5]:
if CFG.local:
    DATA_DIR = Path("/home/knikaido/work/Ventilator-Pressure-Prediction/data/ventilator-pressure-prediction")
    OUTPUT_DIR = Path('./output/')
else:
    DATA_DIR = Path("../input/ventilator-pressure-prediction")
    OUTPUT_DIR = Path('')   

In [6]:
def get_transforms(phase: str):
    transforms = CFG.transforms
    if transforms is None:
        return None
    else:
        if transforms[phase] is None:
            return None
        trns_list = []
        for trns_conf in transforms[phase]:
            trns_name = trns_conf["name"]
            trns_params = {} if trns_conf.get("params") is None else \
                trns_conf["params"]
            if globals().get(trns_name) is not None:
                trns_cls = globals()[trns_name]
                trns_list.append(trns_cls(**trns_params))

        if len(trns_list) > 0:
            return Compose(trns_list)
        else:
            return None
        
        
class Normalize:
    def __call__(self, y: np.ndarray):
        max_vol = np.abs(y).max()
        y_vol = y * 1 / max_vol
        return np.asfortranarray(y_vol)


class Compose:
    def __init__(self, transforms: list):
        self.transforms = transforms

    def __call__(self, y: np.ndarray):
        for trns in self.transforms:
            y = trns(y)
        return y

In [7]:
def compute_metric(preds, trues, u_outs):
    """
    Metric for the problem, as I understood it.
    """
    
    y = trues
    w = 1 - u_outs
    
    assert y.shape == preds.shape and w.shape == y.shape, (y.shape, preds.shape, w.shape)
    
    mae = w * np.abs(y - preds)
    mae = mae.sum() / w.sum()
    
    return mae


class VentilatorLoss(nn.Module):
    """
    Directly optimizes the competition metric
    """
    def __call__(self, preds, y, u_out):
        w = 1 - u_out
        mae = w * (y - preds).abs()
        mae = mae.sum(-1) / w.sum(-1)

        return mae

In [8]:
def get_criterion():
    return VentilatorLoss()

In [9]:
# Custom optimizer
__OPTIMIZERS__ = {}


def get_optimizer(model: nn.Module):
    optimizer_name = CFG.optimizer_name
    if optimizer_name == "SAM":
        base_optimizer_name = CFG.base_optimizer
        if __OPTIMIZERS__.get(base_optimizer_name) is not None:
            base_optimizer = __OPTIMIZERS__[base_optimizer_name]
        else:
            base_optimizer = optim.__getattribute__(base_optimizer_name)
        return SAM(model.parameters(), base_optimizer, **CFG.optimizer_params)

    if __OPTIMIZERS__.get(optimizer_name) is not None:
        return __OPTIMIZERS__[optimizer_name](model.parameters(),
                                              **CFG.optimizer_params)
    else:
        return optim.__getattribute__(optimizer_name)(model.parameters(),
                                                      **CFG.optimizer_params)


def get_scheduler(optimizer):
    scheduler_name = CFG.scheduler_name

    if scheduler_name is None:
        return
    else:
        return optim.lr_scheduler.__getattribute__(scheduler_name)(
            optimizer, **CFG.scheduler_params)

In [10]:
# validation
splitter = getattr(model_selection, CFG.split)(**CFG.split_params)

In [11]:
class VentilatorDataset(torchdata.Dataset):
    def __init__(self, df):
        if "pressure" not in df.columns:
            df['pressure'] = 0

        self.df = df.groupby('breath_id').agg(list).reset_index()
        
        self.prepare_data()
                
    def __len__(self):
        return self.df.shape[0]
    
    def prepare_data(self):
        self.pressures = np.array(self.df['pressure'].values.tolist())
        
        rs = np.array(self.df['R'].values.tolist())
        cs = np.array(self.df['C'].values.tolist())
        u_ins = np.array(self.df['u_in'].values.tolist())
        
        self.u_outs = np.array(self.df['u_out'].values.tolist())
        
        self.inputs = np.concatenate([
            rs[:, None], 
            cs[:, None], 
            u_ins[:, None], 
            np.cumsum(u_ins, 1)[:, None],
            self.u_outs[:, None]
        ], 1).transpose(0, 2, 1)

    def __getitem__(self, idx):
        data = {
            "input": self.inputs[idx].astype(np.float32),
            "u_out": self.u_outs[idx].astype(np.float32),
            "p": self.pressures[idx].astype(np.float32),
        }
        
        return data

In [12]:
class RNNModel(nn.Module):
    def __init__(
        self,
        input_dim=4,
        lstm_dim=256,
        dense_dim=256,
        logit_dim=256,
        num_classes=1,
    ):
        super().__init__()

        self.mlp = nn.Sequential(
            nn.Linear(input_dim, dense_dim // 2),
            nn.ReLU(),
            nn.Linear(dense_dim // 2, dense_dim),
            nn.ReLU(),
        )

        self.lstm = nn.LSTM(dense_dim, lstm_dim, batch_first=True, bidirectional=True)

        self.logits = nn.Sequential(
            nn.Linear(lstm_dim * 2, logit_dim),
            nn.ReLU(),
            nn.Linear(logit_dim, num_classes),
        )

    def forward(self, x):
        features = self.mlp(x)
        features, _ = self.lstm(features)
        pred = self.logits(features)
        return pred

In [13]:
# Learner class(pytorch-lighting)
class Learner(pl.LightningModule):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.criterion = get_criterion()
    
    def training_step(self, batch, batch_idx):
        d_ = batch
        output = self.model(d_['input'])
        loss = self.criterion(output.view(-1), d_['p'].view(-1), d_['u_out'].view(-1))
        return loss
    
    def validation_step(self, batch, batch_idx):
        d_ = batch
        output = self.model(d_['input'])
        loss = self.criterion(output.view(-1), d_['p'].view(-1), d_['u_out'].view(-1))
        
        self.log(f'Loss/val', loss, on_step=False, on_epoch=True, prog_bar=False, logger=True)
        
        output = OrderedDict({
            "targets": d_['p'].detach(), "preds": output.detach(), "u_outs": d_['u_out'].detach(), "loss": loss.detach()
        })
        return output
    
    def validation_epoch_end(self, outputs):

        targets = torch.cat([o["targets"].view(-1) for o in outputs]).cpu().numpy()
        preds = torch.cat([o["preds"].view(-1) for o in outputs]).cpu().numpy()
        u_outs = torch.cat([o["u_outs"].view(-1) for o in outputs]).cpu().numpy()

        score = get_score(preds, targets, u_outs)
        self.log(f'custom_mae/val', score, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        print(f'epoch = {self.current_epoch}, custom_mae = {score}')

    def configure_optimizers(self):
        optimizer = get_optimizer(self.model)
        scheduler = get_scheduler(optimizer)
        return {"optimizer": optimizer, "lr_scheduler": scheduler, "monitor": "Loss/val"}

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [15]:
def get_score(y_pred, y_true, u_outs):
    return compute_metric(y_pred, y_true, u_outs)


def to_np(input):
    return input.detach().cpu().numpy()

# oof
def evaluate(model, loaders, phase):
    model.eval()
    pred_list = []
    target_list = []
    with torch.no_grad():
        for batch in loaders[phase]:
            d_ = batch
            d_['input'] = d_['input'].to(device)
            output = model(d_['input'])
#             output = nn.Softmax(dim=1)(output)
            pred_list.append(to_np(output))
            target_list.append(to_np(d_['p']))

    pred_list = np.concatenate(pred_list).reshape(-1)
    target_list = np.concatenate(target_list).reshape(-1)
    model.train()
    return pred_list, target_list

In [16]:
train = pd.read_csv(DATA_DIR / 'train.csv')
test = pd.read_csv(DATA_DIR / 'test.csv')
display(train), display(test)

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out,pressure
0,1,1,20,50,0.000000,0.083334,0,5.837492
1,2,1,20,50,0.033652,18.383041,0,5.907794
2,3,1,20,50,0.067514,22.509278,0,7.876254
3,4,1,20,50,0.101542,22.808822,0,11.742872
4,5,1,20,50,0.135756,25.355850,0,12.234987
...,...,...,...,...,...,...,...,...
6035995,6035996,125749,50,10,2.504603,1.489714,1,3.869032
6035996,6035997,125749,50,10,2.537961,1.488497,1,3.869032
6035997,6035998,125749,50,10,2.571408,1.558978,1,3.798729
6035998,6035999,125749,50,10,2.604744,1.272663,1,4.079938


Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out
0,1,0,5,20,0.000000,0.000000,0
1,2,0,5,20,0.031904,7.515046,0
2,3,0,5,20,0.063827,14.651675,0
3,4,0,5,20,0.095751,21.230610,0
4,5,0,5,20,0.127644,26.320956,0
...,...,...,...,...,...,...,...
4023995,4023996,125748,20,10,2.530117,4.971245,1
4023996,4023997,125748,20,10,2.563853,4.975709,1
4023997,4023998,125748,20,10,2.597475,4.979468,1
4023998,4023999,125748,20,10,2.631134,4.982648,1


(None, None)

In [None]:
oof_total = np.zeros((len(train), CFG.num_classes))
sub_preds = np.zeros((test.shape[0], CFG.n_folds))
val_idxes = []
models = []
y = train['pressure']
groups = train['breath_id']
gkfold = model_selection.GroupKFold(n_splits=CFG.n_folds)
scores = []

for i, (trn_idx, val_idx) in enumerate(splitter.split(train, y, groups)):
    if i not in CFG.folds:
        continue

    trn_df = train.loc[trn_idx, :].reset_index(drop=True)
    val_df = train.loc[val_idx, :].reset_index(drop=True)
    trn_y = y.values[trn_idx]
    val_y = y.values[val_idx]
    
    
    loaders = {
        phase: torchdata.DataLoader(
            VentilatorDataset(
                df_,
            ),
            **CFG.loader_params[phase])  # type: ignore
        for phase, df_ in zip(["train", "valid", "test"], [trn_df, val_df, test])
    }
    
    
    model = RNNModel(
        input_dim=CFG.input_dim,
        lstm_dim=CFG.lstm_dim,
        dense_dim=CFG.dense_dim,
        logit_dim=CFG.logit_dim,
        num_classes=CFG.num_classes,
    )
    model_name = model.__class__.__name__
#     break
    
    learner = Learner(model)
    
    # loggers
    RUN_NAME = f'exp{str(CFG.exp_num)}'
    wandb.init(project='Ventilator-Pressure-Prediction', entity='sqrt4kaido', group=RUN_NAME, job_type=RUN_NAME + f'-fold-{i}')
    wandb.run.name = RUN_NAME + f'-fold-{i}'
    wandb_config = wandb.config
    wandb_config.model_name = model_name
    wandb.watch(model)
    
    # callbacks
    callbacks = []
    checkpoint_callback = ModelCheckpoint(
        monitor=f'Loss/val',
        mode='min',
        dirpath=OUTPUT_DIR,
        verbose=False,
        save_weights_only=True,
        filename=f'{model_name}-{learner.current_epoch}-{i}')
    callbacks.append(checkpoint_callback)

    early_stop_callback = EarlyStopping(
        monitor='Loss/val',
        min_delta=0.00,
        patience=10,
        verbose=True,
        mode='min')
    callbacks.append(early_stop_callback)
    
    loggers = []
    loggers.append(WandbLogger())
    
    trainer = pl.Trainer(
        logger=loggers,
        callbacks=callbacks,
        max_epochs=CFG.epochs,
        default_root_dir=OUTPUT_DIR,
        gpus=1,
#         fast_dev_run=DEBUG,
        deterministic=True,
        benchmark=False,
        )
    
    trainer.fit(learner, train_dataloader=loaders['train'], val_dataloaders=loaders['valid'])
#     trainer.save_checkpoint(OUTPUT_DIR / "last.ckpt")
    print('train done.')
    
    #############
    # validation (to make oof)
    #############
    checkpoint = torch.load(checkpoint_callback.best_model_path)
    learner.load_state_dict(checkpoint['state_dict'])
    
    model = model.to(device)
    oof_pred, oof_target = evaluate(model, loaders, phase="valid")
    models.append(model)
    
    oof_score = get_score(oof_pred, oof_target, val_df['u_out'].values)
    scores.append(oof_score)
    oof_total[val_idx] = oof_pred.reshape(1, -1).T / CFG.bias
    val_idxes.append(val_idx)
    
    print('validate done.')
    print(f'fold = {i}, auc = {oof_score}')
    wandb.log({'CV_score': oof_score})
    
    #############
    # inference
    #############
    test_pred, _ = evaluate(model, loaders, phase="test")
    sub_preds[:, i] = test_pred
    
    print('inference done.')

# test_preds_total = np.array(test_preds_total)
score = mean_absolute_error(y, oof_total)
print(f'MAE {score}: folds: {scores}')

[34m[1mwandb[0m: Currently logged in as: [33msqrt4kaido[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type           | Params
---------------------------------------------
0 | model     | RNNModel       | 4.9 M 
1 | criterion | VentilatorLoss | 0     
---------------------------------------------
4.9 M     Trainable params
0         Non-trainable params
4.9 M     Total params
19.444    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

epoch = 0, custom_mae = 17.4731388092041


Training: -1it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Metric Loss/val improved. New best score: 1.883


epoch = 0, custom_mae = 1.883453607559204


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.499 >= min_delta = 0.0. New best score: 1.385


epoch = 1, custom_mae = 1.3846781253814697


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.145 >= min_delta = 0.0. New best score: 1.240


epoch = 2, custom_mae = 1.2396798133850098


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.098 >= min_delta = 0.0. New best score: 1.141


epoch = 3, custom_mae = 1.1414438486099243


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.043 >= min_delta = 0.0. New best score: 1.098


epoch = 4, custom_mae = 1.0980448722839355


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.017 >= min_delta = 0.0. New best score: 1.081


epoch = 5, custom_mae = 1.0809578895568848


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.019 >= min_delta = 0.0. New best score: 1.062


epoch = 6, custom_mae = 1.06229829788208


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.005 >= min_delta = 0.0. New best score: 1.058


epoch = 7, custom_mae = 1.0576579570770264


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.110 >= min_delta = 0.0. New best score: 0.947


epoch = 8, custom_mae = 0.9473881125450134


Validating: 0it [00:00, ?it/s]

epoch = 9, custom_mae = 1.0043562650680542


Validating: 0it [00:00, ?it/s]

epoch = 10, custom_mae = 1.1142650842666626


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.030 >= min_delta = 0.0. New best score: 0.917


epoch = 11, custom_mae = 0.9174665808677673


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.049 >= min_delta = 0.0. New best score: 0.868


epoch = 12, custom_mae = 0.86821049451828


Validating: 0it [00:00, ?it/s]

epoch = 13, custom_mae = 0.9174319505691528


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.008 >= min_delta = 0.0. New best score: 0.861


epoch = 14, custom_mae = 0.8605444431304932


Validating: 0it [00:00, ?it/s]

epoch = 15, custom_mae = 0.86919105052948


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.042 >= min_delta = 0.0. New best score: 0.818


epoch = 16, custom_mae = 0.818535327911377


Validating: 0it [00:00, ?it/s]

epoch = 17, custom_mae = 0.9405674338340759


Validating: 0it [00:00, ?it/s]

epoch = 18, custom_mae = 0.8337823152542114


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.018 >= min_delta = 0.0. New best score: 0.800


epoch = 19, custom_mae = 0.8001059889793396


Validating: 0it [00:00, ?it/s]

epoch = 20, custom_mae = 0.8620361685752869


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.020 >= min_delta = 0.0. New best score: 0.780


epoch = 21, custom_mae = 0.7799476385116577


Validating: 0it [00:00, ?it/s]

epoch = 22, custom_mae = 0.8782721757888794


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.025 >= min_delta = 0.0. New best score: 0.755


epoch = 23, custom_mae = 0.755310595035553


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.001 >= min_delta = 0.0. New best score: 0.754


epoch = 24, custom_mae = 0.7539745569229126


Validating: 0it [00:00, ?it/s]

epoch = 25, custom_mae = 0.755649745464325


Validating: 0it [00:00, ?it/s]

epoch = 26, custom_mae = 0.8880383372306824


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.019 >= min_delta = 0.0. New best score: 0.735


epoch = 27, custom_mae = 0.7348693609237671


Validating: 0it [00:00, ?it/s]

epoch = 28, custom_mae = 0.742684543132782


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.003 >= min_delta = 0.0. New best score: 0.732


epoch = 29, custom_mae = 0.7315592169761658


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.033 >= min_delta = 0.0. New best score: 0.698


epoch = 30, custom_mae = 0.6982909440994263


Validating: 0it [00:00, ?it/s]

epoch = 31, custom_mae = 0.7094677090644836


Validating: 0it [00:00, ?it/s]

epoch = 32, custom_mae = 0.7199367880821228


Validating: 0it [00:00, ?it/s]

epoch = 33, custom_mae = 0.733745276927948


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.025 >= min_delta = 0.0. New best score: 0.673


epoch = 34, custom_mae = 0.6732609868049622


Validating: 0it [00:00, ?it/s]

epoch = 35, custom_mae = 0.6870973706245422


Validating: 0it [00:00, ?it/s]

epoch = 36, custom_mae = 0.7021640539169312


Validating: 0it [00:00, ?it/s]

epoch = 37, custom_mae = 0.6770766377449036


Validating: 0it [00:00, ?it/s]

epoch = 38, custom_mae = 0.7397586703300476


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.015 >= min_delta = 0.0. New best score: 0.659


epoch = 39, custom_mae = 0.6585935950279236


Validating: 0it [00:00, ?it/s]

epoch = 40, custom_mae = 0.6783562898635864


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.006 >= min_delta = 0.0. New best score: 0.652


epoch = 41, custom_mae = 0.6524278521537781


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.011 >= min_delta = 0.0. New best score: 0.642


epoch = 42, custom_mae = 0.6418192982673645


Validating: 0it [00:00, ?it/s]

epoch = 43, custom_mae = 0.672744870185852


Validating: 0it [00:00, ?it/s]

epoch = 44, custom_mae = 0.6531070470809937


Validating: 0it [00:00, ?it/s]

epoch = 45, custom_mae = 0.6725907921791077


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.003 >= min_delta = 0.0. New best score: 0.638


epoch = 46, custom_mae = 0.6384808421134949


Validating: 0it [00:00, ?it/s]

epoch = 47, custom_mae = 0.6475504636764526


Validating: 0it [00:00, ?it/s]

epoch = 48, custom_mae = 0.6431071758270264


Validating: 0it [00:00, ?it/s]

epoch = 49, custom_mae = 0.6496707797050476


Validating: 0it [00:00, ?it/s]

epoch = 50, custom_mae = 0.6490117311477661


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.010 >= min_delta = 0.0. New best score: 0.629


epoch = 51, custom_mae = 0.6288597583770752


Validating: 0it [00:00, ?it/s]

epoch = 52, custom_mae = 0.7071653008460999


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.019 >= min_delta = 0.0. New best score: 0.610


epoch = 53, custom_mae = 0.609760582447052


Validating: 0it [00:00, ?it/s]

epoch = 54, custom_mae = 0.6103218197822571


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.000 >= min_delta = 0.0. New best score: 0.610


epoch = 55, custom_mae = 0.609540581703186


Validating: 0it [00:00, ?it/s]

Metric Loss/val improved by 0.030 >= min_delta = 0.0. New best score: 0.580


epoch = 56, custom_mae = 0.5795790553092957


Validating: 0it [00:00, ?it/s]

epoch = 57, custom_mae = 0.5904820561408997


Validating: 0it [00:00, ?it/s]

In [20]:
score = mean_absolute_error(y, oof_total)
print(f'MAE {score}: folds: {scores}')

NameError: name 'scores' is not defined

In [23]:
oof_df = pd.DataFrame({'id': train['id'].values, 'pressure':oof_total.reshape(-1)})
oof_df.to_csv(OUTPUT_DIR / f'oof{CFG.exp_num}.csv',index = False)
oof_df

Unnamed: 0,id,pressure
0,1,0.005927
1,2,0.005814
2,3,0.007738
3,4,0.011584
4,5,0.012624
...,...,...
6035995,6035996,0.034579
6035996,6035997,0.034803
6035997,6035998,0.034529
6035998,6035999,0.032769


In [24]:
test = pd.read_csv(DATA_DIR / 'test.csv')
# test = test[test['u_out'] == 0]
test['pressure'] = np.mean(sub_preds, axis=1)

In [25]:
sub = pd.read_csv(DATA_DIR / 'sample_submission.csv')
sub.drop('pressure', axis=1, inplace=True)
sub = sub.join(test['pressure'])
sub = sub.fillna(0)
sub.to_csv(OUTPUT_DIR / f'sub{CFG.exp_num}.csv',index = False)
sub

Unnamed: 0,id,pressure
0,1,6.318402
1,2,6.004127
2,3,7.071553
3,4,7.573526
4,5,8.897440
...,...,...
4023995,4023996,17.940959
4023996,4023997,18.135338
4023997,4023998,18.424244
4023998,4023999,18.616339


In [None]:
wandb.init(project='Ventilator-Pressure-Prediction', entity='sqrt4kaido', group=RUN_NAME, job_type='summary')
wandb.run.name = 'summary'
wandb.log({'CV_score': oof_score})
# wandb.save(utils.get_notebook_path())
wandb.finish()