In [None]:
import sys, os
import numpy as np
import pandas as pd
import pickle
import torch.nn.functional as F
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm

workplace = '/content/drive/MyDrive/kaggle/codes/ventilator-pressure-prediction/'
sys.path.append(workplace)
from models.torch_lstm import simpleLSTM, TsLSTM, embedLSTM, dualDeepLSTM
from utils import load_json, save_json
from datasets import simpleData 
from functions import eval_metrics, torch_loss_metrics

In [None]:
# Registry of model classes, keyed by the short name used to select one.
_supported_model = {
    'simple-lstm': simpleLSTM,
    'embedded':    embedLSTM,
    'transformer': TsLSTM,
    'dualdeep':    dualDeepLSTM,
}

# Registry of loss functions, keyed by short name.
_supported_criterion = dict(
    mse=F.mse_loss,
    mae=F.l1_loss,
)

In [None]:
def load_model(cfg, ckpt_dir, device=0):
    """Build the model described by ``cfg`` and restore its weights.

    Args:
        cfg: Mapping with a ``'model_name'`` key (one of the keys of
            ``_supported_model``) and a ``'model_param'`` key holding the
            keyword arguments for that model's constructor.
        ckpt_dir: Path of the checkpoint file passed to ``torch.load``; it is
            expected to contain a state dict accepted by ``load_state_dict``.
        device: Device the restored model is moved to. Defaults to ``0``
            (the first CUDA device), which is what this function always used.

    Returns:
        The instantiated model with the checkpoint weights loaded, on ``device``.

    Raises:
        KeyError: If ``cfg['model_name']`` is not a supported model name.
    """
    model_name = cfg['model_name']
    if model_name not in _supported_model:
        raise KeyError(
            f"unsupported model_name {model_name!r}; "
            f"expected one of {sorted(_supported_model)}"
        )

    model = _supported_model[model_name](**cfg['model_param'])

    # Deserialize onto the CPU first: tensors saved from a GPU would otherwise
    # be restored onto that same device index, which fails on hosts without it
    # and briefly holds a second copy of the weights in GPU memory.
    state_dict = torch.load(ckpt_dir, map_location='cpu')
    model.load_state_dict(state_dict)
    model.to(device)

    return model

In [None]:
class TestData(Dataset):
    """Dataset that serves a flat, row-per-time-step frame as fixed-length sequences.

    The frame is cut into consecutive chunks of ``seq_len`` rows, in row order,
    so rows belonging to the same sequence must already be contiguous.

    Args:
        df: DataFrame containing the ``x_col`` feature columns plus ``'u_out'``
            and ``'id'`` columns.
        x_col: List of feature column names.
        seq_len: Number of rows per sequence. Defaults to 80, the value that
            was previously hard-coded.

    Raises:
        ValueError: If ``len(df)`` is not a multiple of ``seq_len``.
    """

    def __init__(self, df, x_col, seq_len=80):
        if seq_len <= 0 or len(df) % seq_len != 0:
            raise ValueError(
                f"cannot split {len(df)} rows into sequences of length {seq_len}"
            )

        self.seq_len = seq_len
        # (n_sequences, seq_len, n_features), float32
        self.feature = (
            df[x_col].values.astype(np.float32).reshape(-1, seq_len, len(x_col))
        )
        # (n_sequences, seq_len), float32
        self.u_out = df['u_out'].values.astype(np.float32).reshape(-1, seq_len)
        # (n_sequences, seq_len), original dtype of the id column
        self.id = df['id'].values.reshape(-1, seq_len)

    def __len__(self):
        """Return the number of sequences."""
        return len(self.feature)

    def __getitem__(self, idx):
        """Return ``(feature, u_out, id)`` for the sequence at position ``idx``."""
        return self.feature[idx], self.u_out[idx], self.id[idx]

In [None]:
def valid_predictor(model, vidx, df, xcols, device=0, batch_size=1024):
    """Run ``model`` over a validation split and report its MAE.

    Args:
        model: Module whose forward pass returns a pair ``(y_in, y_out)``.
        vidx: Validation index forwarded to ``simpleData``.
        df: Training DataFrame forwarded to ``simpleData``.
        xcols: Feature column names forwarded to ``simpleData``.
        device: Device used for inference. Defaults to ``0`` (first CUDA
            device), the previously hard-coded value.
        batch_size: Batch size of the validation loader. Defaults to 1024,
            the previously hard-coded value.

    Returns:
        The value of ``eval_metrics.mae(gt, pred)``. It is also printed, as
        before; earlier versions returned ``None``.
    """
    loader = DataLoader(simpleData(df, vidx, xcols, ['pressure']), batch_size)

    pred_chunks, gt_chunks = [], []

    model.to(device)
    model.eval()
    try:
        with torch.no_grad():
            # Each batch is expected to unpack into (features, u_out, target).
            for x, u_out, t in tqdm(loader):
                x, u_out = x.to(device), u_out.to(device)

                y_in, y_out = model(x)
                # Select y_in where u_out == 0 and y_out where u_out == 1
                # (assumes u_out is a 0/1 mask -- TODO confirm).
                y = y_in * (1 - u_out) + y_out * u_out

                pred_chunks.append(y.cpu().detach().numpy())
                gt_chunks.append(t.detach().numpy())
    finally:
        # Always hand the GPU back, even if inference was interrupted. The
        # model is moved off the device first so that its memory is part of
        # what empty_cache() can release.
        model.cpu()
        torch.cuda.empty_cache()

    pred = np.concatenate(pred_chunks)
    gt = np.concatenate(gt_chunks)

    score = eval_metrics.mae(gt, pred)
    print(score)
    return score

def test_predictor(model, loader):
    pred, ids = [], []

    model.to(0)
    model.eval()
    with torch.no_grad():
        for batch in tqdm(loader):
            x, u_out, id = batch 
            x, u_out = x.to(0), u_out.to(0)

            y_in, y_out = model(x)
            y = y_in*(1-u_out) + y_out*u_out

            y = y.cpu().detach().numpy()
            id = id.detach().numpy()

            pred.append(y)
            ids.append(id)
    
    torch.cuda.empty_cache()
    model.cpu()

    pred = np.concatenate(pred)
    ids = np.concatenate(ids)

    return pred, ids

In [None]:
# Directory of the training run whose checkpoints are used for inference.
logdir = workplace + 'logs/lstm/1105125644/'
data_dir = '/content/drive/MyDrive/kaggle/datasets/ventilator-pressure-prediction/'

cfg = load_json(logdir + 'params.json')
# Use a context manager so the file handle is closed right after loading.
# NOTE: pickle.load can execute arbitrary code -- only load files from runs
# you produced yourself.
with open(logdir + 'x_cols.pkl', 'rb') as fh:
    xcols = pickle.load(fh)
# Number of folds; one checkpoint sub-directory (0 .. kf-1) is read per fold.
kf = 5

In [None]:
# The training CSV location comes from the run's saved parameters.
train_df = pd.read_csv(cfg['csv_dir'])

# The test frame gets a constant placeholder `pressure` column of -1.
test_df = pd.read_csv(data_dir + 'test_v7-scaled.csv').assign(pressure=-1)

In [None]:
# One restored model per fold, read from <logdir>/<fold>/best_model.
models = [load_model(cfg, f'{logdir}{fold}/best_model') for fold in range(kf)]

# DataLoader does not shuffle by default, so batches follow the row order of test_df.
test_loader = DataLoader(TestData(test_df, xcols), 1024)

# Validation indices saved for each fold. Each file is opened in a context
# manager so its handle is closed immediately instead of being leaked.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
vidxs = []
for fold in range(kf):
    with open(f'{logdir}{fold}/valid_idx.pkl', 'rb') as fh:
        vidxs.append(pickle.load(fh))

In [None]:
# Collect one flattened prediction vector (and the matching ids) per fold.
preds, idx = [], []
for fold in range(kf):
    # Use this fold's own model. Previously models[0] was used on every
    # iteration, so the "ensemble" was the same prediction repeated kf times,
    # and the loop counter was overwritten by the returned ids.
    fold_pred, fold_ids = test_predictor(models[fold], test_loader)

    # np.concatenate joins the rows of each returned array end to end along
    # the first axis (presumably (n_sequences, seq_len) -> one flat vector).
    preds.append(np.concatenate(fold_pred))
    idx.append(np.concatenate(fold_ids))

In [None]:
# Stack the per-fold prediction vectors once, then reduce across the fold axis.
fold_preds = np.asarray(preds)
test_pred_mean = fold_preds.mean(axis=0)
test_pred_median = np.median(fold_preds, axis=0)

In [None]:
# Submission template. Built from data_dir rather than repeating the absolute
# dataset path, so changing the dataset location in one place is enough.
sample_sub = pd.read_csv(data_dir + 'sample_submission.csv')

In [None]:
# Build each submission as a fresh copy of the template with `pressure`
# replaced. Values are assigned by position, so the predictions must be in
# the same row order as the template.
sub_mean = sample_sub.assign(pressure=test_pred_mean)
sub_median = sample_sub.assign(pressure=test_pred_median)

In [None]:
# Results go into a `result/` folder under the dataset directory; the path is
# derived from data_dir instead of repeating the absolute location.
out = data_dir + 'result/'
os.makedirs(out, exist_ok=True)

# Name the files after the run directory. normpath drops any trailing slash,
# so this works whether or not logdir ends with '/' (slicing off the last
# character only worked when it did).
basename = os.path.basename(os.path.normpath(logdir))
sub_mean.to_csv(f'{out}{basename}_mean.csv', index=False)
sub_median.to_csv(f'{out}{basename}_median.csv', index=False)