In [2]:
import sys, os
import numpy as np 
import pandas as pd
import warnings
import pickle
warnings.filterwarnings("ignore")
import tensorflow as tf
from tensorflow import keras

workplace = '/content/drive/MyDrive/kaggle/codes/ventilator-pressure-prediction/'
sys.path.append(workplace)
from utils import save_json, load_json

In [3]:
def load_data(csv_dir, x_col, seq_len=80):
    """Load a training CSV and reshape it into fixed-length sequences.

    Args:
        csv_dir: Path (or file-like object) passed straight to
            ``pd.read_csv``. Despite the name, this is a CSV file, not a
            directory.
        x_col: List of feature column names used as model input.
        seq_len: Number of consecutive rows that make up one sequence.
            Defaults to 80, the value that used to be hard-coded here.

    Returns:
        Tuple ``(x, u_out, y)`` of float32 arrays shaped
        ``(n_seq, seq_len, len(x_col))``, ``(n_seq, seq_len)`` and
        ``(n_seq, seq_len)``. ``u_out`` and ``y`` are read from the
        ``'u_out'`` and ``'pressure'`` columns.

    Raises:
        ValueError: If the number of rows is not a multiple of ``seq_len``.
    """
    df = pd.read_csv(csv_dir)

    # Fail with an explicit message instead of numpy's generic reshape error.
    if len(df) % seq_len != 0:
        raise ValueError(
            f'{len(df)} rows cannot be split into sequences of length {seq_len}'
        )

    x = df[x_col].values.astype(np.float32).reshape(-1, seq_len, len(x_col))
    y = df['pressure'].values.astype(np.float32).reshape(-1, seq_len)
    u_out = df['u_out'].values.astype(np.float32).reshape(-1, seq_len)

    return x, u_out, y

def load_data_test(csv_dir, x_col, seq_len=80):
    """Load a test CSV (no target column needed) as fixed-length sequences.

    Args:
        csv_dir: Path (or file-like object) passed straight to
            ``pd.read_csv``. Despite the name, this is a CSV file, not a
            directory.
        x_col: List of feature column names used as model input.
        seq_len: Number of consecutive rows that make up one sequence.
            Defaults to 80, the value that used to be hard-coded here.

    Returns:
        float32 array shaped ``(n_seq, seq_len, len(x_col))``.

    Raises:
        ValueError: If the number of rows is not a multiple of ``seq_len``.
    """
    df = pd.read_csv(csv_dir)

    # Fail with an explicit message instead of numpy's generic reshape error.
    if len(df) % seq_len != 0:
        raise ValueError(
            f'{len(df)} rows cannot be split into sequences of length {seq_len}'
        )

    x = df[x_col].values.astype(np.float32).reshape(-1, seq_len, len(x_col))

    return x

In [4]:
def mask_mae(y_true, y_pred, u_out_col = 2):
    """Weighted mean absolute error computed per sequence.

    The target is the last channel of ``y_true`` and the prediction is
    channel 0 of ``y_pred``. Each time step is weighted by
    ``1 - (1 - y_true[:, :, u_out_col])``, i.e. by the value stored in that
    channel of ``y_true``.

    NOTE(review): which encoding that channel carries (raw u_out or an
    inverted flag) is decided by whoever builds ``y_true`` - confirm there.

    Args:
        y_true: Rank-3 tensor indexed as ``[batch, step, channel]``.
        y_pred: Rank-3 tensor indexed as ``[batch, step, channel]``.
        u_out_col: Channel index in ``y_true`` used to derive the weights.

    Returns:
        Tensor with one value per sequence: the weighted absolute error
        summed over the last remaining axis, divided by the weight sum.
        There is no guard against a zero weight sum.
    """
    target = y_true[:, :, -1]
    prediction = y_pred[:, :, 0]
    abs_err = tf.abs(target - prediction)

    # Two successive inversions, kept exactly as in the original arithmetic.
    inverted = 1 - y_true[:, :, u_out_col]
    weights = 1 - inverted

    weighted_err = weights * abs_err
    numerator = tf.reduce_sum(weighted_err, axis=-1)
    denominator = tf.reduce_sum(weights, axis=-1)
    return numerator / denominator

In [5]:
# Directory holding the artifacts this notebook reads: param.json, x_col.pkl,
# the model-{i} checkpoints and the valid_idx_{i}.pkl files.
log_dir = '/content/drive/MyDrive/kaggle/codes/ventilator-pressure-prediction/logs/dlast/good_model/'
# Run configuration; only conf['csv_dir'] is read further down.
conf = load_json(log_dir + 'param.json')

In [6]:
# Column list stored alongside the model artifacts. Opened in a `with` block
# so the file handle is closed as soon as the list is loaded.
# NOTE: pickle.load can execute arbitrary code - only load files you produced.
with open(log_dir + 'x_col.pkl', 'rb') as x_col_file:
    x_col = pickle.load(x_col_file)
# 'pressure' is read separately as the target by load_data, so it is dropped
# from the feature list (raises ValueError if it is not in the list).
x_col.remove('pressure')

# train_data is the tuple (x, u_out, y) returned by load_data.
train_data = load_data(conf['csv_dir'], x_col)
test_csv = '/content/drive/MyDrive/kaggle/datasets/ventilator-pressure-prediction/test_v7-scaled.csv'
test_data = load_data_test(test_csv, x_col)

In [None]:
# Per-fold results, all indexed by fold number:
#   valid_pred[i] - flattened predictions on fold i's validation sequences
#   valid_gt[i]   - flattened ground-truth pressure for the same sequences
#   subs[i]       - flattened predictions of fold i's model on the test set
#   eval_ids[i]   - validation indices loaded for fold i
valid_pred, valid_gt = [], []
subs = []
eval_ids = []
gpu_strategy = tf.distribute.get_strategy()
with gpu_strategy.scope():
    for i in range(10):
        model = keras.models.load_model(f'{log_dir}model-{i}')
        # Close the index file right after reading; the previous
        # pickle.load(open(...)) left one handle open per fold.
        with open(f'{log_dir}valid_idx_{i}.pkl', 'rb') as idx_file:
            eval_id = pickle.load(idx_file)
        eval_ids.append(eval_id)
        x, y = train_data[0][eval_id], train_data[2][eval_id]
        # reshape(-1) flattens in the same element order as the former
        # squeeze/reshape/squeeze chain, and stays 1-D even for a
        # single-element result.
        valid_pred.append(model.predict(x, batch_size=1024, verbose=2).reshape(-1))
        valid_gt.append(y.reshape(-1))
        subs.append(model.predict(test_data, batch_size=1024, verbose=2).reshape(-1))

In [None]:
# Masked MAE of every fold model on its own validation split.
for i in range(len(eval_ids)):
    print('fold', i)
    # Only time steps whose u_out value is negative are scored.
    # NOTE(review): presumably the scaled CSV encodes u_out == 0 as a
    # negative number - confirm against the scaling step.
    mask_i = train_data[1][eval_ids[i]].reshape(-1) < 0

    # Was `valid_gt[i-s]`: `s` is not defined anywhere in the notebook, so the
    # cell raised NameError. Ground truth and prediction share the fold index.
    diff = np.abs(valid_gt[i] - valid_pred[i])
    print('score :', np.sum(diff * mask_i) / np.sum(mask_i))

In [10]:
# Write one submission CSV per fold model, reusing the sample submission
# frame as the template and overwriting its 'pressure' column each time.
ssub = pd.read_csv('/content/drive/MyDrive/kaggle/datasets/ventilator-pressure-prediction/sample_submission.csv')
out = '/content/drive/MyDrive/kaggle/datasets/ventilator-pressure-prediction/result/good_folds/'
os.makedirs(out, exist_ok=True)
for i, fold_pred in enumerate(subs):
    ssub['pressure'] = fold_pred
    ssub.to_csv(f'{out}fold_{i}.csv', index=False)

In [23]:
# Read the ten per-fold submission files back from disk, in fold order.
ss = [
    pd.read_csv(f'/content/drive/MyDrive/kaggle/datasets/ventilator-pressure-prediction/result/good_folds/fold_{i}.csv')
    for i in range(10)
]

In [24]:
# One array of predicted pressures per fold submission.
pres = [fold_sub['pressure'].values for fold_sub in ss]
# Fresh sample-submission template for the ensemble outputs.
ssub = pd.read_csv('/content/drive/MyDrive/kaggle/datasets/ventilator-pressure-prediction/sample_submission.csv')
# Raw training data; only its 'pressure' column is read further down.
df = pd.read_csv('/content/drive/MyDrive/kaggle/datasets/ventilator-pressure-prediction/train.csv')

In [13]:
# Stack the fold predictions once: one row per fold, one column per test row.
fold_matrix = np.vstack(pres)

# Median ensemble across folds.
ssub["pressure"] = np.median(fold_matrix, axis=0)
ssub.to_csv('/content/drive/MyDrive/kaggle/datasets/ventilator-pressure-prediction/result/10folds_median.csv', index=False)

# Mean ensemble across folds (overwrites the column written above).
ssub["pressure"] = np.mean(fold_matrix, axis=0)
ssub.to_csv('/content/drive/MyDrive/kaggle/datasets/ventilator-pressure-prediction/result/10folds_mean.csv', index=False)

In [25]:
# Post-process the median ensemble so that every prediction is a pressure
# value that occurs in the training data.
all_pressure = sorted(df.pressure.unique())
PRESSURE_MIN = all_pressure[0]
PRESSURE_MAX = all_pressure[-1]
# Grid spacing is taken from the two smallest distinct training pressures.
PRESSURE_STEP = (all_pressure[1] - all_pressure[0])
# Same sorted distinct values as an array; built once instead of calling
# sorted(unique()) a second time.
pressure_unique = np.array(all_pressure)

ssub["pressure"] = np.median(np.vstack([pres]), axis=0)
# Round onto the PRESSURE_MIN + k * PRESSURE_STEP grid, then clamp to the
# observed pressure range.
ssub["pressure"] = np.round((ssub["pressure"] - PRESSURE_MIN) / PRESSURE_STEP) * PRESSURE_STEP + PRESSURE_MIN
ssub["pressure"] = np.clip(ssub["pressure"], PRESSURE_MIN, PRESSURE_MAX)

# Snap each prediction to the nearest training pressure. The nearest-value
# search runs once per distinct rounded prediction rather than once per row;
# the per-value formula (and its tie-breaking towards the lower pressure) is
# unchanged, and results are scattered back through the inverse index.
distinct_pred, inverse = np.unique(ssub["pressure"].to_numpy(), return_inverse=True)
snapped = np.array([
    pressure_unique[np.abs(pressure_unique - value).argmin()]
    for value in distinct_pred
])
ssub["pressure"] = snapped[inverse]

In [26]:
ssub.to_csv('/content/drive/MyDrive/kaggle/datasets/ventilator-pressure-prediction/result/10folds_pp_submission.csv', index=False)