In [1]:
import gc
import numpy as np
import pandas as pd

In [2]:
def mae(ytrue, ypred, uout=None):
    if isinstance(uout, pd.Series):
        print(f'MAE (Inspiration Phase):')
        return np.mean(np.abs((ytrue - ypred)[uout == 0]))
    else:
        print('MAE (All Phases):')
        return np.mean(np.abs((ytrue - ypred)))

In [3]:
data = pd.read_csv('../input/ventilator-pressure-prediction/train.csv', usecols=['pressure', 'u_out'])
ytrue = data.pressure
uout = data.u_out

In [4]:
oof1 = np.load('../input/lstm-conv1d-pl-infer/oof_preds.npy')
test1 = np.load('../input/lstm-conv1d-pl-infer/test_preds.npy')
print(mae(ytrue, oof1.reshape(-1), uout))

oof2 = np.load('../input/lstm-conv1d-ver2/oof_preds.npy')
test2 = np.load('../input/lstm-conv1d-ver2/test_preds.npy')
print(mae(ytrue, oof2.reshape(-1), uout))

oof3 = np.load('../input/lstm-mlp/oof_preds.npy')
test3 = np.load('../input/lstm-mlp/test_preds.npy')
print(mae(ytrue, oof3.reshape(-1), uout))

oof4 = np.load('../input/pulp-fiction-finetune/oof_preds.npy')
test4 = np.load('../input/pulp-fiction-finetune/test_preds.npy')
print(mae(ytrue, oof4.reshape(-1), uout))

MAE (Inspiration Phase):
0.14763511908271662
MAE (Inspiration Phase):
0.15754060539456885
MAE (Inspiration Phase):
0.1579810607216832
MAE (Inspiration Phase):
0.16000326095132464


In [5]:
pressure = ytrue.values.astype('float32')

P_MIN = np.min(pressure)
P_MAX = np.max(pressure)
P_STEP = (pressure[1] - pressure[0])
print('Min pressure: {}'.format(P_MIN))
print('Max pressure: {}'.format(P_MAX))
print('Pressure step: {}'.format(P_STEP))
print('Unique values:  {}'.format(np.unique(pressure).shape[0]))

del pressure
gc.collect()

Min pressure: -1.8957443237304688
Max pressure: 64.82099151611328
Pressure step: 0.07030248641967773
Unique values:  950


141

In [6]:
scores=(1-np.array([0.1324,0.1347,0.1360,0.1386,]))
scores=(scores)/(np.sum(scores))
print(scores)

[0.25087471 0.25020964 0.24983373 0.24908192]


In [7]:
submission = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')
submission["pressure"] = scores[0]*sum(test1)/10 + scores[1]*sum(test2)/10 + scores[2]*sum(test3)/10 + scores[3]*sum(test4)/7
submission.to_csv('mean_submission.csv', index=False)

In [8]:
submission["pressure"] = np.median(np.vstack([test1, test2, test3, test4]),axis=0)
submission["pressure"] = np.round((submission.pressure - P_MIN)/P_STEP) * P_STEP + P_MIN
submission["pressure"] = np.clip(submission.pressure, P_MIN, P_MAX)
submission.to_csv('median_submission.csv', index=False)

In [9]:
def better_than_median(test1, test2, test3, test4, axis=0):
    """Compute the mean of the predictions if there are no outliers,
    or the median if there are outliers."""
    
    inputs = np.vstack([test1, test2, test3, test4])
    spread = inputs.max(axis=axis) - inputs.min(axis=axis) 
    spread_lim = 0.45
    print(f"Inliers:  {(spread < spread_lim).sum():7} -> compute mean")
    print(f"Outliers: {(spread >= spread_lim).sum():7} -> compute median")
    print(f"Total:    {len(inputs):7}")
    
    return np.where(spread < spread_lim,
                    scores[0]*sum(test1)/10 + scores[1]*sum(test2)/10 + scores[2]*sum(test3)/10 + scores[3]*sum(test4)/7,
                    np.median(np.vstack([test1, test2, test3, test4]),axis=0))

In [10]:
submission["pressure"] = better_than_median(test1, test2, test3, test4)
submission["pressure"] = np.round((submission.pressure - P_MIN)/P_STEP) * P_STEP + P_MIN
submission["pressure"] = np.clip(submission.pressure, P_MIN, P_MAX)
submission.to_csv('better_than_median_submission.csv', index=False)

Inliers:  1129854 -> compute mean
Outliers: 2894146 -> compute median
Total:         37


In [11]:
test1_pp = np.round((test1 - P_MIN)/P_STEP) * P_STEP + P_MIN
test1_pp = np.clip(test1, P_MIN, P_MAX)
test2_pp = np.round((test2 - P_MIN)/P_STEP) * P_STEP + P_MIN
test2_pp = np.clip(test2, P_MIN, P_MAX)
test3_pp = np.round((test3 - P_MIN)/P_STEP) * P_STEP + P_MIN
test3_pp = np.clip(test3, P_MIN, P_MAX)