# Introduction

"Confidence" is tuned so that it increases with the number of weeks.

This is one of the approaches that earned my team a silver medal.

# Acknowledgements
This notebook is forked from https://www.kaggle.com/ulrich07/osic-multiple-quantile-regression-starter by @ulrich07 

If you are kind enough to upvote my notebook, please also upvote Ulrich's.

# Import libraries 

In [None]:
import numpy as np
import pandas as pd
import os
import random
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold, StratifiedKFold, GroupKFold
from tqdm.notebook import tqdm

import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L
import tensorflow.keras.models as M

# Raise the pandas display limits to 60 columns / 100 rows for the cell outputs below.
pd.set_option('display.max_columns', 60)
pd.set_option('display.max_rows', 100)

In [None]:
def seed_everything(seed=2020):
    """Seed every random number source this notebook relies on.

    Sets the ``PYTHONHASHSEED`` environment variable and seeds Python's
    ``random`` module, NumPy's global generator and TensorFlow.

    Parameters
    ----------
    seed : int
        Value applied to all of the generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)


seed_everything(42)

# Preprocessing

In [None]:
ROOT = "../input/osic-pulmonary-fibrosis-progression"

# Training measurements; every (Patient, Weeks) pair that occurs more than
# once is dropped entirely (keep=False removes all copies).
tr = pd.read_csv(f"{ROOT}/train.csv")
tr = tr.drop_duplicates(subset=['Patient','Weeks'], keep=False)
chunk = pd.read_csv(f"{ROOT}/test.csv")

print("add infos")
# Split the "<patient>_<week>" key of the sample submission into its parts,
# then attach the per-patient columns of the test file (except its Weeks).
sub = pd.read_csv(f"{ROOT}/sample_submission.csv")
patient_week_parts = sub['Patient_Week'].str.split('_')
sub['Patient'] = patient_week_parts.str[0]
sub['Weeks'] = patient_week_parts.str[-1].apply(int)
sub = (
    sub[['Patient','Weeks','Confidence','Patient_Week']]
    .merge(chunk.drop(columns='Weeks'), on="Patient")
)

In [None]:
# Tag every row with its origin so the stacked frame can be split back into
# train / val / test once the shared feature engineering is done.
tr['WHERE'] = 'train'
chunk['WHERE'] = 'val'
sub['WHERE'] = 'test'
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat stacks the same frames in the same order and keeps their indexes.
data = pd.concat([tr, chunk, sub])

print(tr.shape, chunk.shape, sub.shape, data.shape)
print(tr.Patient.nunique(), chunk.Patient.nunique(), sub.Patient.nunique(), 
      data.Patient.nunique())

In [None]:
# Earliest observed week per patient. Rows coming from the submission
# template ('test') are blanked out first so they cannot define the minimum.
data['min_week'] = np.where(data.WHERE == 'test', np.nan, data['Weeks'])
data['min_week'] = data.groupby('Patient')['min_week'].transform('min')

# One row per patient holding the FVC / Percent measured at that first week.
is_first_visit = data['Weeks'] == data['min_week']
first_visits = data.loc[is_first_visit, ['Patient', 'FVC', 'Percent']]
first_visits = first_visits.rename(
    columns={'FVC': 'base_FVC', 'Percent': 'base_Percent'}
)
base = first_visits.groupby('Patient').first().reset_index()

In [None]:
# Broadcast each patient's baseline values onto all of their rows and express
# every visit as an offset (in weeks) from the patient's first visit.
data = pd.merge(data, base, how='left', on='Patient')
data['base_week'] = data['Weeks'] - data['min_week']
del base

In [None]:
# Feature list starts with the category labels, which become the names of the
# one-hot columns created below.
FE = list(data.Sex.unique()) + list(data.SmokingStatus.unique())
# dtype=float is explicit because pandas >= 2.0 returns bool dummies by
# default; mixing bool and float columns makes `frame[FE].values` an object
# array, which Keras cannot convert to a tensor.
data = pd.concat([
    data,
    pd.get_dummies(data.Sex, dtype=float),
    pd.get_dummies(data.SmokingStatus, dtype=float)
], axis=1)

In [None]:
def Normalization(df):
    """Min-max scale the numeric model inputs of ``df``.

    The columns ``Age``, ``base_FVC``, ``base_week`` and ``base_Percent`` are
    each rescaled to ``(x - min) / (max - min)``. The frame is modified in
    place and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the four columns listed above.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    for column in ('Age', 'base_FVC', 'base_week', 'base_Percent'):
        values = df[column]
        lowest, highest = values.min(), values.max()
        df[column] = (values - lowest) / (highest - lowest)
    return df

# Add the numeric columns to the feature list, then scale them to [0, 1].
FE.extend(['Age', 'base_FVC', 'base_week', 'base_Percent'])
data = Normalization(data)

In [None]:
# Display the final list of feature column names.
FE

In [None]:
# Split the engineered frame back into its three sources. Explicit copies are
# taken because new columns are assigned to `tr` and `sub` later on; assigning
# to a bare slice raises SettingWithCopyWarning.
tr = data.loc[data.WHERE=='train'].copy()
chunk = data.loc[data.WHERE=='val'].copy()
sub = data.loc[data.WHERE=='test'].copy()
del data

In [None]:
# Row / column counts of the three splits after feature engineering.
tr.shape, chunk.shape, sub.shape

# Quantile Regression

In [None]:
# Activation >> Mish
from tensorflow.keras.layers import Activation
from tensorflow.keras.utils import get_custom_objects

class Mish(Activation):
    """Keras ``Activation`` layer whose ``__name__`` is set to ``'Mish'``."""

    def __init__(self, activation, **kwargs):
        super().__init__(activation, **kwargs)
        self.__name__ = 'Mish'

def mish(inputs):
    """Mish activation: ``x * tanh(softplus(x))``, applied element-wise."""
    gate = tf.math.tanh(tf.math.softplus(inputs))
    return inputs * gate

# Register the activation under the key 'Mish' so layers can refer to it by
# that string (as done with activation="Mish" in make_model).
get_custom_objects().update({'Mish': Mish(mish)})

In [None]:
# Clipping constants used by score(): C1 is the lower bound applied to sigma,
# C2 the upper bound applied to the absolute error.
C1, C2 = tf.constant(70, dtype='float32'), tf.constant(1000, dtype="float32")
#=============================#
def score(y_true, y_pred):
    """Mean of ``sqrt(2) * delta / sigma + log(sqrt(2) * sigma)`` over the batch.

    ``sigma`` is the width between the third and first predicted columns,
    clipped from below at ``C1``; ``delta`` is the absolute error of the
    second predicted column, clipped from above at ``C2``. Lower is better.

    Parameters
    ----------
    y_true : tensor
        Targets; only column 0 is used.
    y_pred : tensor
        Predictions with (at least) three columns: low, middle, high.

    Returns
    -------
    Scalar tensor with the batch mean of the metric.
    """
    # The original called tf.dtypes.cast() without keeping the result, so the
    # casts had no effect; assign them so non-float32 inputs are handled.
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    sigma = y_pred[:, 2] - y_pred[:, 0]
    fvc_pred = y_pred[:, 1]
    sigma_clip = tf.maximum(sigma, C1)
    delta = tf.abs(y_true[:, 0] - fvc_pred)
    delta = tf.minimum(delta, C2)
    sq2 = tf.sqrt(tf.cast(2, dtype=tf.float32))
    metric = (delta / sigma_clip) * sq2 + tf.math.log(sigma_clip * sq2)
    return K.mean(metric)
#============================#
def qloss(y_true, y_pred):
    """Mean pinball (quantile) loss for the 0.2, 0.5 and 0.8 quantiles.

    Each of the three prediction columns is compared with the target using
    its own quantile level; the result is averaged over all elements.
    """
    qs = [0.2, 0.50, 0.8]
    quantiles = tf.constant(np.array([qs]), dtype=tf.float32)
    residual = y_true - y_pred
    pinball = tf.maximum(quantiles * residual, (quantiles - 1) * residual)
    return K.mean(pinball)
#=============================#
def mloss(_lambda):
    """Build a loss that blends ``qloss`` and ``score``.

    Parameters
    ----------
    _lambda : float
        Weight of the pinball loss; ``score`` is weighted by ``1 - _lambda``.

    Returns
    -------
    callable
        ``loss(y_true, y_pred)`` suitable for ``model.compile``.
    """
    def loss(y_true, y_pred):
        pinball_term = _lambda * qloss(y_true, y_pred)
        metric_term = (1 - _lambda) * score(y_true, y_pred)
        return pinball_term + metric_term
    return loss
#=================
def make_model(nh):
    """Build and compile the quantile-regression network.

    Two 100-unit dense layers with the registered "Mish" activation feed two
    3-unit heads: a linear one (``p1``) and a ReLU one (``p2``). The output is
    ``p1 + cumsum(p2)`` along the column axis, i.e. three values per sample.

    Parameters
    ----------
    nh : int
        Number of input features.

    Returns
    -------
    tensorflow.keras.Model
        Model compiled with ``mloss(0.8)`` and the ``score`` metric.
    """
    features = L.Input((nh,), name="Patient")
    hidden = L.Dense(100, activation="Mish", name="d1")(features)
    hidden = L.Dense(100, activation="Mish", name="d2")(hidden)
    p1 = L.Dense(3, activation="linear", name="p1")(hidden)
    p2 = L.Dense(3, activation="relu", name="p2")(hidden)
    # The lambda argument gets its own name instead of shadowing a local.
    preds = L.Lambda(lambda heads: heads[0] + tf.cumsum(heads[1], axis=1),
                     name="preds")([p1, p2])
    model = M.Model(features, preds, name="NN")
    # `lr` is a deprecated alias; `learning_rate` is the supported argument.
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999,
                                         epsilon=None, decay=0.01, amsgrad=False)
    model.compile(loss=mloss(0.8), optimizer=optimizer, metrics=[score])
    return model

In [None]:
def calc_cv_score(y_true, y_pred):
    """NumPy evaluation of the validation score (higher is better).

    Parameters
    ----------
    y_true : ndarray of shape (n, 1)
        Observed values; column 0 is used.
    y_pred : ndarray of shape (n, 3)
        Low / middle / high predictions per sample.

    Returns
    -------
    float
        Negated mean of ``sqrt(2) * delta / sigma + log(sqrt(2) * sigma)``,
        with ``sigma`` clipped to at least 70 and ``delta`` to at most 1000.
    """
    low, middle, high = y_pred[:, 0], y_pred[:, 1], y_pred[:, 2]
    sigma_clip = np.maximum(high - low, 70)
    delta = np.minimum(np.abs(y_true[:, 0] - middle), 1000)
    root2 = np.sqrt(2.)
    per_sample = (delta / sigma_clip) * root2 + np.log(sigma_clip * root2)
    return -np.mean(per_sample)

In [None]:
%%time
# Cross-validated training: one network per fold, out-of-fold predictions are
# stored in `pred`, and the test predictions of all folds are averaged in `pe`.
cnt = 0
BATCH_SIZE =256
EPOCHS = 1500
NFOLD = 11

# Folds are grouped by patient (see the `groups` argument of kf.split below),
# so a patient's rows never appear in both the train and validation part.
kf = GroupKFold(n_splits=NFOLD)

y = tr['FVC'].values.astype('float32')
z = tr[FE].values
ze = sub[FE].values
nh = z.shape[1]
# Three columns per row: the three outputs of the network.
pe = np.zeros((ze.shape[0], 3))
pred = np.zeros((z.shape[0], 3))


for tr_idx, val_idx in kf.split(z, y, tr['Patient']):
    cnt += 1
    print(f"FOLD {cnt}")
    # A fresh model per fold so no weights leak between folds.
    net = make_model(nh)
    
    # Stop after 200 epochs without val_loss improvement; shrink the learning
    # rate by a factor of 0.4 after 50 epochs without improvement.
    es = tf.keras.callbacks.EarlyStopping(monitor='val_loss',patience=200, min_delta = 0.000001,
                                          verbose=1, mode='min')
    lr_sch = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.4, patience=50,
                                                  verbose=0, mode='min', min_delta=0.000001, cooldown=0, min_lr=0)
    net.fit(z[tr_idx], y[tr_idx], batch_size=BATCH_SIZE, epochs=EPOCHS,
            callbacks = [es, lr_sch], 
            validation_data=(z[val_idx], y[val_idx]), verbose=0)
    
    # evaluate() returns [loss, score] as configured in make_model's compile().
    print("train", net.evaluate(z[tr_idx], y[tr_idx], verbose=0, batch_size=BATCH_SIZE))
    print("val", net.evaluate(z[val_idx], y[val_idx], verbose=0, batch_size=BATCH_SIZE))
    
    print("predict val...")
    pred[val_idx] = net.predict(z[val_idx], batch_size=BATCH_SIZE, verbose=0)
    print(calc_cv_score(y[val_idx].reshape(-1, 1), pred[val_idx]))

    print("predict test...")
    # Dividing by NFOLD while accumulating yields the mean over all folds.
    pe += net.predict(ze, batch_size=BATCH_SIZE, verbose=0) / NFOLD

    #==============
# Overall score computed on the out-of-fold predictions of every training row.
print("CV SCORE", calc_cv_score(y.reshape(-1, 1), pred))

# Confidence Tuning

In [None]:
import optuna
from functools import partial

# Store the out-of-fold point prediction and interval width next to the
# training rows they belong to.
oof_low, oof_mid, oof_high = pred[:, 0], pred[:, 1], pred[:, 2]
tr['FVC_pred'] = oof_mid
tr['Confidence_pred'] = oof_high - oof_low

# The tuning only looks at the last three rows of every patient.
tuning_columns = ['Weeks', 'FVC', 'FVC_pred', 'Confidence_pred']
df_last_3 = tr.groupby('Patient').tail(3).reset_index(drop=True)
X = df_last_3[tuning_columns].to_numpy()
C = 0

def calc_tunned_score(y_true, y_pred, Conf):
    """Score point predictions together with an explicit confidence.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values.
    Conf : array-like
        Confidence (sigma) per prediction; clipped to at least 70.

    Returns
    -------
    float
        Negated mean of ``sqrt(2) * delta / sigma + log(sqrt(2) * sigma)``,
        with the absolute error ``delta`` clipped to at most 1000.
    """
    sigma_clip = np.maximum(Conf, 70)
    delta = np.minimum(np.abs(y_true - y_pred), 1000)
    root2 = np.sqrt(2.)
    per_sample = (delta / sigma_clip) * root2 + np.log(sigma_clip * root2)
    return -np.mean(per_sample)

def objective(trial, X, y):
    """Optuna objective: score a linear-in-weeks adjustment of the confidence.

    The tuned confidence is ``Confidence_pred + a * Weeks + b``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Supplies the slope ``a`` in [0, 15] and the intercept ``b`` in
        [-100, 100].
    X : ndarray
        Columns are, in order: Weeks, FVC, FVC_pred, Confidence_pred.
    y : object
        Unused; kept so existing callers that pass it keep working.

    Returns
    -------
    float
        ``calc_tunned_score`` of the adjusted confidence (to be maximised).
    """
    # suggest_uniform is deprecated in Optuna; suggest_float samples the same
    # uniform range.
    a = trial.suggest_float('a', 0, 15)
    b = trial.suggest_float('b', -100, 100)

    # Use a dedicated name instead of overwriting the `y` argument.
    weekly_adjustment = a * X[:, 0] + b
    new_confidence = X[:, 3] + weekly_adjustment

    return calc_tunned_score(X[:, 1], X[:, 2], new_confidence)

n_trials = 500
# `y` is a placeholder (C); the objective does not read it.
obj = partial(objective, X=X, y=C)
# Seed the sampler so the tuned parameters are reproducible across runs;
# Optuna's sampler has its own random state, independent of seed_everything.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
optuna.logging.disable_default_handler()
study.optimize(obj, n_trials=n_trials)

In [None]:
# Compare the score on the last-3 rows before and after tuning, then apply the
# best parameters to every training row.
print('last 3 score befor tuning', calc_tunned_score(X[:, 1], X[:, 2], X[:, 3]))
print('last 3 score after tuning', study.best_value)
param = dict(study.best_params)
print('param', param)
tuned_confidence = tr['Confidence_pred'].values + param['a'] * tr['Weeks'].values + param['b']
print('Training data score', calc_tunned_score(tr['FVC'].values, tr['FVC_pred'].values, tuned_confidence))

In [None]:
# sigma_opt: mean absolute error of the out-of-fold point prediction.
# unc: out-of-fold interval width plus the tuned linear-in-weeks adjustment.
sigma_opt = mean_absolute_error(y, pred[:, 1])
interval_width = pred[:, 2] - pred[:, 0]
weekly_adjustment = param['a'] * tr['Weeks'] + param['b']
unc = interval_width + weekly_adjustment
sigma_mean = np.mean(unc)
print(sigma_opt, sigma_mean)

In [None]:
# Sanity check of the tuned uncertainty: min, mean, max and the share of
# non-negative values.
print(unc.min(), unc.mean(), unc.max(), (unc>=0).mean())

In [None]:
# Plot 100 randomly drawn training rows: target versus the three predicted
# quantiles. The labels follow the quantile levels used in qloss (0.2 / 0.5 /
# 0.8); the previous "q25" / "q75" labels did not match the trained quantiles.
idxs = np.random.randint(0, y.shape[0], 100)
plt.plot(y[idxs], label="ground truth")
plt.plot(pred[idxs, 0], label="q20")
plt.plot(pred[idxs, 1], label="q50")
plt.plot(pred[idxs, 2], label="q80")
plt.legend(loc="best")
plt.show()

# Distribution of the tuned uncertainty over all training rows.
plt.hist(unc, bins=30)
plt.title("uncertainty in prediction")
plt.show()

# Prediction

In [None]:
# PREDICTION
sub['FVC1'] = pe[:, 1]
sub['Confidence1'] = pe[:, 2] - pe[:, 0]
subm = sub[['Patient_Week','FVC','Confidence','FVC1','Confidence1', 'Weeks']].copy()

subm.loc[~subm.FVC1.isnull(),'FVC'] = subm.loc[~subm.FVC1.isnull(),'FVC1']
if sigma_mean<70:
    subm['Confidence'] = sigma_opt
else:
    subm.loc[~subm.FVC1.isnull(),'Confidence'] = subm.loc[~subm.FVC1.isnull(),'Confidence1'] + param['a'] * subm.loc[~subm.FVC1.isnull(),'Weeks'] + param['b']

In [None]:
# Preview the first rows of the submission frame.
subm.head()

In [None]:
# Summary statistics of the numeric submission columns, one column per row.
subm.describe().T

In [None]:
otest = pd.read_csv(f"{ROOT}/test.csv")

# Where the test file already contains a measurement for a requested
# Patient_Week, submit the measured FVC with a confidence of 0.1.
for patient, week, fvc in zip(otest['Patient'], otest['Weeks'], otest['FVC']):
    is_measured = subm['Patient_Week'] == patient + '_' + str(week)
    subm.loc[is_measured, 'FVC'] = fvc
    subm.loc[is_measured, 'Confidence'] = 0.1

subm[["Patient_Week","FVC","Confidence"]].to_csv("submission.csv", index=False)

In [None]:
# https://www.kaggle.com/carlossouza/bayesian-experiments

# Recover the patient id and the week number from the "<patient>_<week>" key.
patient_week_parts = subm['Patient_Week'].str.split('_')
subm['Patient'] = patient_week_parts.str[0]
subm['Weeks'] = patient_week_parts.str[-1].apply(int)

def chart(df, patient_id, ax):
    """Plot one patient's predicted FVC band together with the training points.

    Parameters
    ----------
    df : pandas.DataFrame
        Predictions with columns ``Patient``, ``Weeks``, ``FVC`` and
        ``Confidence``.
    patient_id : str
        Patient to plot.
    ax : matplotlib.axes.Axes
        Axes to draw on.

    Notes
    -----
    Training observations are read from the module-level ``tr`` frame. If
    ``df`` has no rows for ``patient_id`` only the title is set; previously
    this case raised ``ValueError`` from ``min()`` of an empty sequence.
    """
    plot_data = df[df['Patient'] == patient_id]
    if plot_data.empty:
        # Nothing to draw (e.g. the patient is not in the current test set).
        ax.set_title(patient_id)
        return

    weeks = plot_data['Weeks']
    FVC_low = plot_data['FVC'] - plot_data['Confidence']
    FVC_high = plot_data['FVC'] + plot_data['Confidence']

    # Observed measurements as markers, prediction as a line with a band of
    # +/- Confidence around it.
    plot_data_tr = tr[tr['Patient'] == patient_id]
    ax.plot(plot_data_tr['Weeks'], plot_data_tr['FVC'], 'o')
    ax.plot(weeks, plot_data['FVC'])
    ax.fill_between(weeks.values, FVC_low.values, FVC_high.values,
                    alpha=0.5, color='#ffcd3c')
    ax.set_title(patient_id)
    ax.set_ylabel('FVC')
    ax.set_ylim(FVC_low.min() - 100, FVC_high.max() + 100)

# One panel per example patient, filled row by row; the sixth panel of the
# 2x3 grid stays empty.
f, axes = plt.subplots(2, 3, figsize=(15, 10))
example_patients = [
    'ID00419637202311204720264',
    'ID00421637202311550012437',
    'ID00422637202311677017371',
    'ID00423637202312137826377',
    'ID00426637202313170790466',
]
for example_patient, example_ax in zip(example_patients, axes.flat):
    chart(subm, example_patient, example_ax)