First things first: I want to give credit where it's due. Thank you, Ulrich G., for providing the bulk of this notebook in https://www.kaggle.com/ulrich07/osic-multiple-quantile-regression-starter. The `cond_rnn` package imported below comes from the dataset at https://www.kaggle.com/eladwar/conditionalrnn

In [None]:
import sys
# Put the attached "conditionalrnn" dataset directory first on the import path
# so that `cond_rnn` can be imported further down.
sys.path.insert(0,"../input/conditionalrnn")

In [None]:
import numpy as np
import pandas as pd

import os
import matplotlib.pyplot as plt
from tqdm import tqdm

from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold,GroupKFold

import tensorflow as tf
from cond_rnn import ConditionalRNN
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L
import tensorflow.keras.models as M
from tensorflow.keras.optimizers import Adam,Nadam
from tensorflow.keras import initializers

In [None]:
import random
def seed_everything(seed=2020):
    """Seed every random number generator used in this notebook.

    Exports ``seed`` as ``PYTHONHASHSEED`` and seeds Python's ``random``
    module, NumPy and TensorFlow with the same value.

    Args:
        seed: Integer seed shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)


seed_everything(43)

In [None]:
# Number of cross-validation folds; the test predictions are also averaged
# over this many folds.
NFOLD = 5
# Plain K-fold splitter over rows (no shuffling is requested here).
kf = KFold(n_splits=NFOLD)

In [None]:
# Directory holding train.csv, test.csv and sample_submission.csv.
ROOT = "../input/osic-pulmonary-fibrosis-progression"

In [None]:
# Training measurements. A (Patient, Weeks) pair that occurs more than once is
# removed entirely: keep=False drops every copy, not just the extras.
tr = pd.read_csv(f"{ROOT}/train.csv")
tr = tr.drop_duplicates(subset=['Patient', 'Weeks'], keep=False)
chunk = pd.read_csv(f"{ROOT}/test.csv")

print("add infos")
# Submission ids look like "<patient>_<week>": the part before the first "_"
# is the patient, the part after the last "_" is the week number.
sub = pd.read_csv(f"{ROOT}/sample_submission.csv")
sub['Patient'] = sub['Patient_Week'].str.split('_').str[0]
sub['Weeks'] = sub['Patient_Week'].str.split('_').str[-1].map(int)
sub = sub[['Patient', 'Weeks', 'Confidence', 'Patient_Week']]
# Copy the per-patient columns of test.csv onto every requested week; the
# Weeks column of test.csv is dropped so it does not clash with the one above.
sub = sub.merge(chunk.drop(columns='Weeks'), on="Patient")

In [None]:
# Tag every row with the split it came from, so the three frames can be
# stacked for joint feature engineering and separated again afterwards.
tr['WHERE'] = 'train'
chunk['WHERE'] = 'val'
sub['WHERE'] = 'test'
# DataFrame.append was removed in pandas 2.0. pd.concat stacks the frames the
# same way: rows in train, val, test order, union of columns, original index
# labels kept.
data = pd.concat([tr, chunk, sub])

In [None]:
# Sanity check: shapes of the three splits and of the stacked frame.
print(tr.shape, chunk.shape, sub.shape, data.shape)

In [None]:
# Earliest observed week per patient. Rows tagged 'test' (the submission
# template) are blanked out first so they cannot contribute to the minimum.
data['min_week'] = np.where(data['WHERE'] == 'test', np.nan, data['Weeks'])
data['min_week'] = data.groupby('Patient')['min_week'].transform('min')

In [None]:
# Baseline FVC per patient: among the rows recorded at the patient's earliest
# week, keep the first one encountered and expose its FVC as `min_FVC`.
base = (
    data.loc[data.Weeks == data.min_week, ['Patient', 'FVC']]
    .rename(columns={'FVC': 'min_FVC'})
    .drop_duplicates(subset='Patient', keep='first')
)

In [None]:
# Attach each patient's baseline FVC to all of their rows, then express time
# as the number of weeks elapsed since that patient's earliest week.
data = pd.merge(data, base, how='left', on='Patient')
data['base_week'] = data['Weeks'].sub(data['min_week'])
del base

# Categorical Features for RNN:
* The commented-out categorical feature generation gave better results in the competition (Notebook V4).
* The active (uncommented) categorical feature generation is used mainly to reduce repetition (in other words, I don't want to one-hot encode).
* You can build two models, one per set of generated features, and ensemble them.
* You can also build a single model that uses every way of generating the categorical features.

In [None]:
# Create alternate categorical features.
# Step 1: integer-encode both categorical columns in place and register them
# as features.
COLS = ['Sex', 'SmokingStatus']
FE = []
for col in COLS:
    data[col] = pd.factorize(data[col])[0]
FE.extend(COLS)
#=================
# Step 2: add one 0/1 indicator column per SmokingStatus code. The source
# column is named explicitly; previously this loop only worked because `col`
# still held the last value from the loop above. Each indicator column is
# named by the integer code itself.
ONE_HOT_COL = 'SmokingStatus'
for mod in data[ONE_HOT_COL].unique():
    FE.append(mod)
    data[mod] = (data[ONE_HOT_COL] == mod).astype(int)

In [None]:
# Min-max scale the numeric features: each source column is shifted by its
# minimum and divided by its range, and stored under a new column name.
for source, scaled in (('Age', 'age'), ('min_FVC', 'BASE'),
                       ('base_week', 'week'), ('Percent', 'percent')):
    lowest, highest = data[source].min(), data[source].max()
    data[scaled] = (data[source] - lowest) / (highest - lowest)
FE += ['age', 'percent', 'week', 'BASE']

In [None]:
# Undo the stacking: recover the three splits from their WHERE tag, then
# release the combined frame.
tr, chunk, sub = (data.loc[data.WHERE == tag] for tag in ('train', 'val', 'test'))
del data

In [None]:
# Sanity check: shapes of the three splits after feature engineering.
print(tr.shape, chunk.shape, sub.shape)

# What do I do to get everything ready for a categorical RNN
* Split categorical and measurement features
* Measurement features are reshaped into the standard recurrent neural network input format: [batch, timesteps, features]
* Category features are left in a normal shape

In [None]:
# Preview the feature columns (in FE order) for the training rows.
tr[FE]

In [None]:
# Targets and feature matrices (columns follow the order of FE).
y = tr['FVC'].to_numpy()
z = tr[FE].to_numpy()
ze = sub[FE].to_numpy()
# --------------------------------------------------------
# Prediction buffers with three output columns per row:
# `pred` for the training rows, `pe` for the test rows.
pred = np.zeros((len(z), 3))
pe = np.zeros((len(ze), 3))
# --------------------------------------------------------
# The first five feature columns are the categorical ones; they stay 2-D.
train_categories, test_categories = z[:, :5], ze[:, :5]
# --------------------------------------------------------
# The remaining columns are measurements, reshaped to
# [batch, timesteps, features] with a single timestep.
train_measurements = z[:, 5:].reshape(len(z), 1, -1)
test_measurements = ze[:, 5:].reshape(len(ze), 1, -1)

# Loss & Scoring Functions

In [None]:
# C1: lower bound applied to the predicted spread; C2: upper bound applied to
# the absolute error.
C1, C2 = tf.constant(70, dtype='float32'), tf.constant(1000, dtype="float32")
#=============================#
def score(y_true, y_pred):
    """Mean per-row metric computed from three-column predictions.

    For each row, with ``sigma = max(y_pred[:, 2] - y_pred[:, 0], C1)`` and
    ``delta = min(|y_true[:, 0] - y_pred[:, 1]|, C2)``, the metric is
    ``sqrt(2) * delta / sigma + log(sqrt(2) * sigma)``. Lower is better.

    Args:
        y_true: Targets; indexed as ``y_true[:, 0]``, so it must be 2-D.
        y_pred: Predictions with at least three columns.

    Returns:
        Scalar tensor: the mean of the per-row metric.
    """
    # Keep the cast results. They used to be discarded, which left the inputs
    # in whatever dtype the caller passed.
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    sigma = y_pred[:, 2] - y_pred[:, 0]
    fvc_pred = y_pred[:, 1]

    # Clip rather than shift: spreads below C1 are all treated as C1.
    sigma_clip = tf.maximum(sigma, C1)
    delta = tf.abs(y_true[:, 0] - fvc_pred)
    delta = tf.minimum(delta, C2)
    sq2 = tf.sqrt(tf.cast(2, dtype=tf.float32))
    metric = (delta / sigma_clip) * sq2 + tf.math.log(sigma_clip * sq2)
    return K.mean(metric)
#============================#
def qloss(y_true, y_pred):
    """Pinball (quantile) loss averaged over the 0.2, 0.5 and 0.8 quantiles.

    Each of the three prediction columns is paired with one quantile level.
    For a residual ``e = y_true - y_pred`` and level ``q``, the per-element
    loss is ``max(q * e, (q - 1) * e)``.

    Returns:
        Scalar tensor: the mean loss over all elements.
    """
    quantiles = tf.constant(np.array([[0.2, 0.50, 0.8]]), dtype=tf.float32)
    residual = y_true - y_pred
    under = quantiles * residual
    over = (quantiles - 1) * residual
    return K.mean(tf.maximum(under, over))
#=============================#
def mloss(_lambda):
    """Build a loss that blends the pinball loss with the score metric.

    Args:
        _lambda: Weight given to ``qloss``; ``score`` gets ``1 - _lambda``.

    Returns:
        A ``loss(y_true, y_pred)`` function computing the weighted sum.
    """
    def loss(y_true, y_pred):
        pinball = qloss(y_true, y_pred)
        metric = score(y_true, y_pred)
        return _lambda * pinball + (1 - _lambda) * metric
    return loss

# Conditional RNN model

In [None]:
class MySimpleModel(tf.keras.Model):
    """Conditional LSTM followed by a dense layer and a three-column head.

    The head adds two Dense(3) branches together: a linear branch, plus the
    cumulative sum (along the column axis) of a ReLU branch.
    """

    def __init__(self):
        super().__init__()
        # Recurrent encoder from the cond_rnn package.
        self.cond = ConditionalRNN(100, cell='LSTM', dtype=tf.float32)
        self.x = L.Dense(100, activation='elu', kernel_initializer='he_uniform',
                         name="d2")
        # Two parallel three-unit branches, combined by `preds` below.
        self.p1 = L.Dense(3, activation="linear", name="p1")
        self.p2 = L.Dense(3, activation="relu", kernel_initializer='he_uniform',
                          name="p2")
        self.preds = L.Lambda(
            lambda pair: pair[0] + tf.cumsum(pair[1], axis=1),
            name="preds",
        )

    def call(self, inputs, **kwargs):
        """Run the forward pass; ``inputs`` is handed to the ConditionalRNN as is."""
        hidden = self.x(self.cond(inputs))
        return self.preds([self.p1(hidden), self.p2(hidden)])

In [None]:
net = MySimpleModel()
# One forward pass over the full training arrays, presumably so the layers
# create their weights before compiling.
# NOTE(review): this invokes `call` directly instead of `net(...)` — confirm
# that is intended.
net.call([train_measurements,train_categories])
# Loss is 0.8 * qloss + 0.2 * score; score is also reported as a metric.
net.compile(optimizer='adam', loss=mloss(0.8), metrics=[score])
# optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

In [None]:
# Just double-checking the model input shapes: measurements, then categories.
print(train_measurements.shape,train_categories.shape)

In [None]:
# Setting jit/xla for greater efficiency
# NOTE(review): clear_session() runs after `net` was created and compiled
# above — confirm this ordering is intended.
tf.keras.backend.clear_session()
tf.config.optimizer.set_jit(True)

In [None]:
%%time
cnt = 0
BatchSize = 200
for tr_idx, val_idx in kf.split(z):
    cnt += 1

    print(f"FOLD {cnt}")
    # Train every fold from freshly initialised weights. Reusing one model
    # across folds means folds 2..N start from weights already fitted on rows
    # that are now in their validation split, so the out-of-fold predictions
    # (and everything derived from `pred`) would be optimistic.
    net = MySimpleModel()
    net.compile(optimizer='adam', loss=mloss(0.8), metrics=[score])

    # Model inputs are always [measurements, categories].
    fit_x = [train_measurements[tr_idx, :, :], train_categories[tr_idx, :]]
    val_x = [train_measurements[val_idx, :, :], train_categories[val_idx, :]]
    fit_y = y[tr_idx].astype(float)
    val_y = y[val_idx].astype(float)

    net.fit(fit_x, fit_y, batch_size=BatchSize, epochs=800,
            validation_data=(val_x, val_y), verbose=0)
    print("train", net.evaluate(fit_x, fit_y, verbose=0, batch_size=BatchSize))
    print("val", net.evaluate(val_x, val_y, verbose=0, batch_size=BatchSize))
    print("predict val...")
    # Out-of-fold predictions: each training row is predicted by the model
    # that did not train on it.
    pred[val_idx] = net.predict(val_x, batch_size=BatchSize, verbose=0)
    print("predict test...")
    # Test predictions are averaged over the folds.
    pe += net.predict([test_measurements, test_categories],
                      batch_size=BatchSize, verbose=0) / NFOLD
# ==============

In [None]:
# Out-of-fold summary: MAE of the middle output column, and the mean spread
# between the last and first output columns.
unc = pred[:, 2] - pred[:, 0]
sigma_opt = mean_absolute_error(y, pred[:, 1])
sigma_mean = unc.mean()
print(sigma_opt, sigma_mean)

This simply checks which quantile, taken across the three predicted outputs of each row, gives the lowest MAE, in case you want to apply an extra transformation at the end to boost your score.

In [None]:
increment = 0.001
# Candidate quantile levels, built once so that the search and the lookup of
# the winner use exactly the same grid.
levels = np.arange(0, 1, increment)
# MAE of the row-wise quantile of the three predicted outputs, per level.
error = [mean_absolute_error(y, np.quantile(pred, level, axis=1))
         for level in levels]
best = int(np.argmin(error))
# Recompute the prediction at the best level. Previously the plot showed the
# last level tried (0.999), not the best one.
quant_5 = np.quantile(pred, levels[best], axis=1)

print('Best Quantile:', levels[best])
print('Best MAE of Optimized Quantile:', error[best])
print('Baseline MAE:', mean_absolute_error(y, pred[:, 1]))
# Optimized vs Baseline Graphs
plt.plot(quant_5, c='g')
plt.title('Optimized vs Baseline')
plt.plot(pred[:, 1])
plt.plot(y)
plt.tight_layout()
plt.show()

In [None]:
import math
import scipy as sp
from functools import partial
# One row per training sample: the features, the middle predicted output and
# the true FVC.
scoring_df = pd.DataFrame(z)
scoring_df['FVC_pred'] = pred[:, 1]
scoring_df['FVC'] = y
# baseline score: a constant confidence of 100 for every row
scoring_df['Confidence'] = 100
# Confidence is floored at 70 and the absolute error capped at 1000.
scoring_df['sigma_clipped'] = scoring_df['Confidence'].clip(lower=70)
scoring_df['diff'] = (scoring_df['FVC'] - scoring_df['FVC_pred']).abs()
scoring_df['delta'] = scoring_df['diff'].clip(upper=1000)
scoring_df['score'] = -math.sqrt(2)*scoring_df['delta']/scoring_df['sigma_clipped'] - np.log(math.sqrt(2)*scoring_df['sigma_clipped'])
# Stored under its own name: rebinding the global `score` here would replace
# the Keras metric function defined earlier in the notebook.
baseline_score = scoring_df['score'].mean()
print(baseline_score)

def loss_func(weight, row):
    """Negated per-row score for a candidate confidence value.

    Args:
        weight: Candidate confidence; values below 70 are treated as 70.
        row: Mapping with ``'FVC'`` and ``'FVC_pred'`` entries.

    Returns:
        ``sqrt(2) * min(|FVC - FVC_pred|, 1000) / sigma + log(sqrt(2) * sigma)``
        with ``sigma = max(weight, 70)``; smaller is better.
    """
    root_two = math.sqrt(2)
    sigma = max(weight, 70)
    abs_error = min(abs(row['FVC'] - row['FVC_pred']), 1000)
    return root_two * abs_error / sigma + np.log(root_two * sigma)

# Import the submodule explicitly: `import scipy as sp` alone does not
# guarantee that `sp.optimize` has been loaded.
from scipy.optimize import minimize

# For every row, search for the confidence that minimises loss_func,
# starting from 100.
results = []
tk0 = tqdm(scoring_df.iterrows(), total=len(scoring_df))
for _, row in tk0:
    loss_partial = partial(loss_func, row=row)
    weight = [100]
    result = minimize(loss_partial, weight, method='SLSQP')
    x = result['x']
    results.append(x[0])

# optimized score
scoring_df['Confidence'] = results
scoring_df['sigma_clipped'] = scoring_df['Confidence'].clip(lower=70)
scoring_df['diff'] = (scoring_df['FVC'] - scoring_df['FVC_pred']).abs()
scoring_df['delta'] = scoring_df['diff'].clip(upper=1000)
scoring_df['score'] = -math.sqrt(2)*scoring_df['delta']/scoring_df['sigma_clipped'] - np.log(math.sqrt(2)*scoring_df['sigma_clipped'])
# Stored under its own name so the global `score` metric function is not
# overwritten by a float.
optimized_score = scoring_df['score'].mean()
print(optimized_score)

In [None]:
# Plot the true FVC and the three predicted outputs for 100 training rows
# drawn at random (with replacement).
idxs = np.random.randint(0, y.shape[0], 100)
plt.plot(y[idxs], label="ground truth")
# The legend uses the quantile levels the model is trained for in qloss
# (0.2, 0.5, 0.8); the previous "q25"/"q75" labels did not match them.
plt.plot(pred[idxs, 0], label="q20")
plt.plot(pred[idxs, 1], label="q50")
plt.plot(pred[idxs, 2], label="q80")
plt.legend(loc="best")
plt.show()

In [None]:
# Spread statistics: min, mean, max, and the share of rows whose spread is
# non-negative.
print(unc.min(), unc.mean(), unc.max(), (unc>=0).mean())

In [None]:
# Histogram of the spread between the last and first predicted output columns.
plt.hist(unc)
plt.title("Difference between 20th and 80th Quantiles")
plt.show()

### PREDICTION

In [None]:
# Point prediction: middle output column of the fold-averaged test predictions.
sub['FVC1'] = pe[:,1]
# Confidence: spread between the last and first output columns.
sub['Confidence1'] = (pe[:, 2] - pe[:, 0])

In [None]:
# Work on a copy holding the submission columns plus the model outputs.
subm = sub[['Patient_Week','FVC','Confidence','FVC1','Confidence1']].copy()

In [None]:
# Preview the first rows that have a model prediction.
subm.loc[~subm.FVC1.isnull()].head(10)

In [None]:
# Rows for which the model produced a prediction.
has_pred = subm.FVC1.notnull()
subm.loc[has_pred, 'FVC'] = subm.loc[has_pred, 'FVC1']
if sigma_mean < 70:
    # Mean out-of-fold spread is below 70: use the out-of-fold MAE as one
    # constant confidence for every row.
    subm['Confidence'] = sigma_opt
else:
    # Otherwise use each row's own predicted spread.
    subm.loc[has_pred, 'Confidence'] = subm.loc[has_pred, 'Confidence1']

In [None]:
# Display the first rows of the submission frame.
subm.head()

In [None]:
# Summary statistics of the numeric columns, one row per column.
subm.describe().T

In [None]:
# Display the final Confidence column.
subm["Confidence"]

In [None]:
# Write the three submission columns to submission.csv, without the index.
subm[["Patient_Week","FVC","Confidence"]].to_csv("submission.csv", index=False)