## Setup

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
from tqdm import tqdm
import seaborn as sns
import wandb
from wandb.keras import WandbCallback
import keras
from keras.models import Sequential
import pydicom
import matplotlib.pyplot as plt
import cv2
import pathlib
from os import listdir
from scipy.stats import gmean

# sqrt(2) as a float32 TensorFlow tensor.
SQRT2 = tf.math.sqrt(tf.cast(2, dtype=tf.float32))

from pfutils import (get_test_data, get_train_data, get_pseudo_test_data, get_exponential_decay_lr_callback, TTA_on_test,
                     build_model, get_cosine_annealing_lr_callback, get_fold_indices, DataGenerator, make_lungmask)

from pfutils import (absolute_delta_error, sigma_cost, delta_over_sigma, optimal_sigma_loss_function, 
                    Laplace_metric, Laplace_log_likelihood, experimental_loss_function)

# Run-mode switches.
#   SUBMIT                  -- load the competition test set and write submission.csv
#   WANDB                   -- log every fold to Weights & Biases
#   TRAIN_ON_BACKWARD_WEEKS -- forwarded to get_train_data
SUBMIT = True
WANDB = False
TRAIN_ON_BACKWARD_WEEKS = False

# Number of pseudo test patients used to simulate tractable test cases when
# SUBMIT is False. Should be 0 if SUBMIT = True.
PSEUDO_TEST_PATIENTS = 0

In [None]:
# A submission run never uses pseudo test patients or W&B logging,
# regardless of what the flags were set to before.
if SUBMIT:
    PSEUDO_TEST_PATIENTS, WANDB = 0, False

In [None]:
# Optional Weights & Biases setup: runs only when WANDB is enabled.
if WANDB:    
    # Fetch the W&B API key from the Kaggle secret named "wandb_key".
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    wandb_key = user_secrets.get_secret("wandb_key")
    # NOTE(review): the message mentions key.txt, but only the Kaggle secret is read here.
    assert wandb_key, "Please create a key.txt or Kaggle Secret with your W&B API key"

    # IPython shell escapes: upgrade the wandb package, then log in with the key.
    # NOTE(review): wandb is already imported at the top of the notebook, so the
    # upgraded version may not be the one in use for this session -- confirm.
    !pip install wandb -qqq --upgrade
    !wandb login $wandb_key

## Settings And network

In [None]:
# --- Prediction target -------------------------------------------------------
# True  -> the model predicts a slope (this assumes the decrease is linear).
# False -> the model predicts single weeks.
PREDICT_SLOPE = False

# --- Image flags -------------------------------------------------------------
USE_IMAGES = False
APPLY_LUNGMASK = False
DIM = 224
IMG_FEATURES = 22
EFFNET = 0
USE_THREE_LAYERS = True

# --- Loss / schedule ---------------------------------------------------------
OPTIMAL_SIGMA_LOSS = False
COSINE_CYCLES = 5

# --- Dropout -----------------------------------------------------------------
DROP_OUT_RATE = 0
# Indices of the layers that get dropout, e.g. [0, 1, 2] for dropout in the
# first 3 layers.
DROP_OUT_LAYERS = []

# --- L2 regularization -------------------------------------------------------
L2_REGULARIZATION = False
REGULARIZATION_CONSTANT = 0.0001

In [None]:
# Number of cross-validation folds: a number between 1 and
# 176 - PSEUDO_TEST_PATIENTS (176 = 2^4 * 11).
FOLDS = 5

# Batch size.
BATCH_SIZE = 128

# Number of features fed into the neural network.
NUMBER_FEATURES = 10

In [None]:
# --- Test-time augmentation --------------------------------------------------
# Number of TTA steps and the TTA gaussian multiplier.
TTA_STEPS = 1
TTA_MULTIPLIER = 0

# --- Architecture ------------------------------------------------------------
# Hidden layers.
HIDDEN_LAYERS = [32, 32]

# --- Gaussian noise ----------------------------------------------------------
# One standard deviation per input feature (the reported std error for FVC
# measurement devices is 70).
NOISE_SDS = [
    10, 10, 1,          # WeekInit, WeekTarget, WeekDiff
    500, 0, 10,         # FVC, Percent, Age
    0.25,               # Sex
    0.25, 0.25, 0.25,   # CurrentlySmokes, Ex-smoker, Never Smoked
]
# GAUSSIAN_NOISE_FVC_CORRELATED is a boolean indicating whether the gaussians
# added to FVC on X and y are perfectly correlated or independent.
GAUSSIAN_NOISE_FVC_CORRELATED = True
ADD_NOISE_FVC_TO_PERCENT = True

# Activation function to use ('swish', 'leakyrelu' or 'relu').
ACTIVATION_FUNCTION = 'swish'

# --- Loss experiment ---------------------------------------------------------
# (sqrt2 * delta / 70) * LOSS_MODIFICATION is added to the loss function
# (a value of 1 gives roughly equal weight to delta and sigma).
LOSS_MODIFICATION = 1

# --- Batch normalization -----------------------------------------------------
BATCH_NORMALIZATION = False
PRE_BATCH_NORMALIZATION = False
BATCH_RENORMALIZATION = False

# --- Training length ---------------------------------------------------------
EPOCHS = 500

# --- Input and/or output normalization ---------------------------------------
INPUT_NORMALIZATION = True
OUTPUT_NORMALIZATION = True
NEGATIVE_NORMALIZATION = False

# --- Learning rate -----------------------------------------------------------
LEARNING_RATE_SCHEDULER = 'exp'  # 'exp', 'cos' or None
MAX_LEARNING_RATE = 0.001
EPOCHS_PER_OOM_DECAY = 300  # OoM: Order of Magnitude

MODEL_NAME = "TestNoPercentNoOverfit"

# Bundle the settings into one dictionary. It is handed to build_model,
# DataGenerator, the learning-rate callbacks, TTA_on_test and (when enabled)
# wandb.init.
config = {
    "NUMBER_FEATURES": NUMBER_FEATURES,
    "L2_REGULARIZATION": L2_REGULARIZATION,
    "INPUT_NORMALIZATION": INPUT_NORMALIZATION,
    "BATCH_RENORMALIZATION": BATCH_RENORMALIZATION,
    "ACTIVATION_FUNCTION": ACTIVATION_FUNCTION,
    "DROP_OUT_RATE": DROP_OUT_RATE,
    "OUTPUT_NORMALIZATION": OUTPUT_NORMALIZATION,
    "PRE_BATCH_NORMALIZATION": PRE_BATCH_NORMALIZATION,
    "EPOCHS": EPOCHS,
    "MAX_LEARNING_RATE": MAX_LEARNING_RATE,
    "LOSS_MODIFICATION": LOSS_MODIFICATION,
    "NOISE_SDS": NOISE_SDS,
    "OPTIMAL_SIGMA_LOSS": OPTIMAL_SIGMA_LOSS,
    "COSINE_CYCLES": COSINE_CYCLES,
    "MODEL_NAME": MODEL_NAME,
    "LEARNING_RATE_SCHEDULER": LEARNING_RATE_SCHEDULER,
    "PREDICT_SLOPE": PREDICT_SLOPE,
    "HIDDEN_LAYERS": HIDDEN_LAYERS,
    "REGULARIZATION_CONSTANT": REGULARIZATION_CONSTANT,
    "EPOCHS_PER_OOM_DECAY": EPOCHS_PER_OOM_DECAY,
    "DROP_OUT_LAYERS": DROP_OUT_LAYERS,
    "BATCH_SIZE": BATCH_SIZE,
    "GAUSSIAN_NOISE_FVC_CORRELATED": GAUSSIAN_NOISE_FVC_CORRELATED,
    "TTA_STEPS": TTA_STEPS,
    "ADD_NOISE_FVC_TO_PERCENT": ADD_NOISE_FVC_TO_PERCENT,
    "NEGATIVE_NORMALIZATION": NEGATIVE_NORMALIZATION,
    "BATCH_NORMALIZATION": BATCH_NORMALIZATION,
    "APPLY_LUNGMASK": APPLY_LUNGMASK,
    "USE_IMAGES": USE_IMAGES,
    "DIM": DIM,
    "IMG_FEATURES": IMG_FEATURES,
    "EFFNET": EFFNET,
    "TTA_MULTIPLIER": TTA_MULTIPLIER,
}

In [None]:
# Load the competition test set (submission runs only), the training set, and
# optionally a pseudo test set carved out of train.csv.
if SUBMIT:
    test_data, submission = get_test_data("../input/osic-pulmonary-fibrosis-progression/test.csv")
    # Blank out the "Percent" feature. This must live inside the SUBMIT branch:
    # test_data does not exist yet when SUBMIT is False, so doing it
    # unconditionally raises NameError.
    test_data["Percent"] = 0

train_data, train_images, train_labels = get_train_data(
    '../input/osic-pulmonary-fibrosis-progression/train.csv',
    PSEUDO_TEST_PATIENTS, TRAIN_ON_BACKWARD_WEEKS, USE_IMAGES, APPLY_LUNGMASK, DIM)
# The training "Percent" column is blanked out the same way.
train_data["Percent"] = 0

# Persist the training arrays as .npy files in the working directory.
np.save("train_data.npy", train_data.to_numpy())
np.save("train_images.npy", np.array(train_images))
np.save("train_labels.npy", train_labels.to_numpy())

if PSEUDO_TEST_PATIENTS > 0:
    test_data, test_check = get_pseudo_test_data(
        '../input/osic-pulmonary-fibrosis-progression/train.csv',
        PSEUDO_TEST_PATIENTS, INPUT_NORMALIZATION)
    # Keep the pseudo test features consistent with the training data, whose
    # "Percent" column is zeroed above.
    # NOTE(review): assumes the pseudo test frame has the same "Percent"
    # column as train_data -- confirm against pfutils.
    test_data["Percent"] = 0

In [None]:
# Build one model from the current config and print its layer summary as a
# sanity check before training.
# (Optional: tf.keras.utils.plot_model(model) draws the architecture.)
model = build_model(config)
model.summary()

## Folds and Training

In [None]:
# Fold boundaries over the training data: fold k is validated on the index
# range [fold_pos[k], fold_pos[k + 1]).
fold_pos = get_fold_indices(FOLDS, train_data)
print(fold_pos)

In [None]:
# Cross-validated training. For every fold a fresh model is trained, the
# weights with the lowest validation loss are checkpointed, and (when a test
# set is available) the restored best model predicts on each TTA variant of
# the test data. All per-fold / per-TTA-step outputs are collected in
# `predictions`.
predictions = []

for fold in range(FOLDS):

    # Validation slice is [val_start, val_stop); everything else is training.
    val_start, val_stop = fold_pos[fold], fold_pos[fold + 1]
    train_ID = list(range(fold_pos[0], val_start)) + list(range(val_stop, fold_pos[-1]))
    val_ID = list(range(val_start, val_stop))

    # Generators
    training_generator = DataGenerator(train_ID, config)
    validation_generator = DataGenerator(val_ID, config, validation=True)

    model = build_model(config)

    # Keep only the best (lowest val_loss) weights of this fold on disk.
    checkpoint_path = 'fold-%i.h5' % fold
    sv = tf.keras.callbacks.ModelCheckpoint(
        checkpoint_path, monitor='val_loss', verbose=0, save_best_only=True,
        save_weights_only=True, mode='min', save_freq='epoch')
    callbacks = [sv]

    # The two schedulers are mutually exclusive; any other value means no
    # learning-rate callback.
    if LEARNING_RATE_SCHEDULER == 'exp':
        callbacks.append(get_exponential_decay_lr_callback(config))
    elif LEARNING_RATE_SCHEDULER == 'cos':
        callbacks.append(get_cosine_annealing_lr_callback(config))

    print(fold+1, "of", FOLDS)
    if WANDB:
        # One W&B run per fold, named "<MODEL_NAME>-F<fold>".
        name = MODEL_NAME + '-F{}'.format(fold+1)
        config.update({'fold': fold+1})
        wandb.init(project="pulfib", name=name, config=config)
        wandb_cb = WandbCallback()
        callbacks.append(wandb_cb)

    history = model.fit(training_generator, validation_data=validation_generator,
                        epochs=EPOCHS, verbose=0, callbacks=callbacks)

    if SUBMIT or PSEUDO_TEST_PATIENTS > 0:
        # Predict with the best checkpoint rather than the last-epoch weights.
        model.load_weights(checkpoint_path)
        TTA_test_data = TTA_on_test(test_data.to_numpy(), config)
        # The last axis of TTA_test_data indexes the TTA step.
        for j in range(TTA_STEPS):
            predictions.append(model.predict(TTA_test_data[:, :, j], batch_size=256))

    if WANDB:
        # Finalize the run. wandb.join() is a deprecated alias of wandb.finish().
        wandb.finish()

In [None]:
if SUBMIT:
    # Combine all fold / TTA predictions: column 0 (FVC) is averaged
    # arithmetically, column 1 (Confidence) with a quadratic mean
    # (square -> mean -> square root). Absolute values are taken first.
    predictions = np.abs(predictions)
    predictions[:, :, 1] = np.power(predictions[:, :, 1], 2)
    predictions = np.mean(predictions, axis=0)
    predictions[:, 1] = np.power(predictions[:, 1], 0.5)

    # Rows of `submission` are addressed by the labels 1..len(test_data).
    for row in range(len(test_data)):
        label = row + 1
        submission.loc[label, "FVC"] = predictions[row, 0]
        submission.loc[label, "Confidence"] = predictions[row, 1]

    submission.to_csv("submission.csv", index=False)
    

In [None]:
# Pseudo-test evaluation: for 11 prediction sets, score the ensemble when the
# sigmas are combined with a power mean of exponent i (i = 0 uses the
# geometric mean), and plot the mean loss against i. The i = 2 (quadratic
# mean) score of every set is collected and plotted at the end.
if PSEUDO_TEST_PATIENTS > 0:
    # Loop invariants.
    FVC_true = test_check["TargetFVC"].values
    sq2 = np.sqrt(2)

    quadraticmeans = []
    for j in range(0, 11):
        result = []
        for i in range(-20, 20):
            # NOTE(review): predictions[j] is indexed as a 3-D array here, but
            # the training loop appends 2-D model.predict outputs, so this
            # needs `predictions` in a different layout than that loop builds
            # -- confirm the intended sweep.
            postprocess = np.abs(predictions[j])
            if i == 0:
                # Exponent 0 is the limit case of the power mean: geometric mean.
                postprocess[:, :, 1] = gmean(postprocess[:, :, 1], axis=0)
                postprocess = np.mean(postprocess, axis=0)
            else:
                # Power mean of the sigmas: raise to i, average, take the i-th root.
                postprocess[:, :, 1] = np.power(postprocess[:, :, 1], i)
                postprocess = np.mean(postprocess, axis=0)
                postprocess[:, 1] = np.power(postprocess[:, 1], 1/i)
            FVC_pred = postprocess[:, 0]
            sigma = postprocess[:, 1]

            # Sigma is floored at 70 and the absolute error capped at 1000
            # before the loss is computed.
            sigma_clip = np.maximum(np.abs(sigma), 70)
            delta = np.abs(FVC_true - FVC_pred)
            delta = np.minimum(delta, 1000)

            # Everything here is a NumPy array, so use np.log instead of
            # mixing in a TensorFlow op.
            loss = (delta / sigma_clip) * sq2 + np.log(sigma_clip * sq2)
            result.append(np.mean(loss))
            if i == 2:
                quadraticmeans.append(result[-1])
        plt.plot(np.arange(-20, 20), result)
        plt.show()

    # Kept inside the guard: quadraticmeans only exists when the evaluation
    # ran, so plotting it unconditionally raises NameError on submit runs.
    plt.plot(0.1 * np.arange(0, 11), quadraticmeans)
    plt.show()