In [None]:
import os
import cv2

import pydicom
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.optimizers.schedules import PiecewiseConstantDecay

import matplotlib.pyplot as plt

from tqdm.notebook import tqdm

print(tf.__version__)

## Decay theory
Inputs available at test time:
   * FVC at week n
   * Percent at week n
   * Age
   * Sex
   * Smoking status
   * CT scan at week n
   
Outputs:
   * FVC at any week
   * Percent at any week
   
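$FVC = a_q \cdot (week - week_{test}) + FVC_{test}$

$Confidence = Percent_{test} - a_q \cdot |week - week_{test}|$

Here $a_q$ is a quantile (e.g. the 0.75 quantile) of the slopes predicted for the patient's CT slices; the quantile level $q$ is selected on the validation patients below.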

So let's try to predict the coefficient $a$.

In [None]:
# CONSTANTS
BATCH_SIZE = 32
SEED = 42

# Seed NumPy and TensorFlow so slice sampling and weight initialisation are more
# repeatable between runs (multi-worker data loading can still reorder draws).
np.random.seed(SEED)
tf.random.set_seed(SEED)

# `tf.test.is_gpu_available()` is deprecated; query the physical device list instead.
assert tf.config.list_physical_devices('GPU'), 'No GPU detected by TensorFlow'
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [None]:
# Load the training table; later cells read its Patient, Weeks, FVC, Percent, Age,
# Sex and SmokingStatus columns.
train = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv') 

In [None]:
# Preview the first rows of the training table.
train.head()

In [None]:
# List the distinct SmokingStatus values; get_tab branches on these strings.
train.SmokingStatus.unique()

In [None]:
def get_tab(df):
    """Encode a patient's tabular attributes as a 4-element feature vector.

    Only the first row of ``df`` is read (columns ``Age``, ``Sex`` and
    ``SmokingStatus``).

    Layout of the returned NumPy array:
        [0]    (Age - 30) / 30
        [1]    0 for 'Male', 1 for any other value
        [2:4]  smoking code: 'Never smoked' -> (0, 0), 'Ex-smoker' -> (1, 1),
               'Currently smokes' -> (0, 1), any other value -> (1, 0)
    """
    smoking_codes = {
        'Never smoked': [0, 0],
        'Ex-smoker': [1, 1],
        'Currently smokes': [0, 1],
    }

    age_feature = (df.Age.values[0] - 30) / 30
    sex_feature = 0 if df.Sex.values[0] == 'Male' else 1
    # Unknown statuses share the remaining code (1, 0).
    smoking_feature = smoking_codes.get(df.SmokingStatus.values[0], [1, 0])

    return np.array([age_feature, sex_feature, *smoking_feature])

In [None]:
# Fit a straight line FVC ~ a * week + b for every patient and keep the slope `a`
# as the regression target, together with the patient's tabular feature vector.
A = {}    # patient id -> fitted FVC slope per week
TAB = {}  # patient id -> tabular feature vector from get_tab
P = []    # patient ids, in order of first appearance in `train`
# Iterating the array directly (no enumerate) lets tqdm display a total.
for p in tqdm(train.Patient.unique()):
    sub = train.loc[train.Patient == p, :]
    fvc = sub.FVC.values
    weeks = sub.Weeks.values
    # Design matrix with columns [week, 1] for the least-squares line fit.
    c = np.vstack([weeks, np.ones(len(weeks))]).T
    # rcond=None requests NumPy's machine-precision cutoff explicitly, which avoids
    # the FutureWarning older NumPy emits when rcond is omitted.
    a, b = np.linalg.lstsq(c, fvc, rcond=None)[0]

    A[p] = a
    TAB[p] = get_tab(sub)
    P.append(p)

## CNN for coefficient prediction

In [None]:
def get_img(path):
    """Read a DICOM file and return its pixel data rescaled and resized to 512x512.

    ``RescaleIntercept`` is subtracted from the pixel array and the result is
    divided by ``RescaleSlope * 1000`` before ``cv2.resize`` is applied.
    """
    dcm = pydicom.dcmread(path)
    shifted = dcm.pixel_array - dcm.RescaleIntercept
    scale = dcm.RescaleSlope * 1000
    return cv2.resize(shifted / scale, (512, 512))

In [None]:
# x, y = [], []
# for p in tqdm(train.Patient.unique()):
#     try:
#         ldir = os.listdir(f'../input/osic-pulmonary-fibrosis-progression-lungs-mask/mask_noise/mask_noise/{p}/')
#         numb = [float(i[:-4]) for i in ldir]
#         for i in ldir:
#             x.append(cv2.imread(f'../input/osic-pulmonary-fibrosis-progression-lungs-mask/mask_noise/mask_noise/{p}/{i}', 0).mean())
#             y.append(float(i[:-4]) / max(numb))
#     except:
#         pass

In [None]:
# plt.figure(figsize=(10,10))

# plt.plot(y, x, '.', alpha=0.25)
# plt.axvspan(0.17, 0.8, color='red')

In [None]:
import seaborn as sns

# Distribution of the fitted per-patient FVC slopes (the regression target).
ax = sns.distplot(list(A.values()))
ax.set(title='Fitted FVC slope per patient', xlabel='slope a (FVC per week)', ylabel='density');

In [None]:
from tensorflow.keras.utils import Sequence

class IGenerator(Sequence):
    """Keras ``Sequence`` that yields random training batches for slope regression.

    Batches are sampled independently of the requested index: ``batch_size``
    patient ids are drawn with replacement from ``keys`` and one eligible CT slice
    is drawn per patient. Each slice is loaded with ``get_img``, pixels outside
    the patient's lung mask are set to 0, and the slice is paired with the
    patient's tabular vector and target slope.

    Args:
        keys: iterable of patient ids to sample from; ids in ``BAD_ID`` are dropped.
        a: mapping patient id -> target slope.
        tab: mapping patient id -> tabular feature vector.
    """

    # Patient ids excluded from sampling.
    # NOTE(review): presumably scans that cannot be read -- confirm.
    BAD_ID = ['ID00011637202177653955184', 'ID00052637202186188008618']

    def __init__(self, keys, a, tab):
        self.keys = [k for k in keys if k not in self.BAD_ID]
        self.a = a
        self.tab = tab
        self.batch_size = BATCH_SIZE

        # patient id -> file names whose numeric stem, divided by the number of
        # files in the patient directory, lies strictly between 0.15 and 0.8.
        self.train_data = {}
        for p in train.Patient.unique():
            ldir = os.listdir(f'../input/osic-pulmonary-fibrosis-progression/train/{p}/')
            n_files = len(ldir)
            self.train_data[p] = [i for i in ldir
                                  if 0.15 < int(i[:-4]) / n_files < 0.8]

    def __len__(self):
        # Nominal number of batches; batches are random, so this is not tied to
        # the dataset size.
        return 1000

    def __getitem__(self, idx):
        """Return ``([images, tab], slopes)`` for one randomly sampled batch.

        ``idx`` is ignored. Samples that fail to load are reported and skipped, so
        a batch may hold fewer than ``batch_size`` items. ``images`` has a trailing
        channel axis of size 1.
        """
        x = []
        a, tab = [], []
        keys = np.random.choice(self.keys, size=self.batch_size)
        for k in keys:
            # Reset per sample so the failure report never shows a stale slice name
            # (or raises NameError) when drawing the slice itself fails.
            i = None
            try:
                i = np.random.choice(self.train_data[k], size=1)[0]
                img = get_img(f'../input/osic-pulmonary-fibrosis-progression/train/{k}/{i}')
                mask = cv2.resize(cv2.imread(f'../input/osic-pulmonary-fibrosis-progression-lungs-mask/mask_clear/mask_clear/{k}/{i[:-4]}.jpg', 0), (512, 512)) > 0
                img[~mask] = 0
                x.append(img)
                a.append(self.a[k])
                tab.append(self.tab[k])
            except Exception:
                # Best effort: report the failing (patient, slice) and carry on.
                # `Exception` (not a bare except) lets KeyboardInterrupt through.
                print(k, i)

        x, a, tab = np.array(x), np.array(a), np.array(tab)
        x = np.expand_dims(x, axis=-1)
        return [x, tab], a

In [None]:
from tensorflow.keras.applications import ResNet50

In [None]:
from tensorflow.keras.layers import (
    Dense, Dropout, Activation, Flatten, Input, BatchNormalization, GlobalAveragePooling2D, Add, Conv2D, AveragePooling2D, 
    LeakyReLU, Concatenate 
)

from tensorflow.keras import Model
from tensorflow.keras.optimizers import Nadam

def get_model(shape=(512, 512, 1)):
    """Build the two-input network that regresses a single value.

    Inputs:
        * an image tensor of shape ``shape``, passed through a ResNet50 created
          with ``weights=None`` and ``include_top=False`` and then globally
          average-pooled;
        * a 4-element tabular vector, passed through ``GaussianNoise(0.2)``.

    The two branches are concatenated, followed by ``Dropout(0.6)`` and a
    ``Dense(1)`` output layer. The returned model is not compiled.
    """
    #base_model = EfficientNetB4(include_top=False, weights=None)
    backbone = ResNet50(include_top=False, weights=None, input_shape=shape)

    image_in = Input(shape=shape)
    image_features = backbone(image_in)
    image_features = GlobalAveragePooling2D()(image_features)

    tab_in = Input(shape=(4,))
    noisy_tab = tf.keras.layers.GaussianNoise(0.2)(tab_in)

    merged = Concatenate()([image_features, noisy_tab])
    merged = Dropout(0.6)(merged)
    output = Dense(1)(merged)
    #x2 = Dense(1)(x)
    return Model([image_in, tab_in], output)

In [None]:
# Instantiate the network with the default (512, 512, 1) image input and print its
# layer summary.
model = get_model()
model.summary()

In [None]:
# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003), loss='mae') 

In [None]:
from sklearn.model_selection import train_test_split 

# Split at patient level (80% train / 20% validation). A fixed random_state keeps
# the split, and therefore the quantile selected on it, reproducible across runs.
tr_p, vl_p = train_test_split(P,
                              shuffle=True,
                              train_size=0.8,
                              random_state=42)

In [None]:
# Both generators share the slope and tabular lookups; only the patient ids differ.
train_gen = IGenerator(keys=tr_p, a=A, tab=TAB)

val_gen = IGenerator(keys=vl_p, a=A, tab=TAB)

In [None]:
# Training schedule.
steps_per_epoch = 100
num_epochs = 30
initial_lr = 0.001

# lr_fn = PiecewiseConstantDecay(
#     boundaries=[int(steps_per_epoch * 5),
#                 int(steps_per_epoch * 10)],
#     values=[initial_lr, initial_lr * 0.1, initial_lr * 0.01])


# Stop after 10 epochs without a val_loss improvement of at least 1e-3 and restore
# the best weights seen.
er = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", min_delta=1e-3, patience=10,
    verbose=0, mode="auto", baseline=None,
    restore_best_weights=True,
)

# Multiply the learning rate by 0.2 after 5 epochs without val_loss improvement,
# never going below 1e-4.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001,
)

In [None]:
# SGD with momentum 0.9, starting at `initial_lr`; the Adam variant below is kept
# commented out as an alternative.
optimizer = tf.keras.optimizers.SGD(learning_rate=initial_lr, momentum=0.9)

# optimizer = tf.keras.optimizers.Adam(learning_rate=initial_lr)


In [None]:

# Train against mean absolute error on the predicted slope.
model.compile(optimizer=optimizer, loss='mae')

In [None]:
# `Model.fit_generator` is deprecated; `Model.fit` accepts a keras `Sequence`
# directly. The History object is kept so the loss curves can be inspected later.
history = model.fit(train_gen,
                    steps_per_epoch=steps_per_epoch,
                    validation_data=val_gen,
                    validation_steps=20,
                    callbacks=[er, reduce_lr],
                    epochs=num_epochs,
                    workers=4)

In [None]:
def score(fvc_true, fvc_pred, sigma):
    """Mean penalty for FVC predictions with an uncertainty estimate (lower is better).

    ``sigma`` is floored at 70 and the absolute error is capped at 1000. Each
    sample then contributes ``sqrt(2) * error / sigma + log(sqrt(2) * sigma)``;
    the mean over all samples is returned.
    """
    root2 = np.sqrt(2)
    sigma_floor = np.maximum(sigma, 70)
    abs_err = np.minimum(np.abs(fvc_true - fvc_pred), 1000)
    per_sample = (abs_err / sigma_floor) * root2 + np.log(sigma_floor * root2)
    return np.mean(per_sample)

In [None]:
from tqdm.notebook import tqdm

# Choose the slope quantile on the validation patients.
#
# The per-slice predictions do not depend on the quantile, so every patient's
# slices are loaded and predicted once and reused for all nine candidate
# quantiles, instead of being reloaded and re-predicted for each one.
# NOTE(review): IGenerator zeroes pixels outside the lung mask during training,
# while the images fed to the model here are unmasked -- confirm this is intended.
val_slopes = {}  # patient id -> per-slice slope predictions
for p in tqdm(vl_p):
    if p in ['ID00011637202177653955184', 'ID00052637202186188008618']:
        continue

    ldir = os.listdir(f'../input/osic-pulmonary-fibrosis-progression/train/{p}/')
    x = [get_img(f'../input/osic-pulmonary-fibrosis-progression/train/{p}/{i}')
         for i in ldir
         if 0.15 < int(i[:-4]) / len(ldir) < 0.8]
    if len(x) < 1:
        continue
    # The tabular vector is identical for every slice of a patient: compute it
    # once and repeat it along the batch axis.
    tab = np.tile(get_tab(train.loc[train.Patient == p, :]), (len(x), 1))

    x = np.expand_dims(x, axis=-1)
    val_slopes[p] = model.predict([x, tab])

metric = []  # metric[j] is the mean score for quantile (j + 1) / 10
for q in range(1, 10):
    m = []
    for p, _a in val_slopes.items():
        a = np.quantile(_a, q / 10)

        rows = train.Patient == p
        percent_true = train.Percent.values[rows]
        fvc_true = train.FVC.values[rows]
        weeks_true = train.Weeks.values[rows]

        # Extrapolate linearly from the first measurement; the confidence moves
        # away from the first Percent value by a * |elapsed weeks|.
        fvc = a * (weeks_true - weeks_true[0]) + fvc_true[0]
        percent = percent_true[0] - a * abs(weeks_true - weeks_true[0])
        m.append(score(fvc_true, fvc, percent))
    print(np.mean(m))
    metric.append(np.mean(m))

## Predict

In [None]:
# metric[j] was computed for quantile (j + 1) / 10; keep the quantile with the
# smallest mean score.
q = (np.argmin(metric) + 1)/ 10
q

In [None]:
# Load the submission template; its Patient_Week keys are filled in further down.
sub = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/sample_submission.csv') 
sub.head() 

In [None]:
# Load the test table (Patient, Weeks, FVC, Percent and the tabular columns read by get_tab).
test = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/test.csv') 
test.head()

In [None]:
# Predict a slope for every test patient and store the per-patient line parameters.
#   A_test: slope (quantile `q` of the per-slice predictions)
#   B_test: intercept, so that A_test * week + B_test reproduces the known FVC
#   P_test: known Percent value(s);  WEEK: week(s) of the known measurement
A_test, B_test, P_test, W, FVC = {}, {}, {}, {}, {}
STD, WEEK = {}, {}
for p in test.Patient.unique():
    patient_dir = f'../input/osic-pulmonary-fibrosis-progression/test/{p}/'
    ldir = os.listdir(patient_dir)
    selected = [i for i in ldir if 0.15 < int(i[:-4]) / len(ldir) < 0.8]
    if not selected:
        # Very short scans can leave no slice inside the (0.15, 0.8) window. Fall
        # back to every available slice so the patient still receives a
        # prediction; skipping it would make the submission cell fail with a
        # KeyError on A_test[p].
        selected = ldir
    if not selected:
        print(f'no CT slices found for {p}; patient skipped')
        continue

    x = np.expand_dims([get_img(patient_dir + i) for i in selected], axis=-1)
    # The tabular vector is identical for every slice of a patient.
    tab = np.tile(get_tab(test.loc[test.Patient == p, :]), (len(selected), 1))

    _a = model.predict([x, tab])
    a = np.quantile(_a, q)

    rows = test.Patient == p
    A_test[p] = a
    B_test[p] = test.FVC.values[rows] - a * test.Weeks.values[rows]
    P_test[p] = test.Percent.values[rows]
    WEEK[p] = test.Weeks.values[rows]

In [None]:
# Fill the submission: FVC from the per-patient line, Confidence from the known
# Percent moved by slope * |weeks away from the known measurement|.
# Values are collected in row order and assigned as whole columns, which avoids
# two full-frame boolean masks per row and avoids writing float arrays into the
# template's existing columns through `.loc`.
fvc_values, confidence_values = [], []
for k in sub.Patient_Week.values:
    # Split on the last underscore only, so the week is parsed correctly even if
    # a patient id were to contain an underscore.
    p, w = k.rsplit('_', 1)
    w = int(w)

    fvc = A_test[p] * w + B_test[p]
    confidence = P_test[p] - A_test[p] * abs(WEEK[p] - w)
    # The per-patient entries are length-1 arrays; store them as plain floats.
    fvc_values.append(float(np.ravel(fvc)[0]))
    confidence_values.append(float(np.ravel(confidence)[0]))

sub['FVC'] = fvc_values
sub['Confidence'] = confidence_values
    

In [None]:
# Preview the filled submission rows.
sub.head()

In [None]:
# Write only the three submission columns, without the index.
sub[["Patient_Week","FVC","Confidence"]].to_csv("submission.csv", index=False)

In [None]:
# Marker printed when the notebook has run to the end.
# NOTE(review): the statement appears twice -- possibly an export artifact; confirm.
print('success')
print('success')