In [None]:
import numpy as np
import pandas as pd
import os
from PIL import Image
import pydicom
import matplotlib.pyplot as plt
import pylab
import cv2
from tensorflow.keras.utils import Sequence
#import gdcm
from tqdm import tqdm

import tensorflow as tf
import tensorflow.keras.layers as L
import tensorflow.keras.models as M
import tensorflow.keras.backend as K
import tensorflow.keras.regularizers as R
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold

In [None]:
# Run configuration shared by the cells below.
# EPOCHS, BATCH_SIZE, FOLDS and SUB_PATH are not referenced by the cells shown
# in this notebook.
EPOCHS = 5
# Number of DICOM slices kept per patient; also the leading entry of IMAGE_DIM.
NUM_IMAGES = 150
BATCH_SIZE = 4
FOLDS = 5
# Shape of one patient's image stack before the trailing channel axis is added
# by Data_Generator.
IMAGE_DIM = (NUM_IMAGES,55,55)
# Kaggle input locations for the competition data.
COMP_DIR = '../input/osic-pulmonary-fibrosis-progression/'
TRAIN_PATH = '../input/osic-pulmonary-fibrosis-progression/train'
TEST_PATH = '../input/osic-pulmonary-fibrosis-progression/test'
SUB_PATH = '../input/osic-pulmonary-fibrosis-progression/sample_submission.csv'

In [None]:
comp_dir = '../input/osic-pulmonary-fibrosis-progression'


def _read_competition_csv(file_name):
    """Read one CSV file from the competition input directory."""
    return pd.read_csv(os.path.join(comp_dir, file_name))


# keep=False removes every row of a (Patient, Weeks) pair that occurs more
# than once, rather than keeping one representative.
train_data = _read_competition_csv('train.csv').drop_duplicates(
    subset=['Patient','Weeks'], keep=False
)
sub = _read_competition_csv('sample_submission.csv')
test_data = _read_competition_csv('test.csv')

In [None]:
# One row per patient (the first occurrence in train_data), relabelled as the
# patient's baseline measurement.
train_data_u = (
    train_data
    .drop_duplicates(subset=['Patient'])
    .rename(columns={'Weeks':'Base_Week','FVC':'Base_FVC','Percent':'Base_Percent'})
    .assign(Typical_FVC=lambda frame: (frame.Base_FVC.values/frame.Base_Percent.values)*100)
)

# Attach the baseline columns to every visit row; the demographic columns are
# dropped from the right side because train_data already carries them.
patient_baseline = train_data_u.drop(columns=['Age','Sex','SmokingStatus'])
train_data = pd.merge(train_data, patient_baseline, on='Patient', how='left')

In [None]:
# Split each 'Patient_Week' key on '_' : the first piece is the patient id,
# the last piece is the (possibly negative) week number.
patient_week_keys = sub['Patient_Week']
sub['Patient'] = [key.split('_')[0] for key in patient_week_keys]
sub['Weeks'] = [int(key.split('_')[-1]) for key in patient_week_keys]

# Discard the placeholder Confidence column and keep only the key columns.
sub = sub.drop(columns=['Confidence'])[['Patient','Weeks','Patient_Week']]

In [None]:
# The single test.csv measurement per patient is treated as the baseline.
baseline_names = {'Weeks':'Base_Week','FVC':'Base_FVC','Percent':'Base_Percent'}
test_data = test_data.rename(columns=baseline_names)
test_data = test_data.assign(
    Typical_FVC=(test_data['Base_FVC'].values/test_data['Base_Percent'].values)*100
)

# Every submission row receives the baseline columns of its patient.
sub = pd.merge(sub, test_data, on='Patient', how='left')

In [None]:
# Tag the origin of each row so the combined frame can be split again later.
train_data = train_data.assign(Type='train')
sub = sub.assign(Type='test')

In [None]:
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement. ignore_index=True gives the combined frame a unique 0..n-1
# index instead of repeating the labels of both inputs; downstream cells only
# use boolean masks and positional numpy arrays, so they are unaffected.
data = pd.concat([train_data, sub], ignore_index=True)

In [None]:
# Inspect the combined train + submission frame.
data

In [None]:
# Target column predicted by the models.
prediction_col = ["FVC"]
# Numeric columns that are min-max scaled before modelling.
Continuos_cols = ["Weeks","Base_Week","Base_FVC","Typical_FVC","Age","Percent","Base_Percent"]
# Categorical columns; the smoking column in the data is named 'SmokingStatus'
# (the previous 'Smoking_status' spelling matched no column).
Categorical_cols = ['Sex','SmokingStatus']

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Min-max scale the continuous columns. The scaler is fitted on the combined
# frame, i.e. on train and submission rows together; the target column 'FVC'
# is not part of Continuos_cols and stays unscaled.
scaler = MinMaxScaler()
scaler.fit(data[Continuos_cols])
conti = scaler.transform(data[Continuos_cols])
data[Continuos_cols] = conti

In [None]:
# Mean baseline Percent per smoking status and per sex (one line of output
# per group, in the order listed).
groups_to_report = [
    ('SmokingStatus', 'Never smoked'),
    ('SmokingStatus', 'Currently smokes'),
    ('SmokingStatus', 'Ex-smoker'),
    ('Sex', 'Male'),
    ('Sex', 'Female'),
]
for column_name, level in groups_to_report:
    group_rows = train_data_u[train_data_u[column_name] == level]
    print(np.mean(group_rows.Base_Percent.values))

In [None]:
# One-hot encode Sex and SmokingStatus with vectorized comparisons instead of
# per-row Python loops (which re-materialised `.values` on every iteration).
# The indicators are 1-D float arrays with one entry per row of `data`.
sex_m = (data['Sex'] == 'Male').to_numpy().astype(np.float64)
sex_f = (data['Sex'] == 'Female').to_numpy().astype(np.float64)
sm_es = (data['SmokingStatus'] == 'Ex-smoker').to_numpy().astype(np.float64)
sm_ns = (data['SmokingStatus'] == 'Never smoked').to_numpy().astype(np.float64)
# As before, every value that is neither 'Ex-smoker' nor 'Never smoked'
# (including a missing value) lands in the "currently smokes" indicator.
sm_cs = 1.0 - sm_es - sm_ns

data['sex_m'] = sex_m
data['sex_f'] = sex_f
data['sm_es'] = sm_es
data['sm_ns'] = sm_ns
data['sm_cs'] = sm_cs

In [None]:
# Columns used to build x_train / x_test: four of the scaled continuous
# columns plus the five one-hot indicators. 'Typical_FVC', 'Percent' and
# 'Base_Percent' from Continuos_cols are not included.
x_cols = ['Weeks','Base_Week','Base_FVC','Age','sex_m','sex_f','sm_es','sm_ns','sm_cs']

In [None]:
# Split the combined frame back into train / test arrays.
# `np.float` was removed in NumPy 1.24; it was an alias of the builtin float,
# so np.float64 yields the same dtype.
x_train = data[x_cols].loc[data['Type'] == "train"].values.astype(np.float64)
y_train = data[prediction_col].loc[data['Type'] == "train"].values.astype(np.float64)
x_test = data[x_cols].loc[data['Type'] == "test"].values.astype(np.float64)

In [None]:
# Inspect the combined frame again, now with scaled and one-hot columns.
data

In [None]:
# Down-cast all three arrays to single precision in one pass.
x_train, y_train, x_test = (
    array.astype(np.float32) for array in (x_train, y_train, x_test)
)

In [None]:
# Sanity check: shapes of the train features, train targets and test features.
x_train.shape,y_train.shape,x_test.shape

In [None]:
# Sanity check: scalar type of a single feature value (row 0, column 3,
# which is 'Age' in x_cols).
type(x_train[0][3])

In [None]:
# Sanity check: container types of the three arrays.
type(x_train),type(y_train),type(x_test)

In [None]:
class Data_Generator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` yielding batches of resized DICOM slice stacks.

    Every sample is one patient directory. Its DICOM files are read, divided
    by 255, resized and stacked into an array of shape
    ``(dim[0], dim[1], dim[2], 1)``; slots whose file could not be read stay
    zero. In training mode a batch is ``(X, y)``; otherwise only ``X``.

    Parameters
    ----------
    batch_size : int
        Number of samples per batch.
    patient_ids : sequence of str
        Directory names under TRAIN_PATH (train=True) or TEST_PATH
        (train=False), one entry per sample.
    tab_data : array-like
        Tabular features aligned with ``patient_ids``. Stored on the
        instance but not included in the returned batches.
    dim : tuple of int
        ``(num_slices, height, width)`` of one patient stack.
    target : array-like, optional
        Targets aligned with ``patient_ids``; required when ``train`` is true.
    train : bool
        Selects the image directory and whether targets are returned.
    augment : bool
        Augmentation is not implemented; requesting it raises
        ``NotImplementedError`` when an image is loaded.
    """

    def __init__(self,batch_size,patient_ids,tab_data,dim,target=None,train=True,augment=False):
        # Let the Keras base class set up its own state (required by newer
        # Keras versions, harmless on older ones).
        super().__init__()
        self.batch_size = batch_size
        self.image_ids = patient_ids
        self.augment = augment
        self.dim = dim
        self.target = target
        self.indices = range(len(self.image_ids))
        self.train = train
        self.tab_data = tab_data

    def getimage(self,image_id):
        """Load the slice stack of one patient.

        If the directory holds more than ``dim[0]`` files, the file names are
        sorted and a centred window of ``dim[0]`` files is used; otherwise all
        files are used in directory-listing order.

        Returns
        -------
        numpy.ndarray
            Array of shape ``(dim[0], dim[1], dim[2], 1)``.

        Raises
        ------
        NotImplementedError
            If the generator was created with ``augment=True``.
        """
        import warnings  # local import so the block does not rely on a new file-level import

        if self.augment:
            # The augmentation helper was never implemented; fail with a clear
            # message instead of an AttributeError after all I/O is done.
            raise NotImplementedError(
                "Data_Generator augmentation is not implemented; use augment=False."
            )

        num_slices, height, width = self.dim
        volume = np.zeros((num_slices, height, width, 1))

        # Read from the directory that matches the mode. (Previously the
        # DICOM files were always read from TRAIN_PATH, even for test data.)
        root = TRAIN_PATH if self.train else TEST_PATH
        patient_dir = os.path.join(root, image_id)

        file_names = os.listdir(patient_dir)
        if len(file_names) > num_slices:
            file_names.sort()
            start = (len(file_names) - num_slices) // 2
            file_names = file_names[start:start + num_slices]

        failed = 0
        last_error = None
        for slot, file_name in enumerate(file_names):
            try:
                dcm = pydicom.dcmread(os.path.join(patient_dir, file_name))
                pixels = dcm.pixel_array / 255
                # cv2.resize takes (width, height); the result has shape
                # (height, width).
                pixels = cv2.resize(pixels, (width, height))
                volume[slot] = np.reshape(pixels, (height, width, 1))
            except Exception as err:
                # Best effort: an unreadable slice leaves its slot at zero,
                # but the failure is reported below instead of being hidden.
                failed += 1
                last_error = err

        if failed:
            warnings.warn(
                f"{failed} of {len(file_names)} DICOM slices for patient "
                f"{image_id!r} could not be loaded and were left as zeros "
                f"(last error: {last_error!r})"
            )
        return volume

    def on_epoch_end(self):
        """Return the sample indices unchanged (no shuffling between epochs)."""
        return self.indices

    def getdata(self, image_id_list):
        """Stack the volumes of the given patients into one batch array."""
        # Sized by the actual number of ids so a short final batch contains
        # no uninitialised rows.
        batch = np.zeros((len(image_id_list), *self.dim, 1))
        for row, im_id in enumerate(image_id_list):
            batch[row] = self.getimage(im_id)
        return batch

    def __getitem__(self,index):
        """Return batch ``index``: ``(X, y)`` in training mode, else ``X``."""
        indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]

        image_id_list = [self.image_ids[k] for k in indices]
        X = self.getdata(image_id_list)
        if self.train:
            y = np.array([self.target[k] for k in indices]).astype(np.float32)
            return X,y
        return X

    def __len__(self):
        """Number of batches per epoch.

        Training drops an incomplete final batch (as before). Inference
        rounds up so that every sample receives a prediction.
        """
        n_samples = len(self.indices)
        if self.train:
            return n_samples // self.batch_size
        return (n_samples + self.batch_size - 1) // self.batch_size
    

In [None]:
# Lower bound applied to sigma and upper bound applied to the absolute error.
C1, C2 = tf.constant(70, dtype='float32'), tf.constant(1000, dtype="float32")
#=============================#
def score(y_true, y_pred):
    """Mean of ``sqrt(2) * delta / sigma + log(sqrt(2) * sigma)``.

    ``y_pred`` holds three columns; column 1 is used as the FVC prediction
    and ``column 2 - column 0`` as sigma (floored at C1). ``delta`` is the
    absolute error against ``y_true[:, 0]``, capped at C2.
    """
    # The casts must be assigned back: tf.dtypes.cast returns a new tensor
    # and previously its result was discarded, making the casts no-ops.
    y_true = tf.dtypes.cast(y_true, tf.float32)
    y_pred = tf.dtypes.cast(y_pred, tf.float32)
    sigma = y_pred[:,2]-y_pred[:,0]
    fvc_pred = y_pred[:,1]
    sigma_clip = tf.maximum(sigma, C1)
    delta = tf.abs(y_true[:,0] - fvc_pred)
    delta = tf.minimum(delta, C2)
    sq2 = tf.sqrt( tf.dtypes.cast(2, dtype=tf.float32) )
    metric = (delta / sigma_clip)*sq2 + tf.math.log(sigma_clip* sq2)
    return K.mean(metric)
#============================#
def qloss(y_true, y_pred):
    """Mean pinball (quantile) loss over the 0.2, 0.5 and 0.8 quantiles.

    The three quantile levels are broadcast against the last axis of
    ``y_true - y_pred``.
    """
    quantiles = tf.constant(np.array([[0.2,0.50,0.8]]), dtype=tf.float32)
    residual = y_true - y_pred
    pinball = tf.maximum(quantiles*residual, (quantiles-1)*residual)
    return K.mean(pinball)
#=============================#
def mloss(_lambda):
    """Build a loss that blends ``qloss`` and ``score``.

    The returned function computes
    ``_lambda * qloss + (1 - _lambda) * score``.
    """
    def loss(y_true, y_pred):
        quantile_term = _lambda * qloss(y_true, y_pred)
        score_term = (1 - _lambda)*score(y_true, y_pred)
        return quantile_term + score_term
    return loss
#=================

In [None]:
# Rebuild the arrays straight from the frame. This rebinds x_train, y_train
# and x_test, so any dtype cast applied to the earlier arrays does not carry
# over to these ones.
row_type = data['Type']
feature_frame = data[x_cols]
x_train = feature_frame.loc[row_type == "train"].values
y_train = data[prediction_col].loc[row_type == "train"].values
x_test = feature_frame.loc[row_type == "test"].values

In [None]:
# The models are only used for `.predict` below, so the saved training
# configuration (optimizer, loss, metrics) is not restored: compile=False.
# This also avoids binding the name 'loss' to `mloss`, which is the loss
# *factory* (it takes `_lambda`) and would fail if Keras ever called it as
# loss(y_true, y_pred).
model_dense = M.load_model('../input/tab-data-osic/dense_model.h5',custom_objects={'loss':mloss,'score':score},compile=False)
model_dense.summary()
model_cnn = M.load_model('../input/tab-data-osic/model.h5',custom_objects={'loss':mloss,'score':score},compile=False)
model_cnn.summary()

In [None]:
# Patient id of every test row. The test rows come from the submission frame,
# so this holds one entry per submission row and ids repeat across weeks.
test_patient_ids = data['Patient'].loc[data['Type'] == "test"].values

In [None]:
# Largest batch size in 1..5 that divides the number of test rows exactly
# (1 always qualifies, so a value is always found).
TEST_BATCH_SIZE = max(
    size for size in range(1,6) if len(test_patient_ids) % size == 0
)
        

In [None]:
# Image generator for inference; with train=False each batch is the image
# array only (no targets).
test_gen = Data_Generator(TEST_BATCH_SIZE,test_patient_ids,x_test,IMAGE_DIM,train=False)

In [None]:
# Progress marker in the notebook output.
print("Inferencing")

In [None]:
# Number of test rows that will be predicted.
print(len(test_patient_ids))

In [None]:
# Show the batch size chosen for the image generator.
TEST_BATCH_SIZE

In [None]:
# Tabular model predicts directly from the feature array; the CNN predicts
# from the image generator. Columns 0, 1 and 2 of both outputs are used
# by the cells below.
pred_dense = model_dense.predict(x_test,verbose=1)
pred_cnn = model_cnn.predict(test_gen,verbose=1)

In [None]:
# Confidence is the spread between the third and first output column, with a
# floor of 70 (the same floor `score` applies to sigma). np.maximum clips
# each array over its own full length; the previous loop ran over
# len(conf_cnn) for both arrays.
MIN_CONFIDENCE = 70
conf_dense = np.maximum(pred_dense[:,2] - pred_dense[:,0], MIN_CONFIDENCE)
conf_cnn = np.maximum(pred_cnn[:,2] - pred_cnn[:,0], MIN_CONFIDENCE)

In [None]:
# Sanity check: both models should yield the same number of predictions,
# since they are blended element-wise in the next cell.
len(pred_dense),len(pred_cnn)

In [None]:
# Weighted blend of the two models: 80% tabular model, 20% CNN.
DENSE_WEIGHT, CNN_WEIGHT = 0.8, 0.2
pred = DENSE_WEIGHT*pred_dense + CNN_WEIGHT*pred_cnn
conf = DENSE_WEIGHT*conf_dense + CNN_WEIGHT*conf_cnn

# Column 1 of the blended output is used as the FVC prediction.
pred_dict = dict(FVC=pred[:,1], Confidence=conf)
pred_df = pd.DataFrame(pred_dict)

In [None]:
# Copy the blended columns onto the submission frame; pandas aligns the
# values on the row index of `sub` and `pred_df`.
for column_name in ('Confidence', 'FVC'):
    sub[column_name] = pred_df[column_name]

In [None]:
# Keep only the three submission columns, as an independent copy.
subm = sub[['Patient_Week','FVC','Confidence']].copy()

In [None]:
# Write the submission file to the working directory, without the index.
subm.to_csv("submission.csv", index=False)

In [None]:
# Inspect the final submission frame.
subm