In [None]:
# !pip install --no-deps ../input/classification-models/classification_models-1.0.0
# !pip install --no-deps ../input/keras-applications

In [None]:
import cv2
import gc
import numpy as np
import os
import pandas as pd
import pydicom as dicom
import random

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.utils import *
from tensorflow.keras.metrics import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.losses import *
from tensorflow.keras.callbacks import *

from scipy.ndimage import zoom

from sklearn.model_selection import KFold, train_test_split

In [None]:
# Per-column (min, max) pairs used for min-max scaling: normalize() maps each
# listed column to (value - min) / (max - min), and the FVC pair is also used
# to map model outputs back to FVC units.
# NOTE(review): presumably these bounds were taken from the training data —
# confirm against the training notebook.
MIN_MAX = {
    'Weeks': (-5., 133.),
    'FVC': (827., 6399.),
    'Percent': (28.877577, 153.145378),
    'Age': (49., 88.),
    # Same range as 'FVC'.
    'typical_fvc': (827., 6399.),
}

In [None]:
# CONFIGS
IMG_SIZE = 48        # in-plane size every CT slice is resized to
NUM_OF_SCANS = 48    # number of slices every volume is resampled to along depth
BATCH_SIZE = 64
TEST_DF = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/test.csv')
SAMPLE_SUBMISSION = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/sample_submission.csv')
model_path = '../input/linear-model-osic-v2'
# os.listdir returns entries in arbitrary order; sort so the ensemble is always
# loaded (and its predictions summed) in the same order from run to run.
model_weights = [os.path.join(model_path, x) for x in sorted(os.listdir(model_path))]
# Order matters: it defines the column order of the tabular input vector.
training_features = ['Weeks', 'min_week', 'min_week_FVC', 'typical_fvc', 'Age', 'Male', 'Female', 'Never smoked', 'Currently smokes', 'Ex-smoker']
num_of_features = len(training_features)

In [None]:
# Data preprocessing
def create_typical_fvc(df):
    """Add a ``typical_fvc`` column (FVC / Percent * 100) to ``df`` in place.

    Args:
        df: DataFrame with numeric ``FVC`` and ``Percent`` columns.

    Returns:
        The same DataFrame object, now carrying the ``typical_fvc`` column.
    """
    fvc_per_percent = df['FVC'] / df['Percent']
    df['typical_fvc'] = fvc_per_percent * 100.
    return df

def normalize(df):
    """Min-max scale every column named in ``MIN_MAX``, in place.

    Each listed column is replaced by ``(value - min) / (max - min)`` using the
    bounds stored in ``MIN_MAX``; ``df`` must contain all of those columns.

    Returns:
        The same DataFrame object with the scaled columns.
    """
    for column in MIN_MAX:
        lower, upper = MIN_MAX[column]
        df[column] = (df[column] - lower) / (upper - lower)
    return df

# Build the tabular features the model expects on the test frame.
TEST_DF = create_typical_fvc(TEST_DF)

# Per-patient baseline columns. They are filled below with *normalised* values,
# so create them as floats instead of ints to avoid a dtype-changing assignment.
TEST_DF['min_week'] = np.zeros(len(TEST_DF), dtype='float64')
TEST_DF['min_week_FVC'] = np.zeros(len(TEST_DF), dtype='float64')

# One-hot encode smoking status.
TEST_DF['Never smoked'] = (TEST_DF['SmokingStatus'] == 'Never smoked').astype('uint8')
TEST_DF['Currently smokes'] = (TEST_DF['SmokingStatus'] == 'Currently smokes').astype('uint8')
TEST_DF['Ex-smoker'] = (TEST_DF['SmokingStatus'] == 'Ex-smoker').astype('uint8')

# One-hot encode sex.
TEST_DF['Male'] = (TEST_DF['Sex'] == 'Male').astype('uint8')
TEST_DF['Female'] = (TEST_DF['Sex'] == 'Female').astype('uint8')

TEST_DF = normalize(TEST_DF)
for patient in np.unique(TEST_DF['Patient']):
    patient_rows = TEST_DF['Patient'] == patient
    # Use .loc for the write: chained indexing (df[col][mask] = ...) assigns to a
    # temporary and is silently ignored under pandas copy-on-write.
    TEST_DF.loc[patient_rows, 'min_week'] = TEST_DF.loc[patient_rows, 'Weeks'].min()
    # Baseline FVC is the patient's first row (already normalised above).
    TEST_DF.loc[patient_rows, 'min_week_FVC'] = TEST_DF.loc[patient_rows, 'FVC'].values[0]

TEST_DF.head()

In [None]:
def get_pixels_hu(scan, window_center=-200, window_width=2000):
    """Convert a DICOM slice to a windowed 8-bit image.

    The raw pixels are rescaled with the slice's ``RescaleSlope`` /
    ``RescaleIntercept`` (Hounsfield units), clipped to the window
    ``[window_center - window_width // 2, window_center + window_width // 2]``
    and linearly mapped to ``0..255``.

    Args:
        scan: Object exposing ``pixel_array``, ``RescaleSlope`` and
            ``RescaleIntercept`` (e.g. a dataset returned by ``dicom.dcmread``).
        window_center: Centre of the intensity window. Defaults to -200.
        window_width: Full width of the intensity window. Defaults to 2000.

    Returns:
        ``np.uint8`` array with the same shape as ``scan.pixel_array``.

    Raises:
        ValueError: If the window is empty (``window_width // 2 <= 0``), which
            would otherwise divide by zero.
    """
    # astype copies, so the in-place add below never touches scan.pixel_array.
    image = scan.pixel_array.astype(np.int16)

    # Convert to Hounsfield units (HU)
    slope = scan.RescaleSlope
    intercept = scan.RescaleIntercept
    if slope != 1:
        image = (slope * image.astype(np.float64)).astype(np.int16)
    image += np.int16(intercept)

    image_min = window_center - window_width // 2
    image_max = window_center + window_width // 2
    if image_max <= image_min:
        raise ValueError(f'window_width must be at least 2, got {window_width}')

    # Clip in float64 so window bounds outside the int16 range are also accepted.
    image = np.clip(image.astype(np.float64), image_min, image_max)
    image = (image - image_min) / (image_max - image_min) * 255.

    return image.astype(np.uint8)

# Build one CT volume per test patient, keyed by patient id. Each slice is
# windowed to 8-bit, resized to IMG_SIZE x IMG_SIZE, and the stack is resampled
# along depth to NUM_OF_SCANS slices.
# NOTE(review): os.listdir returns files in arbitrary order, so slices are
# stacked in directory order, not necessarily anatomical order. Confirm this
# matches how the loaded weights were trained before changing it.
volumes = {}
for patient_id in np.unique(TEST_DF['Patient']):
    image_folder = f'../input/osic-pulmonary-fibrosis-progression/test/{patient_id}'
    # Decode and shrink one file at a time so the full-resolution DICOM datasets
    # of a patient are never all held in memory together.
    slices = [
        cv2.resize(
            get_pixels_hu(dicom.dcmread(os.path.join(image_folder, image_file))),
            (IMG_SIZE, IMG_SIZE),
        )
        for image_file in os.listdir(image_folder)
    ]
    images = np.asarray(slices, dtype='float32')
    if images.ndim != 3 or images.shape[0] == 0:
        raise ValueError(
            f'Expected a non-empty stack of 2D slices in {image_folder}, got shape {images.shape}'
        )
    # Only the depth axis is resampled; in-plane size is already IMG_SIZE.
    d_ratio = NUM_OF_SCANS / images.shape[0]
    volumes[patient_id] = zoom(images, (d_ratio, 1., 1.))

In [None]:
# Data Generator

class Dataset(Sequence):
    """Keras ``Sequence`` serving inference batches for every SAMPLE_SUBMISSION row.

    Each batch is ``[images, tabulars]``:
      * ``images``: the pre-computed volume of each row's patient (from the
        module-level ``volumes`` dict), scaled by 1/255 with a trailing channel
        axis appended.
      * ``tabulars``: the row's normalised week followed by the patient's
        remaining ``training_features`` taken from ``TEST_DF``.

    Args:
        batch_size: Number of submission rows per batch.
        mode: 0 - Training, 1 - Test. Stored but not otherwise used by this class.
    """

    def __init__(self, batch_size = BATCH_SIZE, mode=0):
        super().__init__()
        self.indices = np.arange(0, len(SAMPLE_SUBMISSION), 1)
        self.batch_size = batch_size
        self.mode = mode # 0 - Training, 1 - Test

    def __len__(self):
        """Number of batches, counting a trailing partial batch."""
        # Ceiling division: floor division would report zero batches when there
        # are fewer rows than batch_size, so nothing would be predicted.
        return (len(self.indices) + self.batch_size - 1) // self.batch_size

    def get_tabular(self, patient, week):
        """Return the float32 feature vector for one (patient, week) pair.

        ``week`` is min-max scaled with ``MIN_MAX['Weeks']``; the other features
        come from the first ``TEST_DF`` row belonging to ``patient``.
        """
        week = (float(week) - MIN_MAX['Weeks'][0]) / (MIN_MAX['Weeks'][1] - MIN_MAX['Weeks'][0])
        tabular = [week]
        patient_rows = TEST_DF.loc[TEST_DF['Patient'] == patient, training_features[1:]]
        tabular += list(patient_rows.values[0])
        return np.asarray(tabular, dtype='float32')

    def get_volume(self, patient_id):
        """Return the pre-computed volume stored for ``patient_id``."""
        return volumes[patient_id]

    def __getitem__(self, index):
        """Build batch number ``index`` as ``[images, tabulars]``."""
        # Slicing past the end is safe, so the last batch is simply shorter.
        start = index * self.batch_size
        indices = self.indices[start:start + self.batch_size]
        # indices are row positions, so select positionally.
        Patient_Week = np.asarray(SAMPLE_SUBMISSION['Patient_Week'].iloc[indices])
        # Split on the LAST underscore so an id containing '_' is not truncated.
        pairs = [x.rsplit('_', 1) for x in Patient_Week]
        patient = [pair[0] for pair in pairs]
        week = [pair[1] for pair in pairs]
        images = np.asarray([self.get_volume(patient_id) for patient_id in patient], dtype=np.float32) / 255.
        images = np.expand_dims(images, axis=4)
        tabulars = np.asarray([self.get_tabular(patient[i], week[i]) for i in range(len(patient))], dtype='float32')
        return [images, tabulars]

In [None]:
# Model
def swish(x):
    """Swish activation: the input multiplied by its own sigmoid."""
    gate = K.sigmoid(x)
    return x * gate

def residual_block(x, num_of_filters):
    """Three Conv3D -> BatchNorm -> ReLU stages with a skip from stage 1 to stage 3.

    The stages use kernels (3, 1, 1), (1, 3, 3) and (1, 1, 1) in that order,
    each with ``num_of_filters`` filters. The result is the element-wise sum of
    the first and the last stage activations.
    """
    # Layers are instantiated in the same order as before so that auto-generated
    # layer names (and therefore weight loading) are unaffected.
    activations = []
    features = x
    for kernel in ((3, 1, 1), (1, 3, 3), (1, 1, 1)):
        features = Conv3D(num_of_filters, kernel_size=kernel, padding='same', kernel_initializer='he_uniform')(features)
        features = BatchNormalization()(features)
        features = Activation('relu')(features)
        activations.append(features)
    return Add()([activations[0], activations[-1]])

def dense_block(x, num_of_filters):
    """Chain two residual blocks and concatenate both of their outputs."""
    first = residual_block(x, num_of_filters)
    second = residual_block(first, num_of_filters)
    return Concatenate()([first, second])

def build_3d_cnn(input_tensor):
    """Build the 3D convolutional feature extractor on top of ``input_tensor``.

    A strided Conv3D/BatchNorm/ReLU stem is followed by three stages with 64,
    128 and 256 filters. Every stage starts with a max-pool that strides only
    over the last two spatial axes and then applies two dense blocks.

    Returns:
        The output tensor of the last dense block (no pooling applied after it).
    """
    # Stem
    features = Conv3D(64, kernel_size=(5, 7, 7), strides=(1,2,2), padding='same')(input_tensor)
    features = BatchNormalization()(features)
    features = Activation('relu')(features)

    # pool -> dense_block -> dense_block, same layer creation order as before.
    for filters in (64, 128, 256):
        features = MaxPool3D(pool_size=(3,3,3), strides=(1,2,2))(features)
        features = dense_block(features, filters)
        features = dense_block(features, filters)

    return features
    

def build_model(weights):
    """Assemble the image + tabular model and load trained weights into it.

    The CT volume goes through ``build_3d_cnn`` and global average pooling, is
    concatenated with the tabular vector, and feeds three parallel
    Dense(100) -> Dense(3) heads whose outputs are combined element-wise as
    ``alpha * x + beta``.

    Args:
        weights: Path of the weights file passed to ``model.load_weights``.

    Returns:
        A ``tf.keras.Model`` with inputs ``[image, tabular]`` and a 3-value output.
    """
    # Layer creation order is kept identical to the original definition so the
    # stored weights still line up with the layers.
    input_img = Input(shape=(NUM_OF_SCANS, IMG_SIZE, IMG_SIZE, 1))
    img_features = GlobalAveragePooling3D()(build_3d_cnn(input_img))

    input_tabular = Input(shape=(num_of_features,))

    merged = Concatenate()([img_features, input_tabular])

    # Hidden layer of each head, in the order alpha, x, beta.
    hidden_alpha, hidden_x, hidden_beta = [Dense(100)(merged) for _ in range(3)]
    alpha, x, beta = [Dense(3)(hidden) for hidden in (hidden_alpha, hidden_x, hidden_beta)]

    # alpha * x + beta
    out = Lambda(lambda parts : parts[0] * parts[1] + parts[2])([alpha, x, beta])

    model = tf.keras.Model(inputs = [input_img, input_tabular], outputs = out)
    model.load_weights(weights)
    return model

In [None]:
# One fully built model per weights file found in the ensemble directory.
models = list(map(build_model, model_weights))

In [None]:
# Accumulate the raw (still normalised) outputs of every ensemble member;
# the sum is divided by the number of models in the next cell.
test_gen = Dataset(mode=1)
n_rows = len(SAMPLE_SUBMISSION)
predictions = np.zeros((n_rows, 3), dtype='float32')
for model in models:
    model_output = model.predict(test_gen, verbose=1)
    predictions += model_output

In [None]:
def denormalize(y):
    """Map min-max scaled values back to FVC units using ``MIN_MAX['FVC']``."""
    fvc_min, fvc_max = MIN_MAX['FVC']
    return y * (fvc_max - fvc_min) + fvc_min

# Average the ensemble and convert back to FVC units.
predictions = denormalize(predictions / len(models))

# Column 1 is submitted as the FVC estimate; the spread between columns 2 and 0
# is submitted as the confidence.
FVC = predictions[:, 1]
Confidence = predictions[:, 2] - predictions[:, 0]

# Both columns are written as integers (astype truncates toward zero).
SAMPLE_SUBMISSION['FVC'] = FVC.astype('int')
SAMPLE_SUBMISSION['Confidence'] = Confidence.astype('int')
SAMPLE_SUBMISSION.to_csv('submission.csv', index=False)
SAMPLE_SUBMISSION.head()