In [1]:
# !pip install pylibjpeg gdcm
#!pip install tensorflow==2.5
# !pip  install pylibjpeg-libjpeg
# !pip install numpy==1.21
#!pip install tensorflow-gpu==2.5

In [2]:
# Importing libraries
import numpy as np
import tensorflow as tf
import os
import pathlib
import warnings
import logging
import PIL
from numba import cuda
import pandas as pd
from sklearn.model_selection import GroupKFold
warnings.filterwarnings('ignore')
logging.getLogger('tensorflow').setLevel(logging.ERROR)  # suppress warnings


In [3]:
import sys
effnet_path = os.path.join(os.getcwd(), 'automl', 'efficientnetv2')
sys.path.append(effnet_path)
import effnetv2_model

In [4]:
data_dir = pathlib.Path('D:\Datasets\siim_covid19_detection')
df = pd.read_csv(os.path.join(data_dir,'study.csv'))
label_columns = ['Negative for Pneumonia', 'Typical Appearance', 'Indeterminate Appearance', 'Atypical Appearance']

In [5]:
#create folds
gkf  = GroupKFold(n_splits = 5)
df['fold'] = -1
for fold, (train_idx, val_idx) in enumerate(gkf.split(df, groups = df.index.tolist())):
    df.loc[val_idx, 'fold'] = fold
    print(len(train_idx), len(val_idx))

4799 1200
4799 1200
4799 1200
4799 1200
4800 1199


In [6]:
# Process an image path
def process_path(file_path):
    image_name = tf.strings.split(str(file_path), os.sep).numpy()[-1].decode()
    image_id = image_name.split('_')[0]

    label = tf.cast((df.loc[df['StudyInstanceUID'] == image_id+'_study'][label_columns]).values[0], dtype = tf.int16)
    img = tf.io.read_file(file_path.numpy().decode())
    img = tf.image.decode_png(img, channels = 1)
    img =  tf.image.grayscale_to_rgb(img)
    if img.numpy().max() > 255:
        img = img/65535
    else:
        img = img/255
    # Gamma Correction
#     if img.numpy().max() < 0.49:
#         tf.image.adjust_gamma(img, 0.7)
#     elif img.numpy().min() > 0.5:
#         tf.image.adjust_gamma(img, 3)
    # img = tf.cast(img, dtype = tf.int16)
    return img, label

In [7]:
# Storing all the file names
train_dir = pathlib.Path(os.path.join(data_dir, '320px','train','study'))
train_image_paths = list(train_dir.glob('*.png'))

len(train_image_paths)

6054

In [8]:
train_image_paths = [path for path in train_image_paths if str(path).split(os.sep)[-1].split('.')[0] in list(df.StudyInstanceUID)]

In [9]:
len(train_image_paths)

5999

In [10]:
def get_paths(ids, all_paths):
    paths = [path for path in all_paths if str(path).split(os.sep)[-1].split('.')[0] in ids]
    return paths

In [11]:
# Create data pipeline
def input_pipeline(train_paths, val_paths):
    train_ds = tf.data.Dataset.from_tensor_slices([str(path) for path in train_paths])
    val_ds = tf.data.Dataset.from_tensor_slices([str(path) for path in val_paths])
    # test_ds = tf.data.Dataset.list_files(str(dataDir/'test/*/*/*.dcm'), shuffle=True)
    train_final_ds = train_ds.map(lambda x: tf.py_function(func = process_path,inp = [x], Tout = (tf.float32, tf.int16)), num_parallel_calls = tf.data.AUTOTUNE)
    valid_ds = val_ds.map(lambda x: tf.py_function(func = process_path,inp = [x], Tout = (tf.float32, tf.int16)), num_parallel_calls = tf.data.AUTOTUNE)
    # test_ds = test_ds.map(lambda x: tf.py_function(func = process_path,inp = [x], Tout = (tf.float32, tf.int16)), num_parallel_calls = tf.data.AUTOTUNE)
    train = train_final_ds.batch(batchSize).prefetch(tf.data.AUTOTUNE)
    valid = valid_ds.batch(batchSize).prefetch(tf.data.AUTOTUNE)
    # test = test_ds.batch(batchSize).prefetch(tf.data.AUTOTUNE)
    return train, valid#, test

In [12]:
def get_stats(history):
    best_stats ={}
    best_val_loss_index = np.argmin(history.history['val_loss'])
    for key, value in history.history.items():
        if 'val_auc' in key:
            best_stats['val_auc'] = round(value[best_val_loss_index], 4)
        elif 'auc' in key:
            best_stats['auc'] = round(value[best_val_loss_index], 4)
        else:
            best_stats[key] = round(value[best_val_loss_index], 4)
    return best_stats

In [13]:
# Obtain best stats
def get_best_stats(all_folds_best_stats, folds):
    best_stats = {}
    for fold_key, fold_value in all_folds_best_stats.items():
        for key, value in fold_value.items():
            if key not in best_stats.keys():
                best_stats[key] = value
            else:
                best_stats[key] += value
    best_stats = {key: value/folds for key, value in best_stats.items()}
    return best_stats

In [14]:
def train_model(baseline_model, max_lr, epochs, folds, chkp_path, dropout_rate = 0.5, label_smoothing = 0, data_aug = False, img_size= 320, channels = 1):
    all_folds_best_stats = {}
    for fold in range(folds):
        print(f'\n********Training the model with validation fold {fold}********\n')
        # Train and validation file names for each fold
        train_image_ids = list(df[df['fold'] != fold]['StudyInstanceUID'])
        val_image_ids = list(df[df['fold'] == fold]['StudyInstanceUID'])
        train_paths = get_paths(train_image_ids, train_image_paths)
        val_paths = get_paths(val_image_ids, train_image_paths)
        train, valid = input_pipeline(train_paths, val_paths)
        # Define Model
        model = tf.keras.models.Sequential([])
        model.add(tf.keras.layers.InputLayer((img_size, img_size, channels)))
        if data_aug:
            model.add(tf.keras.layers.experimental.preprocessing.RandomFlip(mode = 'horizontal'))
        model.add(baseline_model)
        model.add(tf.keras.layers.Dropout(dropout_rate))
        model.add(tf.keras.layers.Dense(4, activation = 'sigmoid', name = 'sigmoidLayer'))

        #Learning Rate scheduler
#         learning_rate_scheduler = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=max_lr, 
#                                                       decay_steps=150,
#                                                       decay_rate = 0.95,
#                                                      )

        # CosineDecayRestarts
        learning_rate_scheduler = tf.keras.optimizers.schedules.CosineDecayRestarts(initial_learning_rate=max_lr, 
                                                      first_decay_steps=100,
                                                      alpha = 0.05,
                                                      m_mul=0.9, 
                                                      t_mul = 2  
                                                     )
        # Optimizer
        optimizer=tf.keras.optimizers.Adam(learning_rate_scheduler)

        # Compile the model
        model.compile(optimizer=optimizer, loss = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.0), metrics = [tf.keras.metrics.AUC()])
        
        if fold == 0:
            print(model.summary())
        
        # Callbacks
        checkpoint = tf.keras.callbacks.ModelCheckpoint(
        f'checkpoints/siim/train/{chkp_path}/model_{fold}', save_best_only=True, monitor = "val_loss")
        checkpoint._supports_tf_logs = False
        early_stopping = tf.keras.callbacks.EarlyStopping(min_delta = 0.0001, patience = 2)
        
        # Fitting the model
        history = model.fit(train,validation_data=valid, epochs = epochs,  workers = -1,callbacks= [ early_stopping, checkpoint],verbose = 1)
        
        # Getting the best models results
        fold_best_stats = get_stats(history)
        
        # Storing in a global dictionary to get aggregate results
        all_folds_best_stats[f'Fold_{fold}'] = fold_best_stats
    
    # Get the aggregate auc and loss
    best_stats = get_best_stats(all_folds_best_stats, folds)
    
    return best_stats


In [17]:
def change_model_input_size(pretrained_model, input_height, input_width):
    config = pretrained_model.get_config()
    new_shape = (None, input_height, input_width, 1)
    
    config['layers'][0]['config']['batch_input_shape'] = new_shape
    config['layers'][1]['config']['layers'][0]['config']['batch_input_shape'] = new_shape

    model = tf.keras.models.Sequential()

    model = model.from_config(config)

    for layer in model.layers[0].layers:
            try:
                layer.set_weights(pretrained_model.layers[0].get_layer(name=layer.name).get_weights())
                print("Loaded layer {}".format(layer.name))
            except:
                print("Could not transfer weights for layer {}".format(layer.name))
    return model.layers[0]

### EfficientNet-B5 320px pretrained on 320px chexpert dataset

In [18]:
# Storing all the file names
    
batchSize = 4
input_height, input_width = (320,320)
train_dir = pathlib.Path(os.path.join(data_dir, f'{input_height}px','train','study'))
train_image_paths = list(train_dir.glob('*.png'))
train_image_paths = [path for path in train_image_paths if str(path).split(os.sep)[-1].split('.')[0] in list(df.StudyInstanceUID)]

In [14]:
pretrained_model = tf.keras.models.load_model(r'.\checkpoints\train\efficientNetB5\320px')













In [19]:
new_model = tf.keras.models.Sequential([
        pretrained_model.layers[0],
        tf.keras.layers.MaxPool2D(),
        tf.keras.layers.Flatten()
    ])

print('****Model loaded successfully****\n')

models_history = train_model(new_model, 0.0001, 15, folds = 5, chkp_path = 'EfficientNet-B5/chexpert/pretrain_320px/320px', img_size = 320, channels = 1)    
print(f'Model trained successfully with mean AUC score of \n Train_AUC: {models_history["auc"]}\n Valid_AUC: {models_history["val_auc"]} \
\n Train_loss: {models_history["loss"]} \n valid_loss: {models_history["val_loss"]}')

****Model loaded successfully****


********Training the model with validation fold 0********

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_4 (Sequential)    (None, 51200)             28512659  
_________________________________________________________________
dropout_2 (Dropout)          (None, 51200)             0         
_________________________________________________________________
sigmoidLayer (Dense)         (None, 4)                 204804    
Total params: 28,717,463
Trainable params: 28,544,724
Non-trainable params: 172,739
_________________________________________________________________
None
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15

********Training the model with validation fold 1********

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15

********Training the model with validation fold 2**

### EfficientNetV2-S 600px pretrained on 600px chexpert dataset

In [11]:
# Storing all the file names
    
batchSize = 8
input_height, input_width = (600,600)
train_dir = pathlib.Path(os.path.join(data_dir, f'{input_height}px','train','study'))
train_image_paths = list(train_dir.glob('*.png'))
train_image_paths = [path for path in train_image_paths if str(path).split(os.sep)[-1].split('.')[0] in list(df.StudyInstanceUID)]

In [12]:
len(train_image_paths)

5999

In [31]:
#pretrained_model = tf.keras.models.load_model('.\saved_models\efficientNetV2_s_600px.h5')

In [32]:
# tf.keras.backend.clear_session()
# new_model = effnetv2_model.get_model('efficientnetv2-s', include_top = False, pretrained = False)
# new_model.set_weights(pretrained_model.layers[0].get_weights())

# print('****Model loaded successfully****\n')

# models_history = train_model(new_model, 0.0001, 20, folds = 5, chkp_path = 'EfficientNetV2-S/chexpert/320px')    
# print(f'Model trained successfully with mean AUC score of \n Train_AUC: {models_history["auc"]}\n Valid_AUC: {models_history["val_auc"]} \
# \n Train_loss: {models_history["loss"]} \n valid_loss: {models_history["val_loss"]}')

### EfficientNetV2-S 512px pretrained on 512px chexpert dataset

In [19]:
# Storing all the file names
    
batchSize = 4
input_height, input_width = (512,512)
train_dir = pathlib.Path(os.path.join(data_dir, f'{input_height}px','train','study'))
train_image_paths = list(train_dir.glob('*.png'))
train_image_paths = [path for path in train_image_paths if str(path).split(os.sep)[-1].split('.')[0] in list(df.StudyInstanceUID)]

In [20]:
pretrained_model = tf.keras.models.load_model(r'.\checkpoints\train\efficientNetV2-s\512px')









In [21]:
tf.keras.backend.clear_session()
new_model = effnetv2_model.get_model('efficientnetv2-s', include_top = False, pretrained = False)
new_model.set_weights(pretrained_model.layers[0].get_weights())

print('****Model loaded successfully****\n')

models_history = train_model(new_model, 0.0001, 15, folds = 5, chkp_path = 'EfficientNetV2-S/chexpert/pretrain_512px/512px', img_size = 512, channels = 3)    
print(f'Model trained successfully with mean AUC score of \n Train_AUC: {models_history["auc"]}\n Valid_AUC: {models_history["val_auc"]} \
\n Train_loss: {models_history["loss"]} \n valid_loss: {models_history["val_loss"]}')

****Model loaded successfully****


********Training the model with validation fold 0********

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetv2-s (EffNetV2Mo (None, 1280)              20331360  
_________________________________________________________________
dropout_1 (Dropout)          (None, 1280)              0         
_________________________________________________________________
sigmoidLayer (Dense)         (None, 4)                 5124      
Total params: 20,336,484
Trainable params: 20,182,612
Non-trainable params: 153,872
_________________________________________________________________
None
Epoch 1/15




Epoch 2/15
Epoch 3/15

********Training the model with validation fold 1********

Epoch 1/15




Epoch 2/15
Epoch 3/15

********Training the model with validation fold 2********

Epoch 1/15




Epoch 2/15




Epoch 3/15
Epoch 4/15

********Training the model with validation fold 3********

Epoch 1/15




Epoch 2/15
Epoch 3/15

********Training the model with validation fold 4********

Epoch 1/15




Epoch 2/15




Epoch 3/15
Epoch 4/15
Model trained successfully with mean AUC score of 
 Train_AUC: 0.8817
 Valid_AUC: 0.9267000000000001 
 Train_loss: 0.76902 
 valid_loss: 0.58456


Model trained successfully with mean AUC score of <br>
 Train_AUC: 0.87748<br>
 Valid_AUC: 0.9214 <br>
 Train_loss: 0.7798 <br>
 valid_loss: 0.61614

### EfficientNetV2-S 320px pretrained on 512px chexpert dataset

In [18]:
# Storing all the file names
    
batchSize = 4
input_height, input_width = (320,320)
train_dir = pathlib.Path(os.path.join(data_dir, f'{input_height}px','train','study'))
train_image_paths = list(train_dir.glob('*.png'))
train_image_paths = [path for path in train_image_paths if str(path).split(os.sep)[-1].split('.')[0] in list(df.StudyInstanceUID)]

In [14]:
pretrained_model = tf.keras.models.load_model(r'.\checkpoints\train\efficientNetV2-s\512px')









In [19]:
tf.keras.backend.clear_session()
new_model = effnetv2_model.get_model('efficientnetv2-s', include_top = False, pretrained = False)
new_model.set_weights(pretrained_model.layers[0].get_weights())

print('****Model loaded successfully****\n')

models_history = train_model(new_model, 0.0001, 15, folds = 5, chkp_path = 'EfficientNetV2-S/chexpert/pretrain_512px/320px')    
print(f'Model trained successfully with mean AUC score of \n Train_AUC: {models_history["auc"]}\n Valid_AUC: {models_history["val_auc"]} \
\n Train_loss: {models_history["loss"]} \n valid_loss: {models_history["val_loss"]}')

****Model loaded successfully****


********Training the model with validation fold 0********

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetv2-s (EffNetV2Mo (None, 1280)              20331360  
_________________________________________________________________
dropout_1 (Dropout)          (None, 1280)              0         
_________________________________________________________________
sigmoidLayer (Dense)         (None, 4)                 5124      
Total params: 20,336,484
Trainable params: 20,182,612
Non-trainable params: 153,872
_________________________________________________________________
None
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15

********Training the model with validation fold 1********

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15

********Training the model with validation fold 2********

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch

### EfficientNetV2-S 320px pretrained on 320px chexpert dataset

In [15]:
# Storing all the file names
    
batchSize = 8
input_height, input_width = (320,320)
train_dir = pathlib.Path(os.path.join(data_dir, f'{input_height}px','train','study'))
train_image_paths = list(train_dir.glob('*.png'))
train_image_paths = [path for path in train_image_paths if str(path).split(os.sep)[-1].split('.')[0] in list(df.StudyInstanceUID)]

In [16]:
len(train_image_paths)

5999

In [17]:
pretrained_model = tf.keras.models.load_model(r'.\checkpoints\train\efficientNetV2-s')









In [None]:
tf.keras.backend.clear_session()
new_model = effnetv2_model.get_model('efficientnetv2-s', include_top = False, pretrained = False)
new_model.set_weights(pretrained_model.layers[0].get_weights())

print('****Model loaded successfully****\n')

models_history = train_model(new_model, 0.0001, 15, folds = 5, chkp_path = 'EfficientNetV2-S/chexpert/320px')    
print(f'Model trained successfully with mean AUC score of \n Train_AUC: {models_history["auc"]}\n Valid_AUC: {models_history["val_auc"]} \
\n Train_loss: {models_history["loss"]} \n valid_loss: {models_history["val_loss"]}')

****Model loaded successfully****


********Training the model with validation fold 0********

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetv2-s (EffNetV2Mo (None, 1280)              20331360  
_________________________________________________________________
dropout_1 (Dropout)          (None, 1280)              0         
_________________________________________________________________
sigmoidLayer (Dense)         (None, 4)                 5124      
Total params: 20,336,484
Trainable params: 20,182,612
Non-trainable params: 153,872
_________________________________________________________________
None
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15

********Training the model with validation fold 1********

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15

********Training the model with validation fold 2********

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15

********Trainin

Model trained successfully with mean AUC score of 
 Train_AUC: 0.9133000000000001
 Valid_AUC: 0.94634 
 Train_loss: 0.5715 
 valid_loss: 0.42942
 
 Note: The model needs to be trained again as it couldn't be saved this time.

### EfficientNetV2-S 320px pretrained on 320px chexpert dataset after data augmentation|

In [30]:
tf.keras.backend.clear_session()
new_model = effnetv2_model.get_model('efficientnetv2-s', include_top = False, pretrained = False)
new_model.set_weights(pretrained_model.layers[0].get_weights())

print('****Model loaded successfully****\n')

models_history = train_model(new_model, 0.0001, 15, folds = 5, data_aug = True, chkp_path = 'EfficientNetV2-S/chexpert/320px_with_aug')    
print(f'Model trained successfully with mean AUC score of \n Train_AUC: {models_history["auc"]}\n Valid_AUC: {models_history["val_auc"]} \
\n Train_loss: {models_history["loss"]} \n valid_loss: {models_history["val_loss"]}')

****Model loaded successfully****


********Training the model with validation fold 0********

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
random_flip (RandomFlip)     (None, 320, 320, 3)       0         
_________________________________________________________________
efficientnetv2-s (EffNetV2Mo (None, 1280)              20331360  
_________________________________________________________________
dropout_1 (Dropout)          (None, 1280)              0         
_________________________________________________________________
sigmoidLayer (Dense)         (None, 4)                 5124      
Total params: 20,336,484
Trainable params: 20,182,612
Non-trainable params: 153,872
_________________________________________________________________
None
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15

********Training the model with validation fold 1********

Epoch 1/15
Epoch 2/15

Model trained successfully with mean AUC score of <br>
 Train_AUC: 0.8951<br>
 Valid_AUC: 0.9248 <br>
 Train_loss: 0.7154<br> 
 valid_loss: 0.5844<br>

In [36]:
device = cuda.get_current_device()
device.reset()

### EfficientNetV2-S 320px pretrained on 320px chexpert dataset after applying gamma correction

In [16]:
# Storing all the file names
    
batchSize = 8
input_height, input_width = (320,320)
train_dir = pathlib.Path(os.path.join(data_dir, f'{input_height}px','train','study'))
train_image_paths = list(train_dir.glob('*.png'))
train_image_paths = [path for path in train_image_paths if str(path).split(os.sep)[-1].split('.')[0] in list(df.StudyInstanceUID)]

In [17]:
pretrained_model = tf.keras.models.load_model(r'.\checkpoints\train\efficientNetV2-s')









In [19]:
tf.keras.backend.clear_session()
new_model = effnetv2_model.get_model('efficientnetv2-s', include_top = False, pretrained = False)
new_model.set_weights(pretrained_model.layers[0].get_weights())

print('****Model loaded successfully****\n')

models_history = train_model(new_model, 0.0001, 15, folds = 5, chkp_path = 'EfficientNetV2-S/chexpert/320px_gamma')    
print(f'Model trained successfully with mean AUC score of \n Train_AUC: {models_history["auc"]}\n Valid_AUC: {models_history["val_auc"]} \
\n Train_loss: {models_history["loss"]} \n valid_loss: {models_history["val_loss"]}')

****Model loaded successfully****


********Training the model with validation fold 0********

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetv2-s (EffNetV2Mo (None, 1280)              20331360  
_________________________________________________________________
dropout_1 (Dropout)          (None, 1280)              0         
_________________________________________________________________
sigmoidLayer (Dense)         (None, 4)                 5124      
Total params: 20,336,484
Trainable params: 20,182,612
Non-trainable params: 153,872
_________________________________________________________________
None
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15

********Training the model with validation fold 1********

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15

********Training the model with validation fold 2********

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15

********Trainin

### Efficient Net B0

#### Pretrained on Imagenet

In [104]:
# Main
if __name__ == '__main__':
    
    batchSize = 16
    input_height, input_width = (320, 320)
    baseline_model = tf.keras.applications.EfficientNetB0(include_top = False, weights = 'imagenet', input_shape = (input_height, input_width, 3))
    model = tf.keras.models.Sequential([
     baseline_model,
     tf.keras.layers.MaxPool2D(),
     tf.keras.layers.Flatten(),
    ])
    print('****Model loaded successfully****\n')

    models_history = train_model(model, max_lr = 0.0001, epochs = 20, folds = 5, dropout_rate= 0.5, chkp_path = 'EfficientNetB0/imagenet')    
    print(f'****Model trained successfully with mean AUC score of \n Train_AUC: {models_history["auc"]}\n Valid_AUC: {models_history["val_auc"]}****')

****Model loaded successfully****

********Training the model with validation fold 0********

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb0 (Functional)  (None, 10, 10, 1280)      4049571   
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 5, 5, 1280)        0         
_________________________________________________________________
flatten_4 (Flatten)          (None, 32000)             0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 32000)             0         
_________________________________________________________________
sigmoidLayer (Dense)         (None, 4)                 128004    
Total params: 4,177,575
Trainable params: 4,135,552
Non-trainable params: 42,023
_________________________________________________________________
None
*****T

Epoch 2/20
Epoch 3/20
Epoch 4/20


AttributeError: 'dict' object has no attribute 'item'

#### Pretrained on Chexpert

In [121]:
# Main
if __name__ == '__main__':
    
    batchSize = 16
    input_height, input_width = (320, 320)
    pretrained_model = tf.keras.models.load_model('./saved_models/efficientNetB0/auc_0.7520/')
    model = tf.keras.models.Sequential([
        pretrained_model.layers[0],
        tf.keras.layers.MaxPool2D(),
        tf.keras.layers.Flatten()
    ])
        
    
    print('****Model loaded successfully****\n')

    models_history = train_model(model, 0.0001, 20, folds = 5, chkp_path = 'EfficientNetB0/chexpert')    
    print(f'****Model trained successfully with mean AUC score of \n Train_AUC: {models_history["auc"]}\n Valid_AUC: {models_history["val_auc"]}****')







****Model loaded successfully****


********Training the model with validation fold 0********

Model: "sequential_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb0 (Functional)  (None, 10, 10, 1280)      4048991   
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 5, 5, 1280)        0         
_________________________________________________________________
flatten_8 (Flatten)          (None, 32000)             0         
_________________________________________________________________
dropout_13 (Dropout)         (None, 32000)             0         
_________________________________________________________________
sigmoidLayer (Dense)         (None, 4)                 128004    
Total params: 4,176,995
Trainable params: 4,134,976
Non-trainable params: 42,019
_________________________________________________________________
None
****

KeyError: 'auc'

In [114]:
print(f'****Model trained successfully with mean AUC score of \n Train_AUC: {models_history["auc"]}\n Valid_AUC: {models_history["val_auc"]}****')

{'loss': 0.8255800000000001,
 'auc_': 0.84778,
 'val_loss': 0.81768,
 'val_auc_': 0.8471200000000001}

#### Training from sratch

In [130]:
# Main
if __name__ == '__main__':
    
    batchSize = 16
    input_height, input_width = (320, 320)
    baseline_model = tf.keras.applications.EfficientNetB0(include_top = False, weights = None, input_shape = (input_height, input_width, 1))
    model = tf.keras.models.Sequential([
     baseline_model,
     tf.keras.layers.MaxPool2D(),
     tf.keras.layers.Flatten(),
    ])
        
    
    print('****Model loaded successfully****\n')

    models_history = train_model(model, 0.0001, 20, folds = 5, chkp_path = 'EfficientNetB0/no_weights')    
    print(f'****Model trained successfully with mean AUC score of \n Train_AUC: {models_history["auc"]}\n Valid_AUC: {models_history["val_auc"]}****')

****Model loaded successfully****


********Training the model with validation fold 0********

Model: "sequential_27"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb0 (Functional)  (None, 10, 10, 1280)      4048991   
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 5, 5, 1280)        0         
_________________________________________________________________
flatten_9 (Flatten)          (None, 32000)             0         
_________________________________________________________________
dropout_18 (Dropout)         (None, 32000)             0         
_________________________________________________________________
sigmoidLayer (Dense)         (None, 4)                 128004    
Total params: 4,176,995
Trainable params: 4,134,976
Non-trainable params: 42,019
_________________________________________________________________
None
Epoc

0.77754

##### Observations
- EfficientNetB0 provided best results when pretrained on the chexpert dataset and worst when pretrained imagenet weights were used.
- We got a mean validation accuray of 0.84 (approx.) with the chexpert dataset pretrained network and 0.77 when trained from scratch using only the competition data.
- The results did not vary much as the training for the best model was very efficient and stable. Overfitting was very minimal. On the other hand when finetuned using imagenet weights overfitting was a major issue and even with lower learning rate the losses varied alot from epoch to epoch. 
- 

### Studying impact of image scales

- Any resolution that is able to result in a validation AUC greater than 0.84 will be preferred.

### EfficientNetB0 using 512px sized images

In [16]:
# Storing all the file names
train_dir = pathlib.Path(os.path.join(data_dir, '512px','train','study'))
train_image_paths = list(train_dir.glob('*.png'))

len(train_image_paths)

6054

In [17]:
train_image_paths = [path for path in train_image_paths if str(path).split(os.sep)[-1].split('.')[0] in list(df.StudyInstanceUID)]

In [18]:
len(train_image_paths)

5999

In [152]:

    
batchSize = 8
input_height, input_width = (512,512)
pretrained_model = tf.keras.models.load_model('./saved_models/efficientNetB0/auc_0.7520/')

baseline_model = change_model_input_size(pretrained_model, input_height, input_width) 
model = tf.keras.models.Sequential([
    baseline_model,
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Flatten()
])

print('****Model loaded successfully****\n')

models_history = train_model(model, 0.0001, 20, folds = 5, chkp_path = 'EfficientNetB0/chexpert/512px')    
print('*'*20)
print(f'Model trained successfully with mean AUC score of \n Train_AUC: {models_history["auc"]}\n Valid_AUC: {models_history["val_auc"]}')

****Model loaded successfully****


********Training the model with validation fold 0********

Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb0 (Functional)  (None, 16, 16, 1280)      4048991   
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 8, 8, 1280)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 81920)             0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 81920)             0         
_________________________________________________________________
sigmoidLayer (Dense)         (None, 4)                 327684    
Total params: 4,376,675
Trainable params: 4,334,656
Non-trainable params: 42,019
_________________________________________________________________
None
Epoc

EfficientNetB0 with resolution 512x512

- Train_AUC: 0.7697
- Valid_AUC: 0.7913

### DenseNet121

In [18]:
# Main
if __name__ == '__main__':
    
    batchSize = 16
    input_height, input_width = (320, 320)
    pretrained_model = tf.keras.models.load_model('./saved_models/denseNet121/auc_0.7912/')
    model = tf.keras.models.Sequential([
        pretrained_model.layers[0],
        tf.keras.layers.MaxPool2D(),
        tf.keras.layers.Flatten()
    ])
        
    
    print('****Model loaded successfully****\n')

    models_history = train_model(model, 0.0001, 20, folds = 5, chkp_path = 'DenseNet121/chexpert/320px')    
    print(f'****Model trained successfully with mean AUC score of \n Train_AUC: {models_history["auc"]}\n Valid_AUC: {models_history["val_auc"]}****')

****Model loaded successfully****


********Training the model with validation fold 0********

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
densenet121 (Functional)     (None, 10, 10, 1024)      7031232   
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 1024)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 25600)             0         
_________________________________________________________________
dropout (Dropout)            (None, 25600)             0         
_________________________________________________________________
sigmoidLayer (Dense)         (None, 4)                 102404    
Total params: 7,133,636
Trainable params: 7,049,988
Non-trainable params: 83,648
_________________________________________________________________
None
Epoch

Train_AUC: 0.746

Valid_AUC: 0.7738000000000002

### EfficientNetB0 on 1080px images

In [16]:
# Storing all the file names
    
batchSize = 2
input_height, input_width = (1080,1080)
train_dir = pathlib.Path(os.path.join(data_dir, f'{input_height}px','train','study'))
train_image_paths = list(train_dir.glob('*.png'))

len(train_image_paths)

6054

In [17]:
train_image_paths = [path for path in train_image_paths if str(path).split(os.sep)[-1].split('.')[0] in list(df.StudyInstanceUID)]

In [20]:

pretrained_model = tf.keras.models.load_model('./saved_models/efficientNetB0/auc_0.7520/')







In [27]:



baseline_model = change_model_input_size(pretrained_model, input_height, input_width) 
model = tf.keras.models.Sequential([
    baseline_model,
    tf.keras.layers.MaxPool2D(strides = (2,2)),
    tf.keras.layers.DepthwiseConv2D(3, 2, activation = None),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool2D(strides = (2,2)),
    tf.keras.layers.Flatten()
])

print('****Model loaded successfully****\n')

models_history = train_model(model, 0.0001, 20, folds = 5, chkp_path = f'EfficientNetB0/chexpert/{input_height}px')    
print('*'*20)
print(f'\nModel trained successfully with mean AUC score of \n Train_AUC: {models_history["auc"]}\n Valid_AUC: {models_history["val_auc"]}')

Loaded layer input_1
Loaded layer rescaling
Loaded layer normalization
Loaded layer stem_conv_pad
Loaded layer stem_conv
Loaded layer stem_bn
Loaded layer stem_activation
Loaded layer block1a_dwconv
Loaded layer block1a_bn
Loaded layer block1a_activation
Loaded layer block1a_se_squeeze
Loaded layer block1a_se_reshape
Loaded layer block1a_se_reduce
Loaded layer block1a_se_expand
Loaded layer block1a_se_excite
Loaded layer block1a_project_conv
Loaded layer block1a_project_bn
Loaded layer block2a_expand_conv
Loaded layer block2a_expand_bn
Loaded layer block2a_expand_activation
Loaded layer block2a_dwconv_pad
Loaded layer block2a_dwconv
Loaded layer block2a_bn
Loaded layer block2a_activation
Loaded layer block2a_se_squeeze
Loaded layer block2a_se_reshape
Loaded layer block2a_se_reduce
Loaded layer block2a_se_expand
Loaded layer block2a_se_excite
Loaded layer block2a_project_conv
Loaded layer block2a_project_bn
Loaded layer block2b_expand_conv
Loaded layer block2b_expand_bn
Loaded layer blo

Epoch 2/20
Epoch 3/20
Epoch 4/20

********Training the model with validation fold 1********

Epoch 1/20
Epoch 2/20
Epoch 3/20
 129/2400 [>.............................] - ETA: 14:37 - loss: 1.2111 - auc_6: 0.7327

KeyboardInterrupt: 

Model accuracies did not improve for high resolution images. Tests will be conducted one more time for model pretrained on chexpert dataset using  high resolution images.