# Training CNNs
Note: the code in this notebook was copy-pasted from elsewhere.

In [None]:
!pip install keras

In [16]:
import os

import pandas as pd
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras import backend as k
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

In [17]:
import tensorflow as tf

# Fail fast, with an explanatory message, when training could not run on a GPU.
assert tf.test.is_gpu_available(), "TensorFlow does not report an available GPU"
assert tf.test.is_built_with_cuda(), "This TensorFlow build was not built with CUDA support"

In [18]:
# Load the cleaned label table and cast every column to str in one chained
# expression; string-typed columns are required for flow_from_dataframe.
labels_df = pd.read_csv(
    'C:\\Users\\Administrator\\Downloads\\labels_cleaned.csv'
).astype(str)

In [19]:
# Preview the first three rows of the label table.
labels_df.iloc[:3]

Unnamed: 0.1,Unnamed: 0,ID,any
0,0,ID_000039fa0.png,0
1,1,ID_00005679d.png,0
2,2,ID_00008ce3c.png,0


In [20]:
# Spatial size every image is resized to before entering the network.
img_width = 128
img_height = 128

# Image folders read by the training and validation generators.
train_data_dir = "C:\\Users\\Administrator\\Downloads\\Windowed-PNGs-FINAL-train"
validation_data_dir = 'C:\\Users\\Administrator\\Downloads\\Windowed-PNGs-FINAL-test'

# Earlier, currently unused sample counts: nb_train_samples = 4125,
# nb_validation_samples = 466.
batch_size = 32
epochs = 1  # 50

# VGG19 convolutional base with ImageNet weights and without its
# fully-connected top, built for 3-channel inputs of the size chosen above.
model = applications.VGG19(
    include_top=False,
    weights="imagenet",
    input_shape=(img_width, img_height, 3),
)

In [21]:
# Goal: freeze the first three conv blocks (everything through block3_pool) and
# leave the last two blocks trainable. This cell only reports the index of the
# first layer that should stay trainable; the freezing itself happens later.
layers = model.layers
print(len(layers))
model.summary()

# The first trainable layer is the first conv layer of block 4. The previous
# version searched for 'block3_conv1', which would leave block 3 unfrozen and
# contradicts the stated goal of freezing three blocks.
first_trainable_layer = 'block4_conv1'
index = next(
    (position for position, layer in enumerate(layers)
     if layer.name == first_trainable_layer),
    None,
)

if index is None:
    print('Layer not found in model: ', first_trainable_layer)
else:
    print('We want to freeze all layers up to (not including) layer ', index)

22
Model: "vgg19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 128, 128, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 128, 128, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 128, 128, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 64, 64, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 64, 64, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 64, 64, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 32, 32, 128)       0  

In [25]:
# Freeze the layers we do not want to train: the first 12 layers of the VGG19
# base. Per the earlier notes this is the input layer plus conv blocks 1-3;
# the remaining blocks stay trainable.
for layer in model.layers[0:12]:
    layer.trainable = False

# Custom classification head on top of the convolutional base.
x = model.output
x = Flatten()(x)
x = Dense(1000, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(1000, activation="relu")(x)
predictions = Dense(1, activation="sigmoid")(x)  # one sigmoid unit for the binary label

# Final model: frozen/fine-tuned VGG19 base + the new head.
model_final = Model(inputs=model.input, outputs=predictions)

# Compile with Adam (initial LR 0.0001). The metrics get explicit names so that
# their log keys are stable ('recall' / 'val_recall', ...) instead of
# auto-numbered names such as 'recall_2', which change every time a new metric
# object is created in the session. The callbacks below rely on these names.
model_final.compile(
    loss="binary_crossentropy",
    optimizer=optimizers.Adam(lr=0.0001),
    metrics=[
        "accuracy",
        tf.keras.metrics.Recall(name="recall"),
        tf.keras.metrics.AUC(name="auc"),
        tf.keras.metrics.Precision(name="precision"),
    ],
)

# Train and validation image generators. Both only multiply pixel values by
# 1/255; the augmentation options below are currently disabled.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    # horizontal_flip=True,
    # fill_mode="nearest",
    # zoom_range=0.3,
    # width_shift_range=0.3,
    # height_shift_range=0.3,
    # rotation_range=30,
)

test_datagen = ImageDataGenerator(rescale=1./255)

# Both generators read the same label table but different image folders; each
# one reports how many of the listed filenames it could validate in its folder.
# batch_size is passed explicitly so the config value is actually used
# (32 is also the Keras default, so behaviour is unchanged).
train_generator = train_datagen.flow_from_dataframe(
        dataframe=labels_df,
        directory=train_data_dir,
        x_col='ID',
        y_col='any',
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='binary')

# shuffle=False keeps the validation batches in dataframe order, so that
# predictions made from this generator line up with its filenames and labels.
validation_generator = test_datagen.flow_from_dataframe(
        dataframe=labels_df,
        directory=validation_data_dir,
        x_col='ID',
        y_col='any',
        target_size=(img_width, img_height),
        batch_size=batch_size,
        shuffle=False,
        class_mode='binary')

# Callbacks. `monitor` must be the *name* of a logged quantity (a string), not
# a metric object. Validation recall is monitored with mode='max', because
# mode='auto' would treat a name like 'val_recall' as something to minimise.
# With save_best_only=False the checkpoint file is overwritten every epoch.
checkpoint = ModelCheckpoint(
    'C:\\Users\\Administrator\\Downloads\\best_cnn.h5',
    monitor='val_recall',
    verbose=1,
    save_best_only=False,
    save_weights_only=False,
    mode='max',
    period=1,
)
early = EarlyStopping(monitor='val_recall', min_delta=0, patience=3, verbose=1, mode='max')

Found 68827 validated image filenames belonging to 2 classes.
Found 16307 validated image filenames belonging to 2 classes.


  .format(n_invalid, x_col)


In [52]:
# Per-epoch checkpoints whose filename contains the epoch number and the
# validation accuracy. The metric is logged as 'accuracy', so the validation
# key is 'val_accuracy' (the filename template already uses that key);
# 'val_acc' does not exist in the logs.
checkpoint = ModelCheckpoint(
    'C:\\Users\\Administrator\\Downloads\\Checkpoints\\checkpoint-{epoch:02d}-{val_accuracy:.2f}.hdf5',
    monitor='val_accuracy',
    verbose=1,
    save_best_only=False,
    save_weights_only=False,
    mode='auto',
    period=1,
)
#early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=2, verbose=1, mode='auto')

In [56]:
# Train the model. The generators work with the PNG files and seem to ignore
# the ~12k erased images.
# Options tried earlier and currently disabled:
#   samples_per_epoch = nb_train_samples
#   nb_val_samples = nb_validation_samples
#   class_weight = {0: 0.15, 1: 0.85}
model_final.fit_generator(
    generator=train_generator,
    epochs=50,  # 26+ epochs
    validation_data=validation_generator,
    callbacks=[checkpoint],
)

Epoch 1/50
  96/2151 [>.............................] - ETA: 1:07:43 - loss: 0.0072 - accuracy: 0.9990 - recall_2: 0.7780 - auc_2: 0.9591 - precision: 0.8694

KeyboardInterrupt: 

In [54]:
# Optional: this cell does not need to be executed.
# Writes the current model to an HDF5 file.
model_final.save(
    filepath='C:\\Users\\Administrator\\Downloads\\trained_cnn_4.h5'
)

In [41]:
###############################

In [12]:
# Run the trained model over every batch of the validation generator.
preds = model_final.predict_generator(
    generator=validation_generator
)

In [13]:
# Share of predictions that fall below the 0.5 decision threshold, i.e. that
# the model labels as "no hemorrhage".
predlen = len(preds)

# Vectorised count; equivalent to looping over preds and testing each value.
zerocounter = int((preds < 0.5).sum())

print('Predicts no hemorrhage: ', zerocounter/predlen)
# A bare `zerocounter` in the middle of a cell is never displayed, so the count
# is printed explicitly.
print('Number of predictions below 0.5: ', zerocounter)

# Highest predicted probability (shown as the cell output).
max(preds)

Predicts no hemorrhage:  0.8739804991721347


array([0.7886795], dtype=float32)

In [None]:
# TODO: do a proper train/test split of the data (the same dataframe can probably still be used):
# simply split the dataframe itself into a train part and a test part (and also reduce its size).

In [None]:
import glob
import numpy as np

In [None]:
# Pair every image file found on disk with its label from the label table.
# NOTE(review): the previous version read from `train_df`, which is not defined
# anywhere in this notebook; `labels_df` is the frame that has the 'ID' and
# 'any' columns used here. Confirm this is the intended table.
filenames = glob.glob('C:\\Users\\Administrator\\Downloads\\Windowed-PNGs\\*')
nb_files = len(filenames)

# Build the ID -> label lookup once instead of scanning the dataframe per file.
label_by_id = dict(zip(labels_df['ID'], labels_df['any']))

filename_array = []
for path in filenames:
    # glob returns full paths, whereas the 'ID' column holds bare file names
    # such as 'ID_000039fa0.png', so compare on the base name only.
    image_id = os.path.basename(path)
    if not image_id.endswith('.png'):
        image_id += '.png'

    label = label_by_id.get(image_id)
    if label is None:
        # No row in the label table for this file; skip it.
        continue
    filename_array.append([path, label])