# Package installation

In [2]:
!pip install pandas
!pip install scikit-learn
!pip install --upgrade tensorflow keras numpy pandas sklearn pillow

Collecting pandas
[?25l  Downloading https://files.pythonhosted.org/packages/4d/51/bafcff417cd857bc6684336320863b5e5af280530213ef8f534b6042cfe6/pandas-1.1.4-cp36-cp36m-manylinux1_x86_64.whl (9.5MB)
[K     |████████████████████████████████| 9.5MB 16.5MB/s eta 0:00:01
Collecting pytz>=2017.2 (from pandas)
[?25l  Downloading https://files.pythonhosted.org/packages/12/f8/ff09af6ff61a3efaad5f61ba5facdf17e7722c4393f7d8a66674d2dbd29f/pytz-2020.4-py2.py3-none-any.whl (509kB)
[K     |████████████████████████████████| 512kB 54.1MB/s eta 0:00:01
Installing collected packages: pytz, pandas
Successfully installed pandas-1.1.4 pytz-2020.4
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
Collecting scikit-learn
[?25l  Downloading https://files.pythonhosted.org/packages/5c/a1/273def87037a7fb010512bbc5901c31cfddfca8080bc63b42b26e3cc55b3/scikit_learn-0.23.2-cp36-cp36m-manylinux1_x86_64.whl (6.8MB)
[K     |████████████████████████████████| 6.8MB 29.7MB/s eta 0:00:01
[

In [12]:
!pip install matplotlib

You should consider upgrading via the 'pip install --upgrade pip' command.[0m


### import packages

In [3]:
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import AveragePooling2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.convolutional import ZeroPadding2D
from keras.layers.core import Activation
from keras.layers.core import Dense
from keras.layers import Flatten
from keras.layers import Input
from keras.models import Model
from keras.layers import add
from keras.regularizers import l2
from keras import backend as K
from keras.applications.resnet_v2 import ResNet50V2

from datetime import datetime
import time
import os
import numpy as np
import tensorflow as tf
import re
import pandas as pd

from tensorflow import keras
from tensorflow.keras import regularizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers

from tensorflow.keras.optimizers import RMSprop

from sklearn.utils import shuffle
import matplotlib.pyplot as plt

# 1st round of training: Freeze all layers

## Setting up model

In [5]:
learning_rate = 1e-4
img_height, img_width = 224, 224
filename = "resnet_freeze_all"

In [6]:
pre_trained_model = ResNet50V2( input_shape=(img_height,img_width,3),
                                include_top = False,
                                weights = 'imagenet' 
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5


In [7]:
#Make all layers non-trainable
for layer in pre_trained_model.layers:
    layer.trainable= False

In [8]:
# Flatten the output layer to 1-dimension
x = layers.Flatten()(pre_trained_model.output)
# Add fully connected layer, with relu activation
x = layers.Dense(1024, activation='relu')(x)
# Add dropout
x = layers.Dropout(0.5)(x)
# Add sigmoid layer for classification. Sigmoid is used instead of softmax, because this is a binary classifier
x = layers.Dense(1, activation='sigmoid')(x)

model = Model(pre_trained_model.input, x)

# binary crossentropy used for binary classification
model.compile(optimizer=RMSprop(learning_rate),
             loss = 'binary_crossentropy',
             metrics=['accuracy'])

## Train generator

In [10]:
batch_size = 256
train_directory = "storage/train"
validation_directory = "storage/validate"
seed = 10
no_epochs = 40

# Data augmentation
train_datagen =ImageDataGenerator(rescale=1./255,
                                 width_shift_range = 0.1,
                                 height_shift_range = 0.1)
train_generator = train_datagen.flow_from_directory(
    directory=train_directory, 
    batch_size = batch_size,
    class_mode ='binary',
    target_size = (img_height,img_width),
shuffle = True)

validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = train_datagen.flow_from_directory(
    directory=validation_directory, 
    class_mode ='binary',
    target_size = (img_height,img_width))

Found 8500 images belonging to 2 classes.
Found 1061 images belonging to 2 classes.


In [13]:
checkpoint_filepath = f"storage/checkpoint/{filename}.hdf5"
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

history = model.fit_generator(generator=train_generator,
                    validation_data=validation_generator, 
                    epochs=no_epochs, 
                    verbose=1,
                   callbacks=[model_checkpoint_callback]
                   )

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [15]:
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']
loss= history.history['loss']
val_loss = history.history['val_loss']

plt.plot(accuracy)
plt.plot(val_accuracy)
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig(f'storage/results/{filename}_accuracy.png')
plt.close()

plt.plot(loss)
plt.plot(val_loss)
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig(f'storage/results/{filename}_loss.png')
plt.close()

## Get confusion matrix

In [31]:
#load best model
model.load_weights(f"storage/checkpoint/{filename}.hdf5")

In [32]:
from sklearn.metrics import confusion_matrix

test_directory = "storage/test/"

test_datagen = ImageDataGenerator(rescale=1./255)

generator = test_datagen.flow_from_directory(
        test_directory,
        target_size=(224, 224),
        batch_size=16,
        class_mode=None,  # only data, no labels
        shuffle=False)  # keep data in same order as labels

probabilities = model.predict_generator(generator, verbose =1)

Found 1635 images belonging to 2 classes.


In [33]:
import os

num_female = sum(len(files) for _, _, files in os.walk('storage/test/female'))
num_male = sum(len(files) for _, _, files in os.walk('storage/test/male'))
print(num_female + num_male)

1635


In [34]:
y_true = np.array([0] * num_female + [1] * num_male)
y_pred = probabilities > 0.5

res = confusion_matrix(y_true, y_pred)

fig, ax = plt.subplots()
im = ax.imshow(res, cmap='Blues')
for i in range(2):
    for j in range(2):
        text = ax.text(j, i, res[i, j],
                       ha="center", va="center", color="y")
plt.title('Confusion matrix from model prediction')
plt.savefig(f'storage/results/{filename}_confusion_matrix.png')
plt.close()

print(res)

[[785 101]
 [105 644]]


# 2nd round of training: Unfreeze last block

## Setting up model

In [None]:
learning_rate = 1e-4
img_height, img_width = 224, 224
filename = "resnet_fine_tune"

In [None]:
pre_trained_model = ResNet50V2( input_shape=(img_height,img_width,3),
                                include_top = False,
                                weights = 'imagenet'        
)

In [None]:
#Unfreeze last 38 layers
for layer in pre_trained_model.layers[:-38]:
    layer.trainable= False
for layer in pre_trained_model.layers[-38:]:
    layer.trainable= True

In [None]:
# Flatten the output layer to 1-dimension
x = layers.Flatten()(pre_trained_model.output)
# Add fully connected layer, with relu activation
x = layers.Dense(1024, activation='relu')(x)
# Add dropout
x = layers.Dropout(0.5)(x)
# Add sigmoid layer for classification. Sigmoid is used instead of softmax, because this is a binary classifier
x = layers.Dense(1, activation='sigmoid')(x)

model = Model(pre_trained_model.input, x)

# binary crossentropy used for binary classification
model.compile(optimizer=RMSprop(learning_rate),
             loss = 'binary_crossentropy',
             metrics=['accuracy'])

## Setting up train and validation generators

In [10]:
batch_size = 256
train_directory = "storage/train"
validation_directory = "storage/validate"
seed = 10
no_epochs = 40

# Data augmentation
train_datagen =ImageDataGenerator(rescale=1./255,
                                 width_shift_range = 0.1,
                                 height_shift_range = 0.1)
train_generator = train_datagen.flow_from_directory(
    directory=train_directory, 
    batch_size = batch_size,
    class_mode ='binary',
    target_size = (img_height,img_width),
shuffle = True)

validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = train_datagen.flow_from_directory(
    directory=validation_directory, 
    class_mode ='binary',
    target_size = (img_height,img_width))

Found 8500 images belonging to 2 classes.
Found 1061 images belonging to 2 classes.


In [11]:
checkpoint_filepath = f"storage/checkpoint/{filename}.hdf5"
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

history = model.fit_generator(generator=train_generator,
                    validation_data=validation_generator, 
                    epochs=no_epochs, 
                    verbose=1,
                   callbacks=[model_checkpoint_callback]
                   )

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [None]:
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']
loss= history.history['loss']
val_loss = history.history['val_loss']

plt.plot(accuracy)
plt.plot(val_accuracy)
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig(f'storage/results/{filename}_accuracy.png')
plt.close()

plt.plot(loss)
plt.plot(val_loss)
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig(f'storage/results/{filename}_loss.png')
plt.close()

## Get confusion matrix

In [None]:
#load best model
model.load_weights(f"storage/checkpoint/{filename}.hdf5")

In [14]:
from sklearn.metrics import confusion_matrix

test_directory = "storage/test/"

test_datagen = ImageDataGenerator(rescale=1./255)

generator = test_datagen.flow_from_directory(
        test_directory,
        target_size=(224, 224),
        batch_size=16,
        class_mode=None,  # only data, no labels
        shuffle=False)  # keep data in same order as labels

probabilities = model.predict_generator(generator, verbose =1)

Found 1635 images belonging to 2 classes.
Instructions for updating:
Please use Model.predict, which supports generators.


In [15]:
import os

num_female = sum(len(files) for _, _, files in os.walk('storage/test/female'))
num_male = sum(len(files) for _, _, files in os.walk('storage/test/male'))
print(num_female + num_male)

1635


In [16]:
y_true = np.array([0] * num_female + [1] * num_male)
y_pred = probabilities > 0.5

res = confusion_matrix(y_true, y_pred)

fig, ax = plt.subplots()
im = ax.imshow(res, cmap='Blues')
for i in range(2):
    for j in range(2):
        text = ax.text(j, i, res[i, j],
                       ha="center", va="center", color="y")
plt.title('Confusion matrix from model prediction')
plt.savefig(f'storage/results/{filename}_confusion_matrix.png')
plt.close()

print(res)

[[835  51]
 [ 73 676]]
