# Package installation

In [None]:
!pip install pandas

In [1]:
!pip install patool

Collecting patool
[?25l  Downloading https://files.pythonhosted.org/packages/43/94/52243ddff508780dd2d8110964320ab4851134a55ab102285b46e740f76a/patool-1.12-py2.py3-none-any.whl (77kB)
[K     |████████████████████████████████| 81kB 3.6MB/s eta 0:00:011
[?25hInstalling collected packages: patool
Successfully installed patool-1.12
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [6]:
!sudo apt-get install unrar

/bin/sh: 1: sudo: not found


In [3]:
!pip install pyunpack

Collecting pyunpack
  Downloading https://files.pythonhosted.org/packages/83/29/020436b1d8e96e5f26fa282b9c3c13a3b456a36b9ea2edc87c5fed008369/pyunpack-0.2.2-py2.py3-none-any.whl
Collecting entrypoint2 (from pyunpack)
  Downloading https://files.pythonhosted.org/packages/c2/ca/00c8767568924e5c2209da99b6abdeeed9d11cbae2a713d54d041b092a09/entrypoint2-0.2.3-py2.py3-none-any.whl
Collecting easyprocess (from pyunpack)
  Downloading https://files.pythonhosted.org/packages/48/3c/75573613641c90c6d094059ac28adb748560d99bd27ee6f80cce398f404e/EasyProcess-0.3-py2.py3-none-any.whl
Collecting argparse (from entrypoint2->pyunpack)
  Downloading https://files.pythonhosted.org/packages/f2/94/3af39d34be01a24a6e65433d19e107099374224905f1e0cc6bbe1fd22a2f/argparse-1.4.0-py2.py3-none-any.whl
Installing collected packages: argparse, entrypoint2, easyprocess, pyunpack
Successfully installed argparse-1.4.0 easyprocess-0.3 entrypoint2-0.2.3 pyunpack-0.2.2
You should consider upgrading via the 'pip install --upgra

In [None]:
!pip install scikit-learn

In [21]:
!pip install pillow

You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [None]:
!pip install matplotlib

In [None]:
!pip install --upgrade tensorflow keras numpy pandas sklearn pillow

# Project set up

In [7]:
from zipfile import ZipFile
zf = ZipFile('storage/test.zip', 'r')
zf.extractall('storage')
zf.close()

In [1]:
from datetime import datetime
import time
import os
import numpy as np
import tensorflow as tf
import re
import pandas as pd

from tensorflow import keras
from tensorflow.keras import regularizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras import Model

from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.optimizers import RMSprop

from sklearn.utils import shuffle
import matplotlib.pyplot as plt

from PIL import Image
import PIL

## Removing unnecessary files

In [50]:
import shutil
files = folders = 0

path = "storage/train/"

for parent, dirnames, filenames in os.walk(path):
    print(dirnames)
    for dr in dirnames:
        if dr.lower().endswith('.ipynb_checkpoints'):
            print(parent)
            shutil.rmtree(dr)

['male', 'female']
[]
['.ipynb_checkpoints']
storage/small_train/female
[]


In [51]:
#check if everything is removed
for _, dirnames, filenames in os.walk(path):
  # ^ this idiom means "we won't be using this value"
    files += len(filenames)
    folders += len(dirnames)
    if dirnames:
        print(dirnames)

print("{:,} files, {:,} folders".format(files, folders))

['male', 'female']
['.ipynb_checkpoints']
8,501 files, 3 folders


# 1st round of training: Freeze all layers

## Setting up model

In [None]:
print(InceptionV3().summary())

In [2]:
pre_trained_model = InceptionV3( input_shape=(299,299,3),
                                include_top = False,
                                weights = 'imagenet'
)

#Make all layers non-trainable
for layer in pre_trained_model.layers:
    layer.trainable= False

In [4]:
learning_rate = 1e-4

# Flatten the output layer to 1-dimension
x = layers.Flatten()(pre_trained_model.output)
# Add fully connected layer, with relu activation
x = layers.Dense(1024, activation='relu')(x)
# Add droupuut
x = layers.Dropout(0.5)(x)
# Add sigmoid layer for classification. Sigmoid is used instead of softmax, because this is a binary classifier
x = layers.Dense(1, activation='sigmoid')(x)

model = Model(pre_trained_model.input, x)

# binary crossentropy used for binary classification
model.compile(optimizer=RMSprop(learning_rate),
             loss = 'binary_crossentropy',
             metrics=['accuracy'])

## Train generator

In [8]:
batch_size = 256
train_directory = "storage/train/"
validation_directory = "storage/validate/"
seed = 10
no_epochs = 40

# Data augmentation
train_datagen =ImageDataGenerator(rescale=1./255,
                                 width_shift_range = 0.1,
                                 height_shift_range = 0.1)
train_generator = train_datagen.flow_from_directory(
    directory=train_directory, 
    batch_size = batch_size,
    class_mode ='binary',
    target_size = (299,299),
shuffle = True)

validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = train_datagen.flow_from_directory(
    directory=validation_directory, 
    class_mode ='binary',
    target_size = (299,299))

Found 8502 images belonging to 2 classes.
Found 1634 images belonging to 2 classes.


In [9]:
checkpoint_filepath = 'storage/checkpoint/inception_freeze_all.hdf5'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

history = model.fit_generator(generator=train_generator,
                    validation_data=validation_generator, 
                    epochs=no_epochs, 
                    verbose=1,
                   callbacks=[model_checkpoint_callback])

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [10]:
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']
loss= history.history['loss']
val_loss = history.history['val_loss']

plt.plot(accuracy)
plt.plot(val_accuracy)
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig('storage/results/inception_freeze_all_accuracy.png')
plt.close()

plt.plot(loss)
plt.plot(val_loss)
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig('storage/results/inception_freeze_all_loss.png')
plt.close()

## Get confusion matrix

In [6]:
#load model
weights_dir = "storage/checkpoint/best_model.hdf5"
model.load_weights(weights_dir)

In [7]:
import os

num_female = sum(len(files) for _, _, files in os.walk('storage/test/female'))
num_male = sum(len(files) for _, _, files in os.walk('storage/test/male'))
print(num_female + num_male)

1647


In [8]:
from sklearn.metrics import confusion_matrix

test_directory = "storage/test/"

test_datagen = ImageDataGenerator(rescale=1./255)

generator = test_datagen.flow_from_directory(
        test_directory,
        target_size=(299, 299),
        batch_size=16,
        class_mode=None,  # only data, no labels
        shuffle=False)  # keep data in same order as labels

probabilities = model.predict_generator(generator, verbose =1)

Found 1647 images belonging to 2 classes.
Instructions for updating:
Please use Model.predict, which supports generators.


In [9]:
y_true = np.array([0] * num_female + [1] * num_male)
y_pred = probabilities > 0.5

res = confusion_matrix(y_true, y_pred)

fig, ax = plt.subplots()
im = ax.imshow(res, cmap='Blues')
for i in range(2):
    for j in range(2):
        text = ax.text(j, i, res[i, j],
                       ha="center", va="center", color="y")
plt.title('Confusion matrix from model prediction')
plt.savefig('storage/results/inception_freeze_all_loss_confusion_matrix.png')
plt.close()

print(res)

[[781 105]
 [139 622]]


# 2nd round of training: Unfreeze last 2 inception blocks

## Setting up model

In [2]:
pre_trained_model_v2 = InceptionV3( input_shape=(299,299,3),
                                include_top = False,
                                weights = 'imagenet'
)
print(len(pre_trained_model_v2.layers))

# we chose to train the top 2 inception blocks, i.e. we will freeze
# the first 249 layers and unfreeze the rest:
for layer in pre_trained_model_v2.layers[:249]:
    layer.trainable = False
for layer in pre_trained_model_v2.layers[249:]:
    layer.trainable = True

311


In [3]:
learning_rate = 1e-4

# Flatten the output layer to 1-dimension
x = layers.Flatten()(pre_trained_model_v2.output)
# Add fully connected layer, with relu activatio
x = layers.Dense(1024, activation='relu')(x)
# Add droupuut
x = layers.Dropout(0.5)(x)
# Add sigmoid layer for classification. Sigmoid is used instead of softmax, because this is a binary classifier
x = layers.Dense(1, activation='sigmoid')(x)

model_v2 = Model(pre_trained_model_v2.input, x)

# binary crossentropy used for binary classification
model_v2.compile(optimizer=RMSprop(learning_rate),
             loss = 'binary_crossentropy',
             metrics=['accuracy'])

## Setting up train and validation generators

In [5]:
batch_size = 256
train_directory = "storage/train/"
validation_directory = "storage/validate/"
seed = 10
no_epochs = 30

# Data augmentation
train_datagen =ImageDataGenerator(rescale=1./255,
                                 width_shift_range = 0.1,
                                 height_shift_range = 0.1)
train_generator = train_datagen.flow_from_directory(
    directory=train_directory, 
    batch_size = batch_size,
    class_mode ='binary',
    target_size = (299,299),
shuffle = True)

validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = train_datagen.flow_from_directory(
    directory=validation_directory, 
    class_mode ='binary',
    target_size = (299,299))

Found 8502 images belonging to 2 classes.
Found 1634 images belonging to 2 classes.


In [6]:
checkpoint_filepath = 'storage/checkpoint/inception_fine_tune.hdf5'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

history = model_v2.fit_generator(generator=train_generator,
                    validation_data=validation_generator, 
                    epochs=no_epochs, 
                    verbose=1,
                   callbacks=[model_checkpoint_callback])

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [7]:
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']
loss= history.history['loss']
val_loss = history.history['val_loss']

plt.plot(accuracy)
plt.plot(val_accuracy)
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig('storage/results/inception_fine_tune_accuracy.png')
plt.close()

plt.plot(loss)
plt.plot(val_loss)
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig('storage/results/inception_fine_tune_loss.png')
plt.close()

## Get confusion matrix

In [8]:
#load model
weights_dir = "storage/checkpoint/inception_fine_tune.hdf5"
model_v2.load_weights(weights_dir)

In [9]:
import os

num_female = sum(len(files) for _, _, files in os.walk('storage/test/female'))
num_male = sum(len(files) for _, _, files in os.walk('storage/test/male'))
print(num_female + num_male)

1647


In [10]:
from sklearn.metrics import confusion_matrix

test_directory = "storage/test/"

test_datagen = ImageDataGenerator(rescale=1./255)

generator = test_datagen.flow_from_directory(
        test_directory,
        target_size=(299, 299),
        batch_size=16,
        class_mode=None,  # only data, no labels
        shuffle=False)  # keep data in same order as labels

probabilities = model_v2.predict_generator(generator, verbose =1)

Found 1647 images belonging to 2 classes.
Instructions for updating:
Please use Model.predict, which supports generators.


In [11]:
y_true = np.array([0] * num_female + [1] * num_male)
y_pred = probabilities > 0.5

res = confusion_matrix(y_true, y_pred)

fig, ax = plt.subplots()
im = ax.imshow(res, cmap='Blues')
for i in range(2):
    for j in range(2):
        text = ax.text(j, i, res[i, j],
                       ha="center", va="center", color="y")
plt.title('Confusion matrix from model_v2 prediction')
plt.savefig('storage/results/inception_fine_tune.png')
plt.close()

print(res)

[[822  64]
 [ 62 699]]
