In [2]:
import os
import shutil
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Run this code if in google colab
# Input archive zip file is named images.zip in folder
if not os.path.isdir('test'):
    !unzip 'images.zip'
    shutil.rmtree('train/disgust')
    shutil.rmtree('test/disgust')
!pip install tensorflow

In [4]:
import tensorflow as tf
import keras

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Rescaling, BatchNormalization
from keras.losses import CategoricalCrossentropy
from keras.utils import image_dataset_from_directory
from keras.regularizers import L2

In [5]:
# Data-loading and training settings shared by the cells below.
args = dict(
    batch_size=32,            # images per batch when datasets are loaded
    image_size=(64, 64),      # (height, width) every image is resized to on load
    input_shape=(64, 64, 1),  # image_size plus one (grayscale) channel
    validation_split=0.2,     # fraction of the training directory held out for validation
    epoch=75,                 # number of training epochs
    learning_rate=0.001,      # intended optimizer learning rate
)

In [6]:
def plot_history(history):
    """Plot training vs. validation curves for loss and accuracy.

    Two separate figures are shown: one for loss, one for accuracy. Each has
    the epoch number (starting at 1) on the x axis and a legend identifying
    the training and validation curves.

    Parameters
    ----------
    history : object
        Object exposing a ``history`` mapping with the keys ``'loss'``,
        ``'val_loss'``, ``'accuracy'`` and ``'val_accuracy'``, each holding
        one value per epoch (e.g. the object returned by ``model.fit``).

    Raises
    ------
    KeyError
        If any of the four metric keys is missing from ``history.history``.
    """

    def plot(train, val, kind):
        """Draw and show the training/validation series of one metric."""
        # Epochs are numbered from 1 on the x axis.
        x = np.arange(1, np.size(train) + 1)

        plt.plot(x, train, label="training " + kind)
        plt.plot(x, val, label="validation " + kind)
        plt.xlabel("epoch")
        plt.ylabel(kind)
        plt.title("training and validation " + kind + " v epoch")
        # The label= arguments above are only rendered once a legend is drawn.
        plt.legend()
        plt.show()

    # Look up all four series first so a missing key fails before any figure is shown.
    train_loss_history = history.history['loss']
    val_loss_history = history.history['val_loss']

    train_acc_history = history.history['accuracy']
    val_acc_history = history.history['val_accuracy']

    plot(train_loss_history, val_loss_history, 'loss')
    plot(train_acc_history, val_acc_history, 'accuracy')

In [7]:
def create_cnn(args=None):
    """Build and compile the convolutional image classifier.

    Layer stack: rescale pixel values by 1/255, then three
    ``Conv2D(3x3, relu) -> MaxPooling2D(2x2) -> Dropout(0.4)`` stages with
    32, 64 and 128 filters, then ``Flatten -> Dense(512, relu) ->
    Dropout(0.6) -> Dense(num_classes, softmax)``. The model is compiled with
    categorical cross-entropy loss, the Adam optimizer and an accuracy metric.

    Parameters
    ----------
    args : dict, optional
        Optional settings; every key is optional and unknown keys are ignored.

        * ``'input_shape'`` -- shape of one input image (without the batch
          dimension). When given, an explicit input layer is added so the
          model is built immediately instead of on first use.
        * ``'learning_rate'`` -- learning rate passed to Adam. When absent,
          Keras' default Adam configuration is used.
        * ``'num_classes'`` -- number of output units (default 6).

    Returns
    -------
    Sequential
        The compiled model.
    """
    settings = args or {}

    model = Sequential()
    if settings.get('input_shape') is not None:
        model.add(keras.Input(shape=settings['input_shape']))
    model.add(Rescaling(1./255))

    # Three identical convolutional stages that differ only in filter count.
    for filters in (32, 64, 128):
        model.add(Conv2D(filters, kernel_size=(3,3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2,2)))
        model.add(Dropout(0.4))

    model.add(Flatten())

    model.add(Dense(units = 512, activation = 'relu'))
    model.add(Dropout(0.6))
    model.add(Dense(units = settings.get('num_classes', 6), activation = 'softmax'))

    # Compile -- fall back to the plain 'adam' string when no rate is configured
    # so that calling create_cnn() without args behaves exactly as before.
    learning_rate = settings.get('learning_rate')
    if learning_rate is None:
        optimizer = 'adam'
    else:
        optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

In [8]:
def train_cnn(directory, args=None):
    """Load an image directory, split it into train/validation, and fit the CNN.

    Images are read as grayscale with one-hot (categorical) labels. The
    train/validation split uses a fixed seed of 0.

    Parameters
    ----------
    directory : str
        Path to a directory containing one sub-directory per class.
    args : dict, optional
        Settings that override the defaults below. Keys read here:
        ``'batch_size'`` (32), ``'image_size'`` ((64, 64)),
        ``'validation_split'`` (0.2) and ``'epoch'`` (75). The merged
        dictionary is also forwarded to ``create_cnn``.

    Returns
    -------
    tuple
        ``(model, history)`` -- the fitted model and the object returned by
        ``model.fit``.
    """
    # Defaults mirror the notebook's configuration cell, so the advertised
    # `args=None` default works instead of failing on `None[...]`.
    defaults = {
        'batch_size': 32,
        'image_size': (64, 64),
        'validation_split': 0.2,
        'epoch': 75,
    }
    settings = {**defaults, **(args or {})}

    # subset='both' returns the training and validation datasets as a pair.
    train, valid = tf.keras.utils.image_dataset_from_directory(
        directory, label_mode = 'categorical', color_mode = 'grayscale',
        image_size = settings['image_size'], seed = 0,
        batch_size = settings['batch_size'],
        validation_split = settings['validation_split'], subset = 'both')

    model = create_cnn(settings)
    history = model.fit(train, epochs = settings['epoch'],
                        validation_data = valid)
    return model, history

In [9]:
def analyze_model(test, model, history):
    """Report a trained model's architecture, training curves and test results.

    Steps, in order:

    1. Print the model summary.
    2. Plot the training history via ``plot_history``.
    3. Load the images under ``test`` (grayscale, categorical labels), predict
       a class for each one with ``model`` and print the overall accuracy.
    4. Display a per-image table of actual vs. predicted labels.
    5. Display and bar-plot the fraction of correct predictions grouped by
       predicted label.

    Image size and batch size are read from the module-level ``args`` dict.

    Parameters
    ----------
    test : str
        Path to the test directory containing one sub-directory per class.
    model
        Trained model exposing ``summary()`` and ``predict()``.
    history
        Training history object accepted by ``plot_history``.
    """
    separator = '------------------------------------------------------------------'

    # summary() prints by itself; wrapping it in print() only adds a stray "None".
    model.summary()
    print(separator)
    plot_history(history)
    print(separator)

    # shuffle=False is essential: the dataset is iterated twice below (once by
    # predict, once to collect the true labels) and a shuffled dataset yields a
    # different order on each pass, which would misalign the two label lists.
    test_data = tf.keras.utils.image_dataset_from_directory(
        test, label_mode = 'categorical', color_mode = 'grayscale',
        image_size = args['image_size'], batch_size = args['batch_size'],
        shuffle = False)

    # Use the class order the loader actually assigned to the one-hot indices.
    labels = test_data.class_names
    predicted_indices = np.argmax(model.predict(test_data), axis = 1)
    actual_indices = np.argmax(
        np.concatenate([y for _, y in test_data], axis = 0), axis = 1)
    predicted = [labels[index] for index in predicted_indices]
    actual = [labels[index] for index in actual_indices]

    test_df = pd.DataFrame({'Actual Label':actual,'Predicted Label':predicted})
    test_df['Prediction Correct'] = test_df['Actual Label'] == test_df['Predicted Label']

    prediction_accuracy = test_df['Prediction Correct'].mean()
    print('Prediction Accuracy of Model: ', prediction_accuracy)

    display(test_df)
    print(separator)

    def plot(srs, x, y, title):
        """Show a bar chart of a Series, one bar per index entry."""
        plt.bar(srs.index, srs)
        plt.xlabel(x)
        plt.ylabel(y)
        plt.title(title)
        plt.show()

    # Share of correct predictions among the images assigned to each predicted label.
    prediction_by_label = test_df.groupby('Predicted Label')['Prediction Correct'].mean()
    display(prediction_by_label)

    plot(prediction_by_label, 'Label', 'Prediction Accuracy',
         'Prediction Accuracy by Label')
    print(separator)
    print(separator)

In [None]:
# Start with no best run recorded; any accuracy above 0 replaces it.
best_model, best_history, best_accuracy = None, None, 0

# Train on the 'train' directory and score the run by its highest
# per-epoch validation accuracy.
cnn_model, cnn_history = train_cnn('train', args)
cnn_accuracy = max(cnn_history.history['val_accuracy'])
print('validation accuracy:', cnn_accuracy)
analyze_model('test', cnn_model, cnn_history)

# Keep this run if it beats the best one recorded so far.
if best_accuracy < cnn_accuracy:
    best_model, best_history, best_accuracy = cnn_model, cnn_history, cnn_accuracy

Found 28273 files belonging to 6 classes.
Using 22619 files for training.
Using 5654 files for validation.
Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75

In [None]:
# Produce the evaluation report for the best model kept by the training cell.
analyze_model(test='test', model=best_model, history=best_history)