In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPool2D, Normalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras import callbacks
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import os
import random
import itertools

In [None]:
# Root folders of the three dataset splits (relative paths).
train_path, valid_path, test_path = 'Training', 'Validation', 'Test'

In [None]:
# Build one Keras directory iterator per split. All three share the VGG16
# preprocessing function, the 224x224 target size and the explicit class order;
# only the source folder, batch size and shuffling differ (the test split is
# left unshuffled).
train_batches, valid_batches, test_batches = (
    ImageDataGenerator(
        preprocessing_function=tf.keras.applications.vgg16.preprocess_input
    ).flow_from_directory(
        directory=split_dir,
        target_size=(224, 224),
        classes=["Black", "East Asian", "Indian", "Latino", "Middle Eastern", "Southeast Asian", "White"],
        batch_size=split_batch_size,
        shuffle=split_shuffle,
    )
    for split_dir, split_batch_size, split_shuffle in (
        (train_path, 280, True),
        (valid_path, 200, True),
        (test_path, 140, False),
    )
)
# Dataset sizes as noted by the author:
#   training   - 7000 images belonging to 7 classes
#   validation - 1400 images belonging to 7 classes
#   test       -  140 images belonging to 7 classes

In [None]:
# Draw the next batch from the training iterator: images plus their labels.
imgs, labels = next(train_batches)

In [None]:
def plotImages(images_arr):
    """Show up to the first ten images of ``images_arr`` side by side.

    Parameters
    ----------
    images_arr : iterable
        Images in any form accepted by ``Axes.imshow``. Only the first ten
        are drawn; if fewer are supplied the remaining panels stay blank.
    """
    fig, axes = plt.subplots(1, 10, figsize=(20,20))
    axes = axes.flatten()
    # Hide frame and ticks on every panel, including ones that get no image.
    for ax in axes:
        ax.axis('off')
    # zip stops at the shorter input, so extra images beyond ten are ignored.
    for img, ax in zip(images_arr, axes):
        ax.imshow(img)
    # Both must be *called*: a bare `plt.tight_layout` / `plt.show` only
    # references the function object and does nothing.
    plt.tight_layout()
    plt.show()

In [None]:
plotImages(imgs)
# plotImages draws only the first ten images of the batch, so print just the
# labels that belong to those images instead of the labels of the whole batch.
print(labels[:10])

In [None]:
# Small convolutional classifier: two (3x3 convolution -> 2x2 max-pool) stages
# followed by a softmax layer with one unit per class.
model = Sequential()
model.add(Conv2D(32, (3, 3), padding="same", activation="relu", input_shape=(224, 224, 3)))
model.add(MaxPool2D(pool_size=(2, 2), strides=2))  # halves height and width
model.add(Conv2D(64, (3, 3), padding="same", activation="relu"))
model.add(MaxPool2D(pool_size=(2, 2), strides=2))
model.add(Flatten())
model.add(Dense(7, activation="softmax"))  # softmax yields a probability per output

In [None]:
# Print the model's layer summary.
model.summary()

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Early stopping on validation loss (lower is better): give up after 5 epochs
# without improvement and restore the weights from the best epoch.
earlystopping = callbacks.EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=5,
    restore_best_weights=True,
)

In [None]:
# Train for at most 25 epochs, validating on valid_batches after each one; the
# early-stopping callback may end the run sooner.
model.fit(
    x=train_batches,
    validation_data=valid_batches,
    epochs=25,
    callbacks=[earlystopping],
    verbose=2,
)

Prediction on test set

In [None]:
# Draw one batch (images and labels) from the test iterator.
# NOTE(review): test_imgs / test_labels are not used by the later cells shown
# in this notebook - confirm whether this cell is still needed.
test_imgs, test_labels = next(test_batches)

In [None]:
# Display the class index recorded for each test image; this array is used
# below as the ground truth for the confusion matrix.
test_batches.classes

In [None]:
# Run the trained model over the test iterator (silently); the argmax of each
# output row is taken as the predicted class further down.
predictions = model.predict(x=test_batches, verbose=0)

In [None]:
# Confusion matrix of true class indices against the highest-scoring predicted
# class for each test image.
cm = confusion_matrix(
    y_true=test_batches.classes,
    y_pred=predictions.argmax(axis=-1),
)

In [None]:
# visualize where the model is making the most mistakes
def plot_confusion_matrix(cm, classes, normalize=True, title='Confusion Matrix', cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix. Rows are treated as true labels and columns as
        predicted labels (matching the axis labels set below).
    classes : sequence
        Tick labels for both axes, in matrix index order.
    normalize : bool, default True
        If True, every row is divided by its row sum before plotting, so each
        cell shows the fraction of that true class. Rows whose sum is zero are
        shown as zeros instead of NaN.
    title : str
        Figure title.
    cmap : matplotlib colormap
        Colormap used for the heat map.

    The matrix that is plotted is also printed to stdout.
    """
    cm = np.asarray(cm)

    if normalize:
        # Normalize BEFORE drawing so the heat map, the colour bar and the
        # per-cell annotations all describe the same numbers.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(
            cm,
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,  # leave empty rows at 0 instead of dividing by zero
        )
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Two decimals for fractions, plain integers for counts.
    if np.issubdtype(cm.dtype, np.floating):
        cell_format = '.2f'
    elif np.issubdtype(cm.dtype, np.integer):
        cell_format = 'd'
    else:
        cell_format = ''

    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], cell_format),
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so the axis labels are taken into account.
    plt.tight_layout()

In [None]:
# Display the test iterator's class-name -> index mapping, to check which
# index in the confusion matrix corresponds to which class.
test_batches.class_indices

In [None]:
# Take the tick labels from the iterator's own name -> index mapping, ordered
# by index, so the axis labels cannot drift from the class order that produced
# the confusion matrix.
cm_plot_labels = sorted(test_batches.class_indices, key=test_batches.class_indices.get)
plot_confusion_matrix(cm, classes=cm_plot_labels, title='Confusion Matrix')