# Deep Learning Assignment - 01 , Problem - 01, Group - 029
# Vision Dataset - Animal Image Classification 

https://www.kaggle.com/rwt1998/animal-classification

https://www.kaggle.com/bygbrains/dog-cat-pandas-image-classifier


## Library Imports

In [80]:
import os
from os import listdir
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.image import imread
from datetime import datetime

import warnings
warnings.simplefilter("ignore")

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report, f1_score, accuracy_score
from sklearn.model_selection import train_test_split

## Validate GPU Availability

In [81]:
# Note this code is only valid if GPU is being used for training

# Before we run the parameter tuning, we will work with little gpu memory allocation
# we will only use that much of memory of gpu as it is needed - allow the growth of gpu memory as it is needed
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)
else:
    print("No GPU Available, switching to CPU Version")

1 Physical GPUs, 1 Logical GPUs


## Load the dataset and validate the data load

In [82]:
train_datagen = ImageDataGenerator(rescale=1./255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True)

dataset = train_datagen.flow_from_directory("/home/suvo/Documents/LargeDatasets/CNNDatasets/Cats-Dogs-Pandas/animals", target_size=(32, 32), batch_size=32, class_mode='binary')

Found 3000 images belonging to 3 classes.


In [83]:
listdir("/home/suvo/Documents/LargeDatasets/CNNDatasets/Cats-Dogs-Pandas/animals")

['cats', 'dogs', 'panda']

In [84]:
folder = "/home/suvo/Documents/LargeDatasets/CNNDatasets/Cats-Dogs-Pandas/animals"
# folder1 = "/home/suvo/Documents/LargeDatasets/CNNDatasets/Cats-Dogs-Pandas/animals"

imageList=[]
classList=[]

for file1 in listdir(folder):
    file2 = folder + "/" + file1
    for file3 in listdir(file2):
        file4 = file2 + "/" + file3
        image = tf.keras.preprocessing.image.load_img(file4, target_size=(128, 128))
        image = tf.keras.preprocessing.image.img_to_array(image)
        imageList.append(image)
        classList.append(file1)

# Check whether all the images has been parsed
print(f"Length of the image list - {len(imageList)}")
print(f"Length of the class list - {len(classList)}")

Length of the image list - 3000
Length of the class list - 3000


#### Display four images from each of the classes

In [85]:
# test

In [86]:
# We will convert this list into numpy array
imageArray = np.asarray(imageList)
classArray = np.asarray(classList)

print(f"Shape of the image numpy array - {imageArray.shape}")
print(f"Shape of the class numpy array - {classArray.shape}")

Shape of the image numpy array - (3000, 128, 128, 3)
Shape of the class numpy array - (3000,)


#### Normalizing the images

we will want all our pixel values to be between 0 and 1 (normalized), in order for the neural net to train faster

In [87]:
imageArray = imageArray/255.0

#### Reshaping the image

We are flattening every image meaning we're going to transform each of the dimensions of the image for all the images of the array by flattening all the pixels into a single one vector and we will do that through reshape


In [88]:
imageArray = imageArray.reshape(-1, imageArray.shape[1]*imageArray.shape[2]*imageArray.shape[3])
imageArray.shape

(3000, 49152)

In [106]:
classArray

array(['cats', 'cats', 'cats', ..., 'panda', 'panda', 'panda'],
      dtype='<U5')

In [89]:
imageArray[0]

array([0.09803922, 0.11372549, 0.14901961, ..., 0.62352943, 0.5882353 ,
       0.5686275 ], dtype=float32)

## Train-Test Split

In [90]:
# Perform the split
features_train, features_test, target_train, target_test = train_test_split(imageArray, classArray, test_size=0.3, 
                                                                            random_state=101)

# We will further split the training set into validatoion to evaluate the Neural Network training
features_train, features_val, target_train, target_val = train_test_split(features_train, target_train, test_size=0.3, 
                                                                            random_state=101)

print("Training Features shape: ", features_train.shape)
print("Training Target shape: ", target_train.shape)

print("Validation Features shape: ", features_val.shape)
print("Validation Target shape: ", target_val.shape)

print("Test Features shape: ", features_test.shape)
print("Training Target shape: ", target_test.shape)

Training Features shape:  (1470, 49152)
Training Target shape:  (1470,)
Validation Features shape:  (630, 49152)
Validation Target shape:  (630,)
Test Features shape:  (900, 49152)
Training Target shape:  (900,)


## Building the ANN

### Global Model Constants

In [91]:
imageArray.shape[1]

49152

In [92]:
# Define some global Model Constants

INPUT_SHAPE = (imageArray.shape[1], )
OUTPUT_UNITS = 3
HIDDEN_UNITS= 128
ACTIVATION_HIDDEN = tf.keras.activations.relu
ACTIVATION_OUTPUT = tf.keras.activations.softmax
LEARNING_RATE = 1e-3
OPTIMIZER = tf.keras.optimizers.Adam(LEARNING_RATE)
LOSS_FUNCTION = tf.keras.losses.sparse_categorical_crossentropy
L2_REGULARIZER = tf.keras.regularizers.L2(0.001)
DROPOUT_RATE = 0.2

EPOCHS = 10

In [93]:
# Define the Metrics - These are the metrics we will evaluate during training

METRICS = [tf.keras.metrics.TruePositives(name='tp'),
          tf.keras.metrics.FalsePositives(name='fp'),
          tf.keras.metrics.TrueNegatives(name='tn'),
          tf.keras.metrics.FalseNegatives(name='fn'), 
          tf.keras.metrics.BinaryAccuracy(name='accuracy'),
          tf.keras.metrics.Precision(name='precision'),
          tf.keras.metrics.Recall(name='recall'),
          tf.keras.metrics.AUC(name='auc')]

In [94]:
# Defining a function which will build and compile the model

'''
This will build and compile a model with one hidden layer and 16 neurons
'''
def make_model(metrics=METRICS, output_bias=None):
    if output_bias is not None:
        output_bias = tf.keras.initializers.Constant(output_bias)
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(units=HIDDEN_UNITS, activation=ACTIVATION_HIDDEN, 
                                input_shape=INPUT_SHAPE))
    model.add(tf.keras.layers.Dense(units=HIDDEN_UNITS, activation=ACTIVATION_HIDDEN))
    model.add(tf.keras.layers.Dense(units=OUTPUT_UNITS, activation=ACTIVATION_OUTPUT))
    model.compile(optimizer=OPTIMIZER, loss=LOSS_FUNCTION, metrics=metrics)
    return model

In [95]:
'''
This will build a deep neural network model with multiple hidden layers and implement 
L2 regularization
Dropout regularization
'''

def make_DNNModel(metrics=METRICS, output_bias=None):
    if output_bias is not None:
        output_bias = tf.keras.initializers.Constant(output_bias)
    model = tf.keras.models.Sequential()
    model.add(hub_layer)
    model.add(tf.keras.layers.Dense(units=HIDDEN_UNITS, activation=ACTIVATION_HIDDEN, kernel_regularizer=L2_REGULARIZER))
    model.add(tf.keras.layers.Dropout(rate=DROPOUT_RATE))
    model.add(tf.keras.layers.Dense(units=HIDDEN_UNITS, activation=ACTIVATION_HIDDEN, kernel_regularizer=L2_REGULARIZER))
    model.add(tf.keras.layers.Dense(units=OUTPUT_UNITS, activation=ACTIVATION_OUTPUT))
    model.compile(optimizer=OPTIMIZER, loss=LOSS_FUNCTION, metrics=metrics)
    return model

In [96]:
# Defining a function to plot training loss vs validation loss

'''
This function will take a epoch model from training a neural network
Will plot training loss vs validation loss
'''

def plotTrainLossVsValLoss(epochs_history):
    plt.figure(figsize=(12, 8))
    loss_train = epochs_history.history['loss']
    loss_val = epochs_history.history['val_loss']

    plt.figure(figsize=(12, 8))

    loss_train = epochs_history.history['loss']
    loss_val = epochs_history.history['val_loss']

    epochs = range(1, (EPOCHS + 1))
    plt.plot(epochs, loss_train, 'g', label='Training loss')
    plt.plot(epochs, loss_val, 'b', label='Validation loss')
    plt.title('Training Loss vs Validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid()
    plt.show()

In [97]:
# Defining a function to plot training accuracy vs validation accuracy

'''
This function will take a epoch model from training a neural network
Will plot training accuracy vs validation accuracy
'''

def plotTrainAccuracyVsValAccuracy(epochs_history):
    plt.figure(figsize=(12, 8))

    loss_train = epochs_history.history['accuracy']
    loss_val = epochs_history.history['val_accuracy']

    epochs = range(1, (EPOCHS + 1))
    plt.plot(epochs, loss_train, 'g', label='Training Accuracy')
    plt.plot(epochs, loss_val, 'b', label='Validation Accuracy')
    plt.title('Training Accuracy vs Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid()
    plt.show()

In [98]:
# Defining a function to plot the confusion matrix
# Let us visualize the Confusion Matrix and detail out some key metrices including classification report

'''
This function will plot the confusion matrix 
This will also display various performance metrices
'''
def plot_cm(labels, predictions, threshold=0.5):
    cm = confusion_matrix(labels, predictions > threshold)
    plt.figure(figsize=(5, 5))
    sns.heatmap(cm, annot=True, fmt="d")
    plt.title("Confusion Matrix %0.2f" %threshold)
    plt.xlabel("Predicted Label")
    plt.ylabel("Actual Label")
    
    print('Legitimate Transactions Detected (True Negatives): ', cm[0][0])
    print('Legitimate Transactions Incorrectly Detected (False Positives): ', cm[0][1])
    print('Fraudulent Transactions Missed (False Negatives): ', cm[1][0])
    print('Fraudulent Transactions Detected (True Positives): ', cm[1][1])
    print('Total Fraudulent Transactions: ', np.sum(cm[1]))
    print("\n")
    print("F1-Score")
    print(f1_score(target_test, target_predictions > 0.5))
    print("\n")
    print("Accuracy Score")
    print(accuracy_score(target_test, target_predictions > threshold))
    print("\n")
    print("Classification Report")
    print(classification_report(target_test, target_predictions > 0.5))

#### Compile the Model and Check the summary

In [99]:
# Lets build the model and see the mmodel summary

model = make_model()
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 128)               6291584   
_________________________________________________________________
dense_8 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_9 (Dense)              (None, 3)                 387       
Total params: 6,308,483
Trainable params: 6,308,483
Non-trainable params: 0
_________________________________________________________________


## Training the Model

In [103]:
# we will now train the model on training and validation data
# Now use the function to plot the confusion matrix

start = datetime.now()
epochs_history_simple = model.fit(features_train, target_train, epochs=EPOCHS,
                          validation_data=(features_val, target_val),
                          verbose=1)
end = datetime.now()
print(f"The training of simple model completed in time - {end - start}")

Epoch 1/10


TypeError: 'NoneType' object is not callable