In [None]:
# Import necessary libraries
import os
import json
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import tensorflow as tf

import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
%matplotlib inline

In [None]:
# Plot styling: serif text, the DejaVu Serif math font set, and one shared
# font size that the plotting code reads for titles, axis labels and legends
plt.rcParams.update({
    'font.family': 'serif',
    'mathtext.fontset': 'dejavuserif',
})
fontsize = 15

In [None]:
# Function to randomly shuffle the data
def shuffle_data(data, labels, SEED):
    """Shuffle samples and labels together using one shared permutation.

    A single random permutation of the sample indices is drawn and applied to
    both arrays, so ``labels[i]`` still belongs to ``data[i]`` after the
    shuffle. Shuffling the two arrays with two separate
    ``np.random.shuffle`` calls draws two different permutations and breaks
    that pairing.

    Parameters
    ----------
    data : array_like
        Samples, indexed along the first axis.
    labels : array_like
        One label per sample, indexed along the first axis.
    SEED : int
        Seed passed to ``np.random.seed``. This reseeds NumPy's global
        generator.

    Returns
    -------
    tuple
        ``(data, labels)`` as new arrays reordered along the first axis.
        The input arrays are left unmodified.

    Raises
    ------
    ValueError
        If ``data`` and ``labels`` do not contain the same number of entries.
    """
    data = np.asarray(data)
    labels = np.asarray(labels)

    if len(data) != len(labels):
        raise ValueError(
            f'data and labels must have the same length, got {len(data)} and {len(labels)}'
        )

    np.random.seed(SEED)

    # One permutation for both arrays keeps every sample aligned with its label
    order = np.random.permutation(len(data))

    return data[order], labels[order]

In [None]:
# Function to split the data into training, validation, and testing set
def split_data(data, labels):
    """Cut samples and labels into training, validation and testing subsets.

    Entries are taken in their existing order (no shuffling happens here):
    the first 80% form the training set, the next 10% the validation set and
    the remainder the testing set. The cut positions are ``int(0.8 * n)`` and
    ``int(0.9 * n)``, computed separately from ``len(data)`` and
    ``len(labels)``.

    ``data`` is sliced with three explicit axes, so it must have at least
    three dimensions; ``labels`` is sliced along its first axis only.

    Returns
    -------
    tuple
        ``(data_train, labels_train, data_val, labels_val, data_test,
        labels_test)``.
    """
    n_data, n_labels = len(data), len(labels)

    # Boundaries of the 80:10:10 split for each of the two inputs
    data_lo, data_hi = int(0.8*n_data), int(0.9*n_data)
    labels_lo, labels_hi = int(0.8*n_labels), int(0.9*n_labels)

    data_train, data_val, data_test = (
        data[:data_lo, :, :],
        data[data_lo:data_hi, :, :],
        data[data_hi:, :, :],
    )
    labels_train, labels_val, labels_test = (
        labels[:labels_lo],
        labels[labels_lo:labels_hi],
        labels[labels_hi:],
    )

    return data_train, labels_train, data_val, labels_val, data_test, labels_test

In [None]:
# CNN class
class CNN():
    """Thin wrapper around a two-block convolutional Keras model.

    The network is assembled with the Keras functional API: an input tensor,
    two Conv2D -> ReLU -> BatchNormalization blocks, a Flatten layer and a
    sigmoid Dense output. ``build_model`` must be called before ``compile``,
    ``train`` or ``summary``, because it is the method that creates
    ``self.model``.
    """

    def __init__(self, input_shape, output_shape):
        """Store the shapes; the Keras input tensor is created in build_model."""
        self.input_shape, self.output_shape = input_shape, output_shape

        # Placeholder until build_model() creates the real input tensor
        self.input_layer = None

    # Method to build the hidden layers
    def build_hidden_layers(self):
        """Stack the convolutional blocks on top of ``self.input_layer``.

        Two blocks are created, with 32 and then 90 filters. Each block is a
        3x3 'same'-padded Conv2D with Glorot-normal initialisation, followed
        by a ReLU activation and batch normalisation.

        Returns the output tensor of the last block.
        """
        layers = tf.keras.layers

        features = self.input_layer
        for n_filters in (32, 90):
            features = layers.Conv2D(
                filters=n_filters,
                kernel_size=(3,3),
                padding='same',
                kernel_initializer='glorot_normal',
            )(features)
            features = layers.Activation('relu')(features)
            features = layers.BatchNormalization()(features)

        return features

    # Method to build the overall model
    def build_model(self):
        """Create the input tensor, wire up all layers and return the Keras model.

        The model is also kept on ``self.model``.
        """
        layers = tf.keras.layers

        # Input tensor; build_hidden_layers() reads it from self.input_layer
        self.input_layer = layers.Input(shape=self.input_shape)

        conv_features = self.build_hidden_layers()
        flattened = layers.Flatten()(conv_features)

        # Sigmoid output with `output_shape` units
        prediction = layers.Dense(units=self.output_shape, activation='sigmoid')(flattened)

        self.model = tf.keras.models.Model(inputs=[self.input_layer], outputs=[prediction])

        return self.model

    # Method to compile the model
    def compile(self, optimizer, loss):
        """Compile ``self.model`` with the given optimizer and loss and return it."""
        self.model.compile(loss=loss, optimizer=optimizer)

        return self.model

    # Define method to train the model
    def train(self, x_train, y_train, x_val, y_val, epochs, batch_size, callbacks):
        """Fit ``self.model`` and return the object produced by ``fit``.

        ``(x_val, y_val)`` is passed as validation data. The result is also
        stored on ``self.history``.
        """
        self.history = self.model.fit(
            x_train,
            y_train,
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks,
            validation_data=(x_val, y_val),
        )

        return self.history

    # Method to print summary of model
    def summary(self):
        """Print the Keras summary of ``self.model``."""
        self.model.summary()

In [None]:
class plots:
    """Plotting helper for a training history.

    ``history`` must expose a ``.history`` mapping of metric name to
    per-epoch values (``loss`` indexes it that way); ``file_directory`` is
    the folder figures are saved into.
    """

    def __init__(self, history, file_directory):
        """Keep the history object and the output directory for later use."""
        self.file_directory = file_directory
        self.history = history

    def loss(self):
        """Plot training and validation loss per epoch and save it as a PDF.

        The first key of ``self.history.history`` is taken as the loss name,
        and its validation counterpart is looked up under ``'val_' + name``.
        The figure is written to ``<file_directory>/loss.pdf`` and returned.
        Font sizes come from the module-level ``fontsize``.
        """
        records = self.history.history
        metric_names = list(records.keys())
        loss_name = metric_names[0]

        train_curve = records[loss_name]
        val_curve = records['val_' + loss_name]

        loss_plot = plt.figure()
        ax = loss_plot.gca()

        # Epochs are numbered from 1 on the x axis
        epochs = range(1, len(train_curve)+1)
        curves = (
            (train_curve, 'bo--', 'Training Loss'),
            (val_curve, 'go--', 'Validation Loss'),
        )
        for values, line_format, curve_label in curves:
            ax.plot(epochs, values, line_format, label=curve_label, markersize=2)

        ax.set_title('Training and Validation Loss', fontsize=fontsize)
        ax.set_xlabel('Epochs', fontsize=fontsize)
        ax.set_ylabel('Loss', fontsize=fontsize)
        ax.legend(['Training Loss', 'Validation Loss'], fontsize=fontsize)

        # Epoch ticks are restricted to whole numbers
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        plt.savefig(self.file_directory + '/loss.pdf', bbox_inches='tight')

        return loss_plot

In [None]:
# Resolve the project directories relative to the current working directory:
# the data and results folders are looked up two levels above it
current_directory = os.getcwd()
parent_directory = os.path.dirname(current_directory)
grandparent_directory = os.path.dirname(parent_directory)
data_directory = os.path.join(grandparent_directory, 'data')
npy_files_directory = os.path.join(data_directory, 'npy')
results_directory = os.path.join(grandparent_directory, 'results', 'binary_classification', 'training')

# exist_ok=True creates the folder (and any missing parents) when needed and
# is a no-op when it already exists, avoiding a separate check-then-create step
os.makedirs(results_directory, exist_ok=True)

In [None]:
# Load classes.json from the data directory.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's locale default.
with open(os.path.join(data_directory, 'classes.json'), 'r', encoding='utf-8') as file:
    classes = json.load(file)

In [None]:
# Read the two per-category arrays from the npy directory
household_objects, animals = (
    np.load(os.path.join(npy_files_directory, npy_name))
    for npy_name in ('household_objects.npy', 'animals.npy')
)

# The size of the third axis (index 2) is used as the sample count of each class
num_household_objects = household_objects.shape[2]
print(f'Number of household objects: {num_household_objects}')

num_animals = animals.shape[2]
print(f'Number of animals: {num_animals}')

In [None]:
# No normalization is applied here: per the original note, the values are
# already between 0 and 1 (not checked in this cell).
# Move the third axis to the front so the axis order becomes (2, 0, 1).
# Re-running this cell would rotate the axes again.
household_objects, animals = (
    stack.transpose(2, 0, 1) for stack in (household_objects, animals)
)

# Show the resulting shapes
print(f'Household objects shape: {household_objects.shape}')
print(f'Animals shape: {animals.shape}')

In [None]:
# Binary targets: 0 for every household object, 1 for every animal
household_objects_labels = np.zeros(shape=num_household_objects, dtype=float)
animals_labels = np.ones(shape=num_animals, dtype=float)

In [None]:
# Join both classes along the first axis, household objects first,
# with the labels joined in the same order
data = np.concatenate((household_objects, animals), axis=0)
labels = np.concatenate((household_objects_labels, animals_labels), axis=0)

In [None]:
# Shuffle with a fixed seed; shuffle_data seeds NumPy's generator with it
SEED = 42
data, labels = shuffle_data(data=data, labels=labels, SEED=SEED)

In [None]:
# 80:10:10 split into training, validation and testing subsets
(
    data_train, labels_train,
    data_val, labels_val,
    data_test, labels_test,
) = split_data(data=data, labels=labels)

In [None]:
# Report the shapes of each subset (samples first, then labels)
for shape_report in (
    f'Training set shape: {data_train.shape}, {labels_train.shape}',
    f'Validation set shape: {data_val.shape}, {labels_val.shape}',
    f'Testing set shape: {data_test.shape}, {labels_test.shape}',
):
    print(shape_report)

In [None]:
# Insert a new axis at position 3 of every subset to serve as the channel
# dimension. Re-running this cell would insert a further axis.
data_train, data_val, data_test = (
    np.expand_dims(subset, axis=3)
    for subset in (data_train, data_val, data_test)
)

In [None]:
# Model shapes: a single output unit, and the per-sample input shape
# (every axis of the training data after the leading sample axis)
output_shape = 1
input_shape = data_train.shape[1:]

In [None]:
# Settings handed to compile()
optimizer = 'adam'
loss = 'binary_crossentropy'

# Instantiate the wrapper and assemble the Keras model
cnn = CNN(input_shape=input_shape, output_shape=output_shape)
model = cnn.build_model()

# compile() hands back the wrapper's model after compiling it
model = cnn.compile(optimizer=optimizer, loss=loss)

# Show the layer-by-layer summary
cnn.summary()

In [None]:
# Training hyperparameters
# NOTE(review): with epochs = 1 the patience-3 early stopping below can never
# trigger; presumably a quick smoke-test setting, so confirm before a real run
epochs, batch_size = 1, 32

# Early stopping watches the validation loss with a patience of 3 epochs
callbacks = [tf.keras.callbacks.EarlyStopping(patience=3, monitor='val_loss')]

# Fit on the training subset, validating on the validation subset
history = cnn.train(
    x_train=data_train,
    y_train=labels_train,
    x_val=data_val,
    y_val=labels_val,
    epochs=epochs,
    batch_size=batch_size,
    callbacks=callbacks,
)

In [None]:
# Draw the loss curves; loss() also saves the figure into the results directory
plot = plots(history=history, file_directory=results_directory)
loss_plot = plot.loss()

In [None]:
# Write the trained model to model.h5 inside the results directory
print('Saving the model...')
model_path = os.path.join(results_directory, 'model.h5')
model.save(model_path)