In [1]:
# Import the dependencies

import numpy as np
import pandas as pd
import pickle
import os
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC

import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam

In [2]:
# Function to load the predictor variables

def load_X_file():
    npy_file = "data/X.npy"
        
    npy = np.load(npy_file)    
    
    return npy

In [3]:
# Load the predictor variables

X_npy = load_X_file()
X_npy.shape

(39209, 32, 32, 3)

In [4]:
# Load the target variables

y = np.load("data/y.npy")
y.shape

(39209, 43)

In [5]:
# Create the target dataframe

y_npy = pd.DataFrame(y)
y_npy.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,33,34,35,36,37,38,39,40,41,42
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Split the data into training, validation, and test sets

X_train_npy, X_test_npy, y_train_npy, y_test_npy = train_test_split(X_npy, y_npy, test_size=0.1)
X_train_npy, X_val_npy, y_train_npy, y_val_npy = train_test_split(X_train_npy, y_train_npy, test_size=0.1)

In [7]:
# Create and train the Neural Network 

nn = Sequential()

nn.add(Conv2D(filters=32, strides=(2, 2), kernel_size=1, padding='same', input_shape=(32, 32, 3), activation="relu"))
nn.add(Conv2D(32, (3, 3), activation="relu"))
nn.add(MaxPooling2D(pool_size=(2, 2)))
nn.add(Dropout(0.1))

nn.add(Flatten())

nn.add(Dense(1800, activation='relu'))
nn.add(Dense(900, activation='relu'))
nn.add(Dense(512, activation='relu'))
nn.add(Dense(256, activation='relu'))
nn.add(Dense(43, activation='softmax'))
nn.summary()

nn.compile(loss=CategoricalCrossentropy(), optimizer="adam", metrics=["accuracy"])

model = nn.fit(X_train_npy, y_train_npy, validation_data=(X_val_npy, y_val_npy), epochs=100, batch_size=64)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 16, 16, 32)        128       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 32)        9248      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 7, 7, 32)          0         
_________________________________________________________________
dropout (Dropout)            (None, 7, 7, 32)          0         
_________________________________________________________________
flatten (Flatten)            (None, 1568)              0         
_________________________________________________________________
dense (Dense)                (None, 1800)              2824200   
_________________________________________________________________
dense_1 (Dense)              (None, 900)               1

Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [8]:
# Create the predicted target values dataframe

y_pred = nn.predict(X_test_npy)

In [9]:
# Convert the predicted values from decimal values between 0 and 1 to binary values of 0 or 1

y_pred_df = pd.DataFrame((y_pred+0.5).astype(int)).reset_index(drop=True)

# Create the test target variables dataframe
y_test_df = pd.DataFrame(y_test_npy)

In [10]:
# Show the number of columns in test and predicted target variables

y_pred_df.shape[1], y_test_df.shape[1]

(43, 43)

In [11]:
# Compare the predicted vs test target values

y_pred_df_tr = y_pred_df.transpose()
y_test_df_tr = y_test_df.transpose()

print(y_pred_df_tr.shape)
print(y_test_df_tr.shape)

mismatches = []

for n in range(y_pred_df.shape[0]):
    row_pred = y_pred_df.iloc[n]
    row_test = y_test_df.iloc[n]
    
    for m in range(len(row_pred)):
        if row_pred[m] != row_test[m]:
            mismatches.append(n)
            break
            
print(f"Accuracy: {(y_pred_df.shape[0] - len(mismatches))/y_pred_df.shape[0]}")


(43, 3921)
(43, 3921)
Accuracy: 0.9734761540423361
