In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
import itertools
import sys

sys.path.append("..")

from utility_functions import (calculate_model_performance,
                               plot_ROC,
                               one_hot_encode,
                               split_data_as,
                               random_shuffle)


def get_shapes(any_):
    for array in any_:
        try:
            print(array.shape)
        except:
            print("NONE")
    print("\n")


# ============= ACTIVATION FUNCTIONS ===============#

def sigmoid(Z, prime=False):
    # np.
    if prime:
        return sigmoid(Z) * (1 - sigmoid(Z))
    return 1 / (1 + np.exp(-Z))


def linear(Z, prime=False):
    if prime:
        return np.ones_like(Z)
    return Z


def relu(Z, alpha=0.01, prime=False):
    if prime:
        Z_relu = np.ones_like(Z, dtype=np.float64)
        Z_relu[Z < 0] = alpha
        return Z_relu
    return np.where(Z < 0, alpha * Z, Z)


def tanh(Z, prime=False):
    # np.tanh() could be used directly to speed this up
    if prime:
        return 1 - np.power(tanh(Z), 2)
    return (2 / (1 + np.exp(-2 * Z))) - 1


def elu(Z, prime=False):
    # https://mlfromscratch.com/activation-functions-explained/#/
    alpha = 0.2
    if prime:
        return np.where(Z < 0, alpha * (np.exp(Z)), 1)
    return np.where(Z < 0, alpha * (np.exp(Z) - 1), Z)


def softmax(Z, prime=False):
    # https://deepnotes.io/softmax-crossentropy
    # max(Z) term is added to stabilise the function.
    exps = np.exp(Z - np.max(Z))
    return exps / np.sum(exps, axis=0)


# ============== LOSS FUNCTIONS ===============#

# https://deepnotes.io/softmax-crossentropy
EPSILON = 1e-8


def calculate_error(Y, Y_hat):
    # Y and Y_hat should be in the form of (no_of_classes, no_of_training_examples)
    m = Y.shape[1]
    return -np.sum(Y * np.log(Y_hat + EPSILON)) / m


# References
# https://mc.ai/multilayered-neural-network-from-scratch-using-python/
# https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/
# https://www.coursera.org/learn/machine-learning/home/week/5
# https://www.coursera.org/specializations/deep-learning
# https://github.com/mnielsen/neural-networks-and-deep-learning/blob/master/src/network.py
# https://github.com/JWarmenhoven/Coursera-Machine-Learning

class NeuralNetwork:

    def __init__(
            self,
            input_layer: tuple,
            hidden_layer: list,  # list of tuples
            output_layer: int,
            batch_size=16,
            alpha=1,
            epoch=500,
            random_state=42,
            verbose=True,
            metrics="accuracy"
    ):
        self.input_layer = input_layer
        self.hidden_layer = hidden_layer
        self.output_layer = output_layer
        self.mini_batch_size = batch_size
        self.alpha = alpha
        self.epoch = epoch
        self.seed = random_state
        self.verbose = verbose
        self.metrics = metrics

        self.layers = len(self.weight_set_dimensions) + 1


    def __str__(self):
        parameters = (
            "Input layer: {0}\n"
            "Hidden layer: {1}\n"
            "Output layer: {2}\n"
            "Batch size: {3}\n"
            "Learning rate: {4}\n"
            "Epoch: {5}\n"
            "Seed: {6}\n"
            "Verbose: {7}\n"
            "Metric: {8}"
        ).format(
            self.input_layer,
            self.hidden_layer,
            self.output_layer,
            self.mini_batch_size,
            self.alpha,
            self.epoch,
            self.seed,
            self.verbose,
            self.metrics
        )
        return parameters

    def get_A(self, X):
        A, _ = self.forwardpass(X)
        return A

    def get_Z(self, X):
        _, Z = self.forwardpass(X)
        return Z

    def display_information(self, X, Y, epoch_no):
        model_performance_metrics = calculate_model_performance(
            np.argmax(Y, axis=0),
            self.predict(X)
        )
        print("%s: %.10f - epoch %s    iteration %s - loss %.20f" % (
            self.metrics,
            model_performance_metrics[self.metrics],
            epoch_no,
            self.no_of_iterations,
            calculate_error(Y,
                            self.get_A(X)[-1])
        )
              )

    def get_dimensions_and_activations(self):
        self.dimensions = []
        self.activation_functions = []

        self.dimensions.append(self.input_layer[0])
        self.activation_functions.append(self.input_layer[1])

        for dim, act_func in self.hidden_layer:
            self.dimensions.append(dim)
            self.activation_functions.append(act_func)

        self.dimensions.append(self.output_layer)

    @property
    def weight_set_dimensions(self):
        self.get_dimensions_and_activations()
        a, b = itertools.tee(self.dimensions[::-1])
        next(b, None)
        weight_set_dimensions = list(zip(a, b))[::-1]
        return weight_set_dimensions

    def initialise_weights(self, layer=None):
        self.W = np.empty_like(range(self.layers), dtype=object)
        self.B = np.empty_like(range(self.layers), dtype=object)
        self.W[0] = None
        self.B[0] = None
        for layer, (y, x) in zip(range(1, self.layers), self.weight_set_dimensions):
            np.random.seed(self.seed)
            self.W[layer] = np.random.rand(y, x) / np.sqrt(self.dimensions[layer - 1])
            self.B[layer] = np.random.rand(y, 1)

    def forwardpass(self, X):
        Z = np.empty_like(range(self.layers), dtype=object)
        A = np.empty_like(range(self.layers), dtype=object)
        A[0] = X
        Z[0] = None
        for layer in range(1, self.layers):
            # activation_function starts from 0 whereas layer starts from 1
            active_function = self.activation_functions[layer - 1]
            arg_to_pass_to_eval = "(Z[layer])"

            Z[layer] = self.W[layer] @ A[layer - 1] + self.B[layer]
            A[layer] = eval(active_function + arg_to_pass_to_eval)
        return A, Z

    def backpropagation(self, Y, A, Z):
        self.delta = np.empty_like(range(self.layers), dtype=object)
        self.delta[0] = None

        self.gradient_W = np.empty_like(range(self.layers), dtype=object)
        self.gradient_B = np.empty_like(range(self.layers), dtype=object)
        self.gradient_W[0] = None
        self.gradient_B[0] = None

        self.delta[-1] = A[-1] - Y

        # We substract 1 here as delta_final is calculated seperately above
        for layer in reversed(range(1, self.layers - 1)):
            # 1 is substracted from layer as activations_functions start indexing from 0
            active_function = self.activation_functions[layer - 1]
            arg_to_pass_to_eval = "(Z[layer], prime=True)"


            self.delta[layer] = (
                    self.W[layer + 1].T @ self.delta[layer + 1] *
                    eval(active_function + arg_to_pass_to_eval)
            )

            # calculate the gradient

        for layer in range(1, self.layers):
            self.gradient_W[layer] = (self.delta[layer] @ A[layer - 1].T) / self.m
            self.gradient_B[layer] = np.sum(self.delta[layer], axis=1, keepdims=True) / self.m

        # update the weights
        for layer in range(1, self.layers):
            cost = calculate_error(Y, A[-1])
            cost_other = -np.mean(Y * np.log(A[-1] + 1e-8))
            self.W[layer] -= self.alpha * self.gradient_W[layer]
            self.B[layer] -= self.alpha * self.gradient_B[layer]


    def fit(self, X, Y):
        self.m = X.shape[1] # where (no_of_features, no_of_training_examples)
        self.initialise_weights()

        # By default the method is SGD(Stochastic Gradient Descent) if one wishes to use
        # the whole batch, simply pass the number of traning examples available as the
        # batch size when instantiating the class
        self.no_of_iterations = 0
        shuffled = np.arange(self.m)
        if self.verbose:
            print("Initialising weights...")
            print("Starting the training...")
            print("Initial cost: %.10f\n" % calculate_error(Y, self.get_A(X)[-1]))
        for epoch_no in range(1, self.epoch + 1):
            np.random.shuffle(shuffled)
            X_shuffled = X[:, shuffled]
            Y_shuffled = Y[:, shuffled]
            for i in range(0, self.m, self.mini_batch_size):
                self.no_of_iterations += 1
                X_mini_batch = X_shuffled[:, i: i + self.mini_batch_size]
                Y_mini_batch = Y_shuffled[:, i: i + self.mini_batch_size]

                A, Z = self.forwardpass(X_mini_batch)
                self.backpropagation(Y_mini_batch, A, Z)
                if self.no_of_iterations % 5000 == 0 and self.verbose:
                    self.display_information(X, Y, epoch_no)

    def predict(
            self,
            X: np.ndarray,
            return_prob_matrix=False
    ):
        """Predict the output given the training data.

            Returns the predicted values in two forms:

            1.either by picking up the highest value along the columns for every row,
                i.e. "np.argmax(self.A[-1].T, axis=1)"
            2.or by returning a matrix that is in the shape of Y.T where each column
                represents the probability of the instance belonging to that class.
                Please note that every column in Y.T represents a class. To be able to
                return the probability matrix, the final activation function must be
                softmax!
                i.e. "array([0.9650488423, 0.0354737543, 0.0005225966])"

        Args:
            X (numpy.ndarray): Training set in the shape of
                (no_of_features, no_of_training examples).
            return_prob_matrix (bool, optional): Returns the probability matrix if True.
                Defaults to False.

        Returns:
            numpy.ndarray:

            if return_prob_matrix is False, the output is in the shape of
                (no_of_training_examples, 1)
            if return_prob_matrix is True, the output is in the shape of
                (no_of_training_examples, no_of_features)
        """
        A, Z = self.forwardpass(X)
        if return_prob_matrix and self.activation_functions[-1] == "softmax":
            np.set_printoptions(precision=10, suppress=True)
            return A[-1].T
        return np.argmax(A[-1].T, axis=1)

# Testing with benchmark datasets

## 1.Iris Dataset

In [2]:
from sklearn.datasets import load_iris
data = load_iris()

x = data.data[:,[0,2]]
y = data.target

X = x.T
Y = one_hot_encode(y).T

# train, test = split_data_as(x, y, train=0.8, test=0.2)

# X_train = train[:, :-1]
# Y_train = train[:, -1].reshape(-1, 1)

In [3]:
print(X.shape)
print(Y.shape)
# print("\n")
# print(X_test.shape)
# print(Y_test.shape)
# print("\n")
# print(X_validation.shape)
# print(Y_validation.shape)

(2, 150)
(3, 150)


In [9]:
from sklearn.model_selection import ParameterGrid

def grid_search(X, y, clf, metric, n_fold=3, **kwargs):
    # X and y are in the shape of (no_of_features, no_of_training_examples)
    split_indices = np.int_(np.linspace(len(X)/n_fold, len(X), num=n_fold))
    dataset_shuffled = random_shuffle(X, y)

    splitted = np.array(
        np.split(
            dataset_shuffled,
            split_indices
        )[:-1]
    )

    models = {}
    results_dict_all_models = {}
    results_average_dict = {}

    grid = ParameterGrid(kwargs['param_grid'])
    n_to_run = len(grid) * n_fold
    count = 1
    for index_model, params in enumerate(grid):
        models["model_" + str(index_model + 1)] = clf(**params)
        results_dict = {}

        for index_fold in range(n_fold):
            print("\n*********{}/{}*********".format(count ,n_to_run))
            print("Running model {0} fold {1}".format(
                str(index_model + 1),
                str(index_fold + 1)
            ))
            arrays_to_be_joined = np.delete(splitted, index_fold, axis = 0)
            dataset_train = np.concatenate(arrays_to_be_joined)
            dataset_test = splitted[index_fold]

            x_train = dataset_train[:, :-1].T
            y_train = one_hot_encode(dataset_train[:, -1]).T

            models["model_" + str(index_model + 1)].fit(x_train, y_train)

            x_test = dataset_test[:, :-1].T
            y_test = one_hot_encode(dataset_test[:, -1]).T

            results_dict["model_" + str(index_model + 1) + "_fold_" + str(index_fold + 1)] = \
            calculate_model_performance(
                np.argmax(y_test, axis=0),
                models["model_" + str(index_model + 1)].predict(x_test)
            )[metric]
            count += 1
            
        results_dict_all_models[index_model + 1] = results_dict

        vals = np.fromiter(results_dict.values(), dtype=float)
        results_average_dict["model_" + str(index_model + 1)] = np.average(vals)
        results_average_dict = dict(sorted(results_average_dict.items(), key=lambda x: x[1])[::-1])

    return results_dict_all_models, results_average_dict, models

In [10]:
result_dict_all_models, results_average_dict, models = grid_search(
    x,
    y,
    metric='F1',
    clf=NeuralNetwork,
    n_fold=3,
    param_grid={
        'batch_size': [8, 16, 32, 64],
        'input_layer': [(2, 'relu'), (2, 'tanh')],
        'hidden_layer': [[(4,'relu'), (4,'relu'),(4,'softmax')], [(4,'sigmoid'),(4,'softmax')]],
        'output_layer': [3],
        'alpha': [2, 4],
        'verbose': [True],
        'epoch': [10]
    }
)


*********1/96*********
Running model 1 fold 1
Initialising weights...
Starting the training...
Initial cost: 1.9573791653


*********2/96*********
Running model 1 fold 2
Initialising weights...
Starting the training...
Initial cost: 1.8939648063


*********3/96*********
Running model 1 fold 3
Initialising weights...
Starting the training...
Initial cost: 1.9619631243


*********4/96*********
Running model 2 fold 1
Initialising weights...
Starting the training...
Initial cost: 1.3583599141


*********5/96*********
Running model 2 fold 2
Initialising weights...
Starting the training...
Initial cost: 1.3310232629


*********6/96*********
Running model 2 fold 3
Initialising weights...
Starting the training...
Initial cost: 1.3545285979


*********7/96*********
Running model 3 fold 1
Initialising weights...
Starting the training...
Initial cost: 1.2588600307


*********8/96*********
Running model 3 fold 2
Initialising weights...
Starting the training...
Initial cost: 1.2372829437


*******


*********67/96*********
Running model 23 fold 1
Initialising weights...
Starting the training...
Initial cost: 1.2588600307


*********68/96*********
Running model 23 fold 2
Initialising weights...
Starting the training...
Initial cost: 1.2372829437


*********69/96*********
Running model 23 fold 3
Initialising weights...
Starting the training...
Initial cost: 1.2519764210


*********70/96*********
Running model 24 fold 1
Initialising weights...
Starting the training...
Initial cost: 1.2356830890


*********71/96*********
Running model 24 fold 2
Initialising weights...
Starting the training...
Initial cost: 1.2156481860


*********72/96*********
Running model 24 fold 3
Initialising weights...
Starting the training...
Initial cost: 1.2272074932


*********73/96*********
Running model 25 fold 1
Initialising weights...
Starting the training...
Initial cost: 1.9573791653


*********74/96*********
Running model 25 fold 2
Initialising weights...
Starting the training...
Initial cost: 1.8939



In [11]:
result_dict_all_models

{1: {'model_1_fold_1': 69.38775509875552,
  'model_1_fold_2': 66.66666666359554,
  'model_1_fold_3': 61.22448979299459},
 2: {'model_2_fold_1': 69.38775509875552,
  'model_2_fold_2': 0.0,
  'model_2_fold_3': 48.38709677226588},
 3: {'model_3_fold_1': 21.052631576542936,
  'model_3_fold_2': 0.0,
  'model_3_fold_3': 61.22448979299459},
 4: {'model_4_fold_1': 0.0,
  'model_4_fold_2': 0.0,
  'model_4_fold_3': 61.22448979299459},
 5: {'model_5_fold_1': 0.0, 'model_5_fold_2': 0.0, 'model_5_fold_3': 0.0},
 6: {'model_6_fold_1': 0.0,
  'model_6_fold_2': 0.0,
  'model_6_fold_3': 61.22448979299459},
 7: {'model_7_fold_1': 90.32258063883872,
  'model_7_fold_2': 0.0,
  'model_7_fold_3': 61.22448979299459},
 8: {'model_8_fold_1': 0.0,
  'model_8_fold_2': 0.0,
  'model_8_fold_3': 46.15384615207101},
 9: {'model_9_fold_1': 0.0, 'model_9_fold_2': 0.0, 'model_9_fold_3': 0.0},
 10: {'model_10_fold_1': 0.0, 'model_10_fold_2': 0.0, 'model_10_fold_3': 0.0},
 11: {'model_11_fold_1': 0.0,
  'model_11_fold_2'

In [12]:
results_average_dict

{'model_23': 70.62074829539326,
 'model_1': 65.75963718511521,
 'model_19': 64.0502354750173,
 'model_7': 50.51569014394443,
 'model_24': 43.537414963916696,
 'model_20': 43.537414963916696,
 'model_2': 39.25828395700713,
 'model_31': 33.295063144552074,
 'model_32': 28.553830991497648,
 'model_3': 27.425707123179176,
 'model_15': 21.333333331481604,
 'model_28': 20.40816326433153,
 'model_6': 20.40816326433153,
 'model_4': 20.40816326433153,
 'model_11': 20.40816326433153,
 'model_27': 20.40816326433153,
 'model_22': 15.873015872391031,
 'model_14': 15.38461538402367,
 'model_8': 15.38461538402367,
 'model_13': 15.38461538402367,
 'model_21': 15.38461538402367,
 'model_30': 0.0,
 'model_26': 0.0,
 'model_18': 0.0,
 'model_16': 0.0,
 'model_12': 0.0,
 'model_10': 0.0,
 'model_5': 0.0,
 'model_9': 0.0,
 'model_17': 0.0,
 'model_25': 0.0,
 'model_29': 0.0}

In [13]:
models

{'model_1': <__main__.NeuralNetwork at 0x7f7e301b5690>,
 'model_2': <__main__.NeuralNetwork at 0x7f7e301b5510>,
 'model_3': <__main__.NeuralNetwork at 0x7f7e301b5b90>,
 'model_4': <__main__.NeuralNetwork at 0x7f7e30103390>,
 'model_5': <__main__.NeuralNetwork at 0x7f7e30103e10>,
 'model_6': <__main__.NeuralNetwork at 0x7f7e301b5850>,
 'model_7': <__main__.NeuralNetwork at 0x7f7e301b50d0>,
 'model_8': <__main__.NeuralNetwork at 0x7f7e301b5dd0>,
 'model_9': <__main__.NeuralNetwork at 0x7f7e301b5490>,
 'model_10': <__main__.NeuralNetwork at 0x7f7e2fdcc0d0>,
 'model_11': <__main__.NeuralNetwork at 0x7f7e2fdcc9d0>,
 'model_12': <__main__.NeuralNetwork at 0x7f7e2fdccc50>,
 'model_13': <__main__.NeuralNetwork at 0x7f7e2fdccf10>,
 'model_14': <__main__.NeuralNetwork at 0x7f7e2fdcc990>,
 'model_15': <__main__.NeuralNetwork at 0x7f7e2fdccfd0>,
 'model_16': <__main__.NeuralNetwork at 0x7f7e2fdcce10>,
 'model_17': <__main__.NeuralNetwork at 0x7f7e2fdd8210>,
 'model_18': <__main__.NeuralNetwork at 

In [14]:
print(models["model_32"])

Input layer: (2, 'tanh')
Hidden layer: [(4, 'sigmoid'), (4, 'softmax')]
Output layer: 3
Batch size: 64
Learning rate: 4
Epoch: 10
Seed: 42
Verbose: True
Metric: accuracy


In [None]:
calculate_model_performance(
    np.argmax(Y, axis=0),
    models["model_5"].predict(X)
)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
dt = data.data[:,[0,2]]
x_min, x_max = dt[:, 0].min() - 1, dt[:, 0].max() + 1
y_min, y_max = dt[:, 1].min() - 1, dt[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

# here "model" is your model's prediction (classification) function
Z = models["model_5"].predict(np.c_[xx.ravel(), yy.ravel()].T) 

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure(figsize=(12,8))
plt.contourf(xx, yy, Z,alpha=0.4)
#plt.axis('off')

plt.scatter(dt[:, 0], dt[:, 1], c=y,s=20, edgecolor='k')
plt.xlabel('sepal length')
plt.ylabel('petal length')

## 2.Make Moons dataset

In [None]:
from sklearn.datasets import make_moons

x,y =make_moons(n_samples=1500, noise=.05)
X = x.T
Y = one_hot_encode(y).T

print(X.shape)
print(Y.shape)

In [None]:
model = NeuralNetwork(
    input_layer=(2, "sigmoid"),
    hidden_layer=[(8, "tanh"),
                  (6, "relu"),
                  (4, "softmax")],
    output_layer=2,
    batch_size=64,
    alpha=0.5,
    epoch=2500,
    random_state=42
)


model.fit(X, Y)

In [None]:
### Decision Boundaries
%matplotlib inline
import matplotlib.pyplot as plt
dt = x
x_min, x_max = dt[:, 0].min() - 0.5, dt[:, 0].max() + 0.5
y_min, y_max = dt[:, 1].min() - 0.5, dt[:, 1].max() + 0.5
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

# here "model" is your model's prediction (classification) function
Z = model.predict(np.c_[xx.ravel(), yy.ravel()].T) 

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure(figsize=(12,8))
plt.contourf(xx, yy, Z,alpha=0.4)
#plt.axis('off')
plt.scatter(dt[:, 0], dt[:, 1], c=y, s=20, edgecolor='k')
plt.title('Decision Boundaries')

## 3.Andrew NG Assignment 2 Dataset

In [None]:
ex2data2 = np.loadtxt("../ex2/data/ex2data2.txt", delimiter=",")

X = ex2data2[:, :-1]
y = ex2data2[:, -1]

X = X.T
Y = one_hot_encode(y).T

In [None]:
print(X.shape)
print(Y.shape)

In [None]:
model = NeuralNetwork(
    input_layer=(2, "relu"),
    hidden_layer=[(4, "relu"),
                  (4, "softmax")],
    output_layer=2,
    batch_size=64,
    alpha=0.5,
    epoch=25000,
    random_state=42
)

model.fit(X, Y)

In [None]:
model_performance_metrics = calculate_model_performance(
    np.argmax(Y, axis=0),
    model.predict(X)
)

model_performance_metrics

In [None]:
### Decision Boundaries
%matplotlib inline
import matplotlib.pyplot as plt
X = ex2data2

x1_min, x1_max = X[:, 0].min() - 0.3, X[:, 0].max() + 0.3,
x2_min, x2_max = X[:, 1].min() - 0.3, X[:, 1].max() + 0.3,
xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max), np.linspace(x2_min, x2_max))

# here "model" is your model's prediction (classification) function
Z = model.predict(np.c_[xx1.ravel(), xx2.ravel()].T) 

negatives = ex2data2[ex2data2[:, -1] == 0]
positives = ex2data2[ex2data2[:, -1] == 1]

# Put the result into a color plot
Z = Z.reshape(xx1.shape)
plt.figure(figsize=(12,8))
plt.contourf(xx1, xx2, Z,alpha=0.4)
#plt.axis('off')
plt.scatter(negatives[:, 0], negatives[:, 1],s=50, color='k')
plt.scatter(positives[:, 0], positives[:, 1],s=50, color='r')
plt.title('Decision Boundaries')

plt.contour(xx1, xx2, Z, [0.5], linewidths=2, colors="g")

# MNIST dataset

In [None]:
from scipy.io import loadmat
data = loadmat('../ex3/data/ex3data1.mat')
data.keys()

In [None]:
x = data["X"]
y = data["y"]

In [None]:
train, test, validation = split_data_as(x,y, train=0.6, test=0.2, validation=0.2)

X_train = train[:, :-1].T
Y_train = one_hot_encode(train[:, -1]).T

X_test = test[:, :-1].T
Y_test = one_hot_encode(test[:, -1]).T

X_validation = validation[:, :-1].T
Y_validation = one_hot_encode(validation[:, -1]).T

In [None]:
print(X_train.shape)
print(Y_train.shape)
print("\n")
print(X_test.shape)
print(Y_test.shape)
print("\n")
print(X_validation.shape)
print(Y_validation.shape)

In [None]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(20,20))

sample = np.random.choice(data["X"].shape[0], 20)
ax.imshow(data["X"][sample,1:].reshape(-1,20).T)
ax.axis('off');

In [None]:
model = NeuralNetwork(
    input_layer=(X_train.shape[0], "relu"),
    hidden_layer=[(10, "relu"),
                  (10, "softmax")],
    output_layer=Y_train.shape[0],
    batch_size=16,
    alpha=0.2,
    epoch=500,
    random_state=12
)


model.fit(X_train, Y_train)

In [None]:
np.sum(model.predict(X_train) == np.argmax(Y_train,axis=0))

In [None]:
calculate_model_performance(
    np.argmax(Y_train,axis=0),
    model.predict(X_train)
)

In [None]:
for index, (act, predicted) in enumerate(zip(np.argmax(Y_train,axis=0), model.predict(X_train))):
    if act != predicted:
        fig, ax = plt.subplots(figsize = (2,2))
        ax.set_title("%s: act %s --- predicted %s" %(index, act + 1, predicted + 1))
        ax.imshow(X_train[:, index].reshape(-1,20).T)
        ax.axis('off');

In [None]:
calculate_model_performance(
    np.argmax(Y_test,axis=0),
    model.predict(X_test)
)

In [None]:
for index, (act, predicted) in enumerate(zip(np.argmax(Y_test,axis=0), model.predict(X_test))):
    if act != predicted:
        fig, ax = plt.subplots(figsize = (2,2))
        ax.set_title("%s: act %s --- predicted %s" %(index, act + 1, predicted + 1))
        ax.imshow(X_test[:, index].reshape(-1,20).T)
        ax.axis('off');

In [None]:
def cross_validation():
    pass