In [1]:
# python notebook for Make Your Own Neural Network
# code for a 3-layer neural network, and code for learning the MNIST dataset
# (c) Tariq Rashid, 2016
# license is GPLv2

In [2]:
import numpy
# scipy.special for the sigmoid function expit()
import scipy.special
# library for plotting arrays
import matplotlib.pyplot
# ensure the plots are inside this notebook, not an external window
%matplotlib inline

In [3]:
# neural network class definition
class neuralNetwork:
    """Three-layer (input, hidden, output) feed-forward network with sigmoid activations.

    Attributes:
        inodes, hnodes, onodes: number of nodes in the input, hidden and output layers.
        wih: input -> hidden weight matrix, shape (hnodes, inodes).
        who: hidden -> output weight matrix, shape (onodes, hnodes).
        lr: learning rate used to scale every weight update.
        activation_function: callable applied element-wise to each layer's input signals.
    """

    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate, seed=None):
        """Initialise the layer sizes, the learning rate and random link weights.

        Args:
            inputnodes: number of input nodes.
            hiddennodes: number of hidden nodes.
            outputnodes: number of output nodes.
            learningrate: factor applied to each weight update in train().
            seed: optional seed for the weight initialisation. When None (the
                default) the weights are drawn from numpy's global random state,
                exactly as before; otherwise a dedicated generator seeded with
                this value is used so the initial weights are reproducible.
        """
        # set number of nodes in each input, hidden, output layer
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes

        # pick the source of randomness: global state by default, private generator when seeded
        if seed is None:
            draw_normal = numpy.random.normal
        else:
            draw_normal = numpy.random.default_rng(seed).normal

        # link weight matrices, wih and who
        # weights inside the arrays are w_i_j, where link is from node i to node j in the next layer
        # w11 w21
        # w12 w22 etc
        # each matrix is sampled from a normal distribution centred on 0.0 whose
        # standard deviation is (number of incoming nodes) ** -0.5
        self.wih = draw_normal(0.0, pow(self.inodes, -0.5), (self.hnodes, self.inodes))
        self.who = draw_normal(0.0, pow(self.hnodes, -0.5), (self.onodes, self.hnodes))

        # learning rate
        self.lr = learningrate

        # activation function is the sigmoid function
        self.activation_function = scipy.special.expit

    def _forward(self, inputs):
        """Propagate a column of inputs through the network.

        Args:
            inputs: 2d array whose rows correspond to the input nodes.

        Returns:
            Tuple (hidden_outputs, final_outputs) holding the signals emerging
            from the hidden layer and from the output layer.
        """
        # signals into, then out of, the hidden layer
        hidden_outputs = self.activation_function(numpy.dot(self.wih, inputs))
        # signals into, then out of, the final output layer
        final_outputs = self.activation_function(numpy.dot(self.who, hidden_outputs))
        return hidden_outputs, final_outputs

    def train(self, inputs_list, targets_list):
        """Run one training step and update both weight matrices in place.

        Args:
            inputs_list: sequence of input values, one per input node.
            targets_list: sequence of target values, one per output node.
        """
        # convert the lists to 2d column arrays
        inputs = numpy.array(inputs_list, ndmin=2).T
        targets = numpy.array(targets_list, ndmin=2).T

        hidden_outputs, final_outputs = self._forward(inputs)

        # output layer error is the (target - actual)
        output_errors = targets - final_outputs
        # hidden layer error is the output_errors, split by weights, recombined at hidden nodes
        # (must be computed before self.who is modified below)
        hidden_errors = numpy.dot(self.who.T, output_errors)

        # update the weights for the links between the hidden and output layers
        self.who += self.lr * numpy.dot(
            output_errors * final_outputs * (1.0 - final_outputs),
            numpy.transpose(hidden_outputs),
        )

        # update the weights for the links between the input and hidden layers
        self.wih += self.lr * numpy.dot(
            hidden_errors * hidden_outputs * (1.0 - hidden_outputs),
            numpy.transpose(inputs),
        )

    def query(self, inputs_list):
        """Return the network's output signals for one input record.

        Args:
            inputs_list: sequence of input values, one per input node.

        Returns:
            2d array with one row per output node.
        """
        # convert inputs list to 2d column array
        inputs = numpy.array(inputs_list, ndmin=2).T
        _, final_outputs = self._forward(inputs)
        return final_outputs

## Modifications to the ANN Code

### Overview
The provided code outlines the process of training and testing an Artificial Neural Network (ANN) on the MNIST dataset. The key modifications introduced focus on enhancing the training flexibility by looping through different dataset files and providing comprehensive performance reports using `sklearn`.

### Key Changes

1. **File Looping for Training Data**:
    - Added capability to loop through multiple training files, facilitating easy training on the original dataset, a balanced dataset, and several imbalanced datasets.
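    - File paths for the imbalanced datasets are built for every combination of percentage (2, 5, 8, 15, 20, 50, 80) and digit (0–9), giving 70 files, so the network can be trained under many different class-imbalance conditions. The code only constructs the paths; the files themselves must already exist in `mnist_dataset/`.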

2. **Performance Reporting**:
    - Incorporated `sklearn` for comprehensive performance reporting.
    - After testing the neural network, the code computes:
        - A confusion matrix, offering insights into true vs. predicted classifications.
        - Precision, Recall, and F1-score for each class.
        - A one-vs-rest ROC AUC (Receiver Operating Characteristic Area Under the Curve) score for each class, computed from the raw network outputs. Only the macro average of these per-class scores is printed.

3. **Data Loading and Pre-processing**:
    - The training and testing data files are loaded separately within the `train_and_test` function.
    - The pixel values of the images are rescaled from the 0–255 range to 0.01–1.00 (`value / 255.0 * 0.99 + 0.01`), which keeps every input strictly above zero.

4. **Neural Network Testing**:
    - During testing, the neural network's outputs for each test instance are collected.
    - These outputs are further used for computing performance metrics.

### Implementation Details
- The neural network is instantiated with 784 input nodes (corresponding to the 28x28 pixel MNIST images), 200 hidden nodes, and 10 output nodes (for the 10 digit classes).
- The learning rate is set at 0.1.
- The neural network is trained for 5 epochs on each dataset file.
- The test dataset remains consistent for all training files.

### Conclusion
These modifications enhance the code's adaptability to different training scenarios and provide a detailed performance breakdown for each training iteration.


In [4]:
import numpy
import scipy.special
import matplotlib.pyplot

from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score
from sklearn.preprocessing import label_binarize
# Ensure the plots are inside this notebook, not an external window
%matplotlib inline

# [Original neuralNetwork class definition here...]

def _read_records(path):
    """Return the non-blank lines of the text file at ``path``."""
    # the context manager closes the file even if reading raises
    with open(path, 'r') as data_file:
        return [line for line in data_file if line.strip()]


def _parse_record(record):
    """Split one comma-separated record into ``(label, scaled_inputs)``.

    The first field is parsed as the integer label; the remaining fields are
    converted to floats and rescaled with ``value / 255.0 * 0.99 + 0.01``
    (presumably the raw values are 0-255 pixel intensities -- confirm against
    the dataset).
    """
    all_values = record.split(',')
    # numpy.asfarray was removed in NumPy 2.0; asarray with dtype=float is the equivalent call
    scaled_inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
    return int(all_values[0]), scaled_inputs


def train_and_test(training_file, test_file, network=None, num_epochs=None):
    """Train a network on one CSV file, evaluate it on another, and print a report.

    Prints the confusion matrix, the per-class precision/recall/F1 report, the
    macro-averaged one-vs-rest ROC AUC, and the overall accuracy.

    Args:
        training_file: path of the CSV file holding the training records.
        test_file: path of the CSV file holding the test records.
        network: network to train and query. Defaults to the notebook-level
            global ``n`` so existing two-argument calls keep working.
        num_epochs: number of passes over the training records. Defaults to
            the notebook-level global ``epochs``.

    Returns:
        Fraction of test records whose predicted label equals the true label.
    """
    # fall back to the notebook globals when the caller does not supply these
    net = n if network is None else network
    epoch_count = epochs if num_epochs is None else num_epochs
    num_classes = net.onodes

    # Load the training data
    training_data_list = _read_records(training_file)

    # Train the neural network
    for _ in range(epoch_count):
        for record in training_data_list:
            correct_label, inputs = _parse_record(record)
            # every target is 0.01 except the node of the true label, which is 0.99
            targets = numpy.zeros(num_classes) + 0.01
            targets[correct_label] = 0.99
            net.train(inputs, targets)

    # Load the test data
    test_data_list = _read_records(test_file)

    # Test the neural network and collect scorecard
    scorecard = []    # Predicted label for every test record
    all_outputs = []  # Store all network outputs
    all_labels = []   # Store all true labels

    for record in test_data_list:
        correct_label, inputs = _parse_record(record)
        outputs = net.query(inputs)
        # the index of the strongest output node is the predicted label
        scorecard.append(numpy.argmax(outputs))
        all_outputs.append(outputs.ravel())
        all_labels.append(correct_label)

    all_outputs = numpy.array(all_outputs)
    all_labels = numpy.array(all_labels)

    # Computing confusion matrix
    cm = confusion_matrix(all_labels, scorecard)
    print("Confusion Matrix:")
    print(cm)

    # Compute Precision, Recall, and F1-score
    report = classification_report(all_labels, scorecard, digits=4)
    print(report)

    # Compute ROC AUC
    # Binarize the true labels, then score each class one-vs-rest against
    # that class's raw output signal
    binarized_labels = label_binarize(all_labels, classes=list(range(num_classes)))
    per_class_auc = [
        roc_auc_score(binarized_labels[:, i], all_outputs[:, i])
        for i in range(num_classes)
    ]
    # Compute macro-average ROC AUC
    auc = numpy.mean(per_class_auc)
    print(f"AUC-ROC (Macro average): {auc:.4f}")

    scorecard_array = numpy.asarray(scorecard)
    performance = numpy.mean(scorecard_array == all_labels)
    print(f"Performance for {training_file} = ", performance)
    return performance

# Network configuration
input_nodes = 784
hidden_nodes = 200
output_nodes = 10
learning_rate = 0.1

# Number of epochs
epochs = 5

# File paths
train_files = [
    "mnist_dataset/mnist_train.csv",
    "mnist_dataset/mnist_balanced.csv",
]

# Add the imbalance files for each percentage and digit
for percentage in [2, 5, 8, 15, 20, 50, 80]:
    for digit in range(10):
        train_files.append(f"mnist_dataset/mnist_imbalanced_{digit}_{percentage}.csv")

# Assuming test data is same for all
test_file = "mnist_dataset/mnist_test.csv"

# Accuracy returned by train_and_test, keyed by training file
performance_by_file = {}

for train_file in train_files:
    # Create a fresh instance of the neural network for every training file.
    # Reusing one instance would carry the weights learned from all earlier
    # files into this run, so the reported metrics would not reflect this
    # dataset alone. train_and_test picks the network up from the global `n`.
    n = neuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)
    performance_by_file[train_file] = train_and_test(train_file, test_file)


Confusion Matrix:
[[ 976    0    0    0    0    1    1    1    1    0]
 [   0 1126    2    1    0    1    2    0    3    0]
 [  11    3  994    8    0    0    3    7    5    1]
 [   2    0    2  987    1    2    0    5    5    6]
 [   3    0    2    0  948    0    5    0    1   23]
 [   4    1    0   18    0  853    5    1    6    4]
 [  13    3    0    1    1    8  928    0    3    1]
 [   4    8    7    1    1    0    0  989    0   18]
 [   5    1    1    8    5    4    3    3  941    3]
 [   7    4    1    7    6    3    1    4    3  973]]
              precision    recall  f1-score   support

           0     0.9522    0.9959    0.9736       980
           1     0.9825    0.9921    0.9873      1135
           2     0.9851    0.9632    0.9740      1032
           3     0.9573    0.9772    0.9672      1010
           4     0.9854    0.9654    0.9753       982
           5     0.9782    0.9563    0.9671       892
           6     0.9789    0.9687    0.9738       958
           7     0