In [131]:
import numpy as np
import struct
from array import array
import pandas as pd
import os
from os.path  import join
import random as rn
from sklearn.neural_network import MLPClassifier
import graphviz
import pydotplus
from IPython.display import Image
from io import StringIO
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import learning_curve
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
import random
import matplotlib.pyplot as plt
import time
import mlrose_hiive as mlrose
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

In [132]:
"""
Loading in the dataset into a pandas dataframe object.

For the following segments the code snippets were retreved from: https://www.kaggle.com/code/anetakovacheva/interpreting-a-music-genre-classifier
"""

%matplotlib inline

input_path = 'musicgenre_datafolder'
file_path = join(input_path, 'music_genre.csv')

music_data = pd.read_csv(file_path)

In [133]:
"""
Cleaning and Pre-Processing all of the data
"""

"""
There are some duplicated data that needs to be cleaned up
"""
music_data.duplicated().any()
duplicated = music_data.duplicated()
music_data[duplicated]
music_data.iloc[9999:10006]
music_data.drop([10000, 10001, 10002, 10003, 10004], inplace = True)

"""
Removing some columns that don't matter or will complicated the training too much
"""
music_data.reset_index(inplace = True)
music_data = music_data.drop(["artist_name", "index", "instance_id", "track_name", "obtained_date"], axis = 1)

"""
Normalizing the music data such that it removes invalid values for 'tempo' and converts
the column values into a float
"""
music_data = music_data.drop(music_data[music_data["tempo"] == "?"].index)
music_data["tempo"] = music_data["tempo"].astype("float")
music_data["tempo"] = np.around(music_data["tempo"], decimals = 2)

"""
Encoding the columns that are strings with LabelEncoder since this will mess
up the algorithms that require numeric values
"""
key_encoder = LabelEncoder()
mode_encoder = LabelEncoder()
music_data["key"] = key_encoder.fit_transform(music_data["key"])
music_data["mode"] = mode_encoder.fit_transform(music_data["mode"])

"""
Separating out the column features from the music genre label
"""
music_features = music_data.drop("music_genre", axis = 1)
music_labels = music_data["music_genre"]

"""
Scaling the features out into a scale centered around 0 with a standard deviation of 1
"""
scaler = StandardScaler()
music_features_scaled = scaler.fit_transform(music_features)

In [134]:
"""
Splitting the data into Training and Testing Data Sets
"""
train_features, test_features, train_labels, test_labels = train_test_split(
    music_features_scaled, music_labels, test_size = 0.1, stratify = music_labels)


In [148]:
train_labels = pd.Series.to_numpy(train_labels, copy=True)
test_labels = pd.Series.to_numpy(test_labels, copy=True)

In [150]:
one_hot = OneHotEncoder()

y_train_hot = one_hot.fit_transform(train_labels.reshape(-1, 1)).toarray()
y_test_hot = one_hot.transform(test_labels.reshape(-1, 1)).toarray()

In [151]:
"""
Setting up seed values for reproducability
"""
starting_seed = 1234
seed_values = []

for i in range(0, 5):
    seed_values.append(starting_seed + i)

# np.random.seed(seed)
# rn.seed(seed)
# os.environ['PYTHONHASHSEED'] = str(seed)

In [157]:
"""
Actually training the NN with the mlrose package using the hyperparameters that were found to be 
desired in A1.

This is to recreate the learning / loss curves that were generated in A1 using mlrose.
"""

# Hyperparameters for NN
hidden_nodes = [100, 50, 10]
activation = 'relu'
algorithm = 'random_hill_climb'
max_iters = 50
bias = True
is_classifier = True
learning_rate = 0.2
early_stopping = False
max_attempts = 10
clip_max = 5

nn_model = mlrose.NeuralNetwork(
    hidden_nodes = hidden_nodes,
    activation = activation,
    algorithm = algorithm,
    max_iters = max_iters,
    bias = bias,
    is_classifier = is_classifier,
    learning_rate = learning_rate,
    early_stopping = early_stopping,
    max_attempts = max_attempts,
#     clip_max = clip_max,
    random_state = seed_values[4],
    curve = True
)

nn_model.fit(train_features, y_train_hot)

NeuralNetwork(curve=True, hidden_nodes=[100, 50, 10], learning_rate=0.2,
              max_iters=50, random_state=1238)

In [158]:
print(nn_model.fitted_weights)
print(nn_model.fitness_curve)
print(nn_model.loss)
print(nn_model.predicted_probs)

[ 0.85511765 -0.21738124 -0.17115633 ...  0.59541652 -0.95995017
 -0.01869913]
[[26.95908818  2.        ]
 [26.95908818  3.        ]
 [26.95212381  5.        ]
 [26.95212381  6.        ]
 [26.95166506  8.        ]
 [26.94751858 10.        ]
 [26.94660823 12.        ]
 [26.93899775 14.        ]
 [26.93692076 16.        ]
 [26.93692076 17.        ]
 [26.92977313 19.        ]
 [26.92977313 20.        ]
 [26.92977313 21.        ]
 [26.92977313 22.        ]
 [26.92510125 24.        ]
 [26.92510125 25.        ]
 [26.92237365 27.        ]
 [26.90418063 29.        ]
 [26.90418063 30.        ]
 [26.90418063 31.        ]
 [26.90418063 32.        ]
 [26.89627471 34.        ]
 [26.89627471 35.        ]
 [26.72689221 37.        ]
 [26.72313138 39.        ]
 [26.72282999 41.        ]
 [26.72282999 42.        ]
 [26.72282999 43.        ]
 [26.70598782 45.        ]
 [26.70084147 47.        ]
 [26.70084147 48.        ]
 [26.70084147 49.        ]
 [26.70025528 51.        ]
 [26.61542248 53.        ]
 [2

In [159]:
from sklearn.metrics import accuracy_score

# Predict labels for train set and assess accuracy
y_train_pred = nn_model.predict(train_features)

y_train_accuracy = accuracy_score(y_train_hot, y_train_pred)
print(y_train_accuracy)

0.11199960511377659


In [160]:
print(y_train_pred[:25])

[[0 1 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]]


In [161]:
"""
Got this code from https://www.kaggle.com/code/hojjatk/read-mnist-dataset/notebook for 
instructions on how to load data from the MNIST dataset
"""
class MnistDataloader(object):
    def __init__(self, training_images_filepath,training_labels_filepath,
                 test_images_filepath, test_labels_filepath):
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath
    
    def read_images_labels(self, images_filepath, labels_filepath):        
        labels = []
        with open(labels_filepath, 'rb') as file:
            magic, size = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))
            labels = array("B", file.read())        
        
        with open(images_filepath, 'rb') as file:
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))
            image_data = array("B", file.read())        
        images = []
        for i in range(size):
            images.append([0] * rows * cols)
        for i in range(size):
            img = np.array(image_data[i * rows * cols:(i + 1) * rows * cols])
#             img = img.reshape(28, 28)
            images[i][:] = img            
        
        return np.array(images), np.array(labels)
            
    def load_data(self):
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        return (x_train, y_train),(x_test, y_test)  

In [162]:
#
# Verify Reading Dataset via MnistDataloader class
#
%matplotlib inline
import random
import matplotlib.pyplot as plt

#
# Set file paths based on added MNIST Datasets
#
input_path = 'mnist_datafolder'
training_images_filepath = join(input_path, 'train-images-idx3-ubyte/train-images-idx3-ubyte')
training_labels_filepath = join(input_path, 'train-labels-idx1-ubyte/train-labels-idx1-ubyte')
test_images_filepath = join(input_path, 't10k-images-idx3-ubyte/t10k-images-idx3-ubyte')
test_labels_filepath = join(input_path, 't10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte')

#
# Helper function to show a list of images with their relating titles
#
def show_images(images, title_texts):
    cols = 5
    rows = int(len(images)/cols) + 1
    plt.figure(figsize=(30,20))
    index = 1    
    for x in zip(images, title_texts):        
        image = x[0]        
        title_text = x[1]
        plt.subplot(rows, cols, index)        
        plt.imshow(image, cmap=plt.cm.gray)
        if (title_text != ''):
            plt.title(title_text, fontsize = 15);        
        index += 1

#
# Load MINST dataset
#
mnist_dataloader = MnistDataloader(training_images_filepath, training_labels_filepath, test_images_filepath, test_labels_filepath)
(x_train, y_train), (x_test, y_test) = mnist_dataloader.load_data()

In [163]:
"""
Setting up seed values for reproducability
"""
starting_seed = 1234
seed_values = []

for i in range(0, 5):
    seed_values.append(starting_seed + i)

# np.random.seed(seed)
# rn.seed(seed)
# os.environ['PYTHONHASHSEED'] = str(seed)

In [164]:
"""
Need to do some data pre-processing to make it work with the mlrose NN class
"""

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(x_train)
X_test_scaled = scaler.transform(x_test)

one_hot = OneHotEncoder()

y_train_hot = one_hot.fit_transform(y_train.reshape(-1, 1)).toarray()
y_test_hot = one_hot.transform(y_test.reshape(-1, 1)).toarray()

In [165]:
one_hot.fit_transform(y_train.reshape(-1, 1)).toarray()[:25]

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 

In [176]:
"""
Actually training the NN with the mlrose package using the hyperparameters that were found to be 
desired in A1.

This is to recreate the learning / loss curves that were generated in A1 using mlrose.
"""

# Hyperparameters for NN
hidden_nodes = [2]
activation = 'relu'
algorithm = 'random_hill_climb'
max_iters = 1000
bias = True
is_classifier = True
learning_rate = 0.2
early_stopping = False
max_attempts = 10
clip_max = 5

nn_modelA1 = mlrose.NeuralNetwork(
    hidden_nodes = hidden_nodes,
    activation = activation,
    algorithm = algorithm,
    max_iters = max_iters,
    bias = bias,
    is_classifier = is_classifier,
    learning_rate = learning_rate,
    early_stopping = early_stopping,
    max_attempts = max_attempts,
#     clip_max = clip_max,
    random_state = seed_values[4],
    curve = True
)

nn_modelA1.fit(X_train_scaled, y_train_hot)

NeuralNetwork(curve=True, hidden_nodes=[2], learning_rate=0.2, max_iters=1000,
              random_state=1238)

In [182]:
print(len(nn_modelA1.fitted_weights))
print(nn_modelA1.fitness_curve)
print(nn_modelA1.loss)
print(nn_modelA1.predicted_probs)

1590
[[   3.35362094    2.        ]
 [   3.35360723    4.        ]
 [   3.35360723    5.        ]
 ...
 [   2.30148151 1395.        ]
 [   2.30141581 1397.        ]
 [   2.30141581 1398.        ]]
2.3014158092452632
[[0.1        0.1        0.1        ... 0.1        0.1        0.1       ]
 [0.1        0.1        0.1        ... 0.1        0.1        0.1       ]
 [0.03919367 0.19390826 0.08621977 ... 0.08042162 0.04082504 0.11108355]
 ...
 [0.1        0.1        0.1        ... 0.1        0.1        0.1       ]
 [0.1        0.1        0.1        ... 0.1        0.1        0.1       ]
 [0.1        0.1        0.1        ... 0.1        0.1        0.1       ]]


In [183]:
from sklearn.metrics import accuracy_score

# Predict labels for train set and assess accuracy
y_train_pred = nn_modelA1.predict(X_train_scaled)

y_train_accuracy = accuracy_score(y_train_hot, y_train_pred)
print(y_train_accuracy)

0.17691666666666667


In [184]:
print(y_train[3])
print(y_train_pred[3])
print(y_train_hot[3])

1
[0 1 0 0 0 0 0 0 0 0]
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]


In [185]:
print(y_train_pred[:25])

[[1 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]]


In [79]:
"""
Actually training the MLP Classifier to generate the graph for the learning graph visualizer.
This should utilize the percentage of samples lists that use randomly selected samples of the
overall training data. This is so that we can test the accuracy (using cross-validation) across different
training data size samples to see at what point the accuracy score stops being affected by the size of the training
samples. This is also to see at what point the data might start getting overfit.

This is to generate the Accuracy Learning Curve.
"""
# Defining hyperparameters here
hidden_layer_sizes = [100, 50, 5]
activation = 'relu'
learning_rate = 'constant'
max_iter = 1 # Setting this to 1 since we want to control the epochs ourselves
warm_start = True # This is to stack the training across different epochs

number_of_epochs = 15

# lists to hold the results of training / validation scores
x_axis_list = []
avg_train_scores_list = []
avg_validation_scores_list = []
avg_train_loss_values = []
avg_validation_loss_values = []
iteration_wall_clock_list = []

# First declaring the Decision Tree Classifer from scikit-learn
clf = MLPClassifier(
    hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    learning_rate=learning_rate,
    max_iter=max_iter,
    warm_start=warm_start,
    random_state=seed_values[0]
)

for epoch_iteration in range(1, number_of_epochs + 1):
    
    # cross_val_score doesn't increase across epoch runs for some reason so I need to split it myself
    train_data, val_data, train_label, val_label = train_test_split(x_train, y_train, test_size=0.2, random_state=seed_values[0])
    start_time = time.time()
    clf.fit(train_data, train_label)
    training_time = time.time() - start_time
    
    accuracy_score = clf.score(train_data, train_label)
    validation_score = clf.score(val_data, val_label)
#     loss_score = clf.loss_
    train_loss_score = log_loss(train_label, clf.predict_proba(train_data))
    val_loss_score = log_loss(val_label, clf.predict_proba(val_data))
    
    x_axis_list.append(epoch_iteration)
    avg_train_scores_list.append(accuracy_score)
    avg_validation_scores_list.append(validation_score)
    avg_train_loss_values.append(train_loss_score)
    avg_validation_loss_values.append(val_loss_score)
    iteration_wall_clock_list.append(training_time)
    
    print("=============================================")
    print("Run for " + str(epoch_iteration) + " epoch")
    print("Training Score: " + str(accuracy_score))
    print("Validation Score: " + str(validation_score))
#     print("Loss Score: " + str(loss_score))
    print("Training Loss Score: " + str(train_loss_score))
    print("Validation Loss Score: " + str(val_loss_score))
    print("Training Time: " + str(training_time))
    print("=============================================")

    

Run for 1 epoch
Training Score: 0.10545833333333333
Validation Score: 0.10025
Training Loss Score: 2.3128669409813685
Validation Loss Score: 2.31550835857776
Training Time: 0.7898991107940674
Run for 2 epoch
Training Score: 0.10545833333333333
Validation Score: 0.10025
Training Loss Score: 2.303993712122503
Validation Loss Score: 2.3051416775259392
Training Time: 0.8366668224334717
Run for 3 epoch
Training Score: 0.7522916666666667
Validation Score: 0.748
Training Loss Score: 0.851033448222606
Validation Loss Score: 0.8867660597885058
Training Time: 0.7246658802032471
Run for 4 epoch
Training Score: 0.8608958333333333
Validation Score: 0.8515833333333334
Training Loss Score: 0.5148386422015143
Validation Loss Score: 0.56716265141914
Training Time: 0.6933891773223877
Run for 5 epoch
Training Score: 0.9338958333333334
Validation Score: 0.9181666666666667
Training Loss Score: 0.34035650256869104
Validation Loss Score: 0.4022916952377533
Training Time: 0.6818728446960449
Run for 6 epoch
Tr

In [83]:
from sklearn.metrics import accuracy_score

# Predict labels for train set and assess accuracy
y_train_pred = clf.predict(x_train)
print(y_train_pred)
print(y_train)

y_train_accuracy = accuracy_score(y_train, y_train_pred)
print(y_train_accuracy)

[5 0 4 ... 5 6 8]
[5 0 4 ... 5 6 8]
0.9788
