In [None]:
import itertools
import random
import struct
from array import array
from os.path import join

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.pyplot import figure
from sklearn.metrics import accuracy_score, mean_squared_error, plot_confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier, MLPRegressor

# Part A: MLP for Classification

(a) **Data Pre-processing**

In [None]:
class MnistDataloader(object):
    """Reader for a train/test pair of IDX image and label files (MNIST layout).

    The four file paths are only stored by the constructor; nothing is read
    until `load_data` or `read_images_labels` is called.
    """

    def __init__(self, training_images_filepath,training_labels_filepath,
                 test_images_filepath, test_labels_filepath):
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath

    def read_images_labels(self, images_filepath, labels_filepath):
        """Read one image file and its matching label file.

        Parameters
        ----------
        images_filepath : str
            File starting with a 16-byte big-endian header
            (magic 2051, image count, rows, cols) followed by unsigned bytes.
        labels_filepath : str
            File starting with an 8-byte big-endian header
            (magic 2049, label count) followed by unsigned bytes.

        Returns
        -------
        images : list
            One ``(rows, cols)`` uint8 array per image, in file order.
        labels : array.array
            Unsigned-byte labels, in file order.

        Raises
        ------
        ValueError
            If a magic number is wrong, or the image file holds fewer pixel
            bytes than its header announces.
        """
        with open(labels_filepath, 'rb') as file:
            magic, size = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))
            labels = array("B", file.read())

        with open(images_filepath, 'rb') as file:
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))
            image_data = array("B", file.read())

        # Fail early with a readable message instead of an opaque reshape error.
        expected_bytes = size * rows * cols
        if len(image_data) < expected_bytes:
            raise ValueError('Image data truncated, expected {} bytes, got {}'.format(
                expected_bytes, len(image_data)))

        # Use the dimensions announced by the header rather than assuming 28x28,
        # and split the whole buffer in one reshape instead of a per-image loop.
        pixels = np.array(image_data, dtype=np.uint8)[:expected_bytes]
        images = list(pixels.reshape(size, rows, cols))

        return images, labels

    def load_data(self):
        """Return ``((x_train, y_train), (x_test, y_test))`` read from the stored paths."""
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        return (x_train, y_train),(x_test, y_test)

In [None]:
class MnistDataProcessor:
    """Build a two-class problem (labels +1 / -1) from two MNIST label values.

    Parameters
    ----------
    pos_class : int
        Label value whose images are returned with target ``1``.
    neg_class : int
        Label value whose images are returned with target ``-1``.
    training_samples : int, default 2000
        Maximum number of training images drawn per class.
    input_path : str, default '../input/mnist-dataset'
        Directory containing the four IDX sub-directories/files.
    seed : int or None, default None
        When given, sampling uses a private ``random.Random(seed)`` so repeated
        runs draw the same images. When ``None`` the global ``random`` module
        is used, so ``random.seed(...)`` still controls it.
    """

    def __init__(self, pos_class, neg_class, training_samples=2000,
                 input_path='../input/mnist-dataset', seed=None):
        training_images_filepath = join(input_path, 'train-images-idx3-ubyte/train-images-idx3-ubyte')
        training_labels_filepath = join(input_path, 'train-labels-idx1-ubyte/train-labels-idx1-ubyte')
        test_images_filepath = join(input_path, 't10k-images-idx3-ubyte/t10k-images-idx3-ubyte')
        test_labels_filepath = join(input_path, 't10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte')
        mnist_dataloader = MnistDataloader(training_images_filepath, training_labels_filepath, test_images_filepath, test_labels_filepath)
        (self.__x_train, y_train), (self.__x_test, y_test) = mnist_dataloader.load_data()
        # Convert the labels to ndarrays explicitly so the comparisons below are
        # ordinary element-wise numpy comparisons.
        self.__y_train = np.asarray(y_train)
        self.__y_test = np.asarray(y_test)
        self.__train_pos_class = np.flatnonzero(self.__y_train == pos_class)
        self.__train_neg_class = np.flatnonzero(self.__y_train == neg_class)
        self.__test_pos_class = np.flatnonzero(self.__y_test == pos_class)
        self.__test_neg_class = np.flatnonzero(self.__y_test == neg_class)
        self.__train_samples = training_samples
        self.__rng = random.Random(seed) if seed is not None else random

    def _shuffled_copy(self, indices):
        """Return the indices as a shuffled list, leaving the stored array untouched."""
        order = [int(i) for i in indices]
        self.__rng.shuffle(order)
        return order

    @staticmethod
    def _stack(images, pos_idx, neg_idx):
        """Flatten the selected images into rows and build the matching +1/-1 labels."""
        chosen = list(pos_idx) + list(neg_idx)
        if not chosen:
            raise ValueError('No samples found for the requested classes')
        stacked = np.array([images[i] for i in chosen])
        nsamples, nx, ny = stacked.shape
        vectors = stacked.reshape((nsamples, nx * ny))
        labels = [1] * len(pos_idx) + [-1] * len(neg_idx)
        return vectors, labels

    def train_data(self):
        """Return ``(train_vector, train_label)``.

        Up to ``training_samples`` randomly chosen images per class, positive
        class first. ``train_vector`` has one flattened image per row and
        ``train_label`` is a list of ``1`` / ``-1``.
        """
        train_positive_class_idx = self._shuffled_copy(self.__train_pos_class)[:self.__train_samples]
        train_negative_class_idx = self._shuffled_copy(self.__train_neg_class)[:self.__train_samples]
        return self._stack(self.__x_train, train_positive_class_idx, train_negative_class_idx)

    def test_data(self):
        """Return ``(test_vector, test_label)`` with every test image of both classes.

        Images are in random order within each class, positive class first.
        """
        test_positive_class_idx = self._shuffled_copy(self.__test_pos_class)
        test_negative_class_idx = self._shuffled_copy(self.__test_neg_class)
        return self._stack(self.__x_test, test_positive_class_idx, test_negative_class_idx)

In [None]:
# Binary task: label 3 is the positive class (+1), label 8 the negative class (-1).
dataset = MnistDataProcessor(pos_class=3, neg_class=8)
(X_train, y_train), (X_test, y_test) = dataset.train_data(), dataset.test_data()

(b) **Train an MLP each with 1, 2, 3 and 4 hidden layers using Backpropagation**

In [None]:
# Compare depth: one model per hidden-layer count (1-4), each layer 100 units wide.
for hidden_layers_count in range(1, 5):
    layer_sizes = (100,) * hidden_layers_count
    # Fixed random_state so the depth comparison is repeatable between runs.
    clf = MLPClassifier(hidden_layer_sizes=layer_sizes, random_state=42).fit(X_train, y_train)
    # scikit-learn metrics take (y_true, y_pred) in that order.
    accuracy = accuracy_score(y_test, clf.predict(X_test)) * 100
    print("Accuracy score for MLP classifier with {} hidden layers: {:.2f}%".format(hidden_layers_count, accuracy))
    # NOTE(review): plot_confusion_matrix was deprecated in scikit-learn 1.0 and
    # removed in 1.2; switch to ConfusionMatrixDisplay.from_estimator when the
    # environment is upgraded.
    disp = plot_confusion_matrix(clf, X_test, y_test, cmap=plt.cm.Blues, normalize='true')
    disp.ax_.set_title("Confusion matrix for MLP classifier with {} hidden layers".format(hidden_layers_count))

(c) **Pick the best MLP based on (a) and (b) and vary the following:**
- Number of nodes in the hidden layers.
- Activation functions: tanh, ReLU, and logistic
- Hyperparameters: momentum term, early stopping, and learning-rate schedule

In [None]:
# Search space for the classification grid search.
# 'hidden_layer_sizes' holds the width used for every hidden layer.
param_grid = dict(
    hidden_layer_sizes=[50, 100],
    activation=['logistic', 'tanh', 'relu'],
    early_stopping=[True, False],
    momentum=[0.25, 0.5, 0.75],
    learning_rate=['constant', 'adaptive'],
)

In [None]:
# Exhaustive grid search over `param_grid` for a 3-hidden-layer classifier.
# One dict per trained model is collected in `result_grid`.
result_grid = []
i = 0
# itertools.product varies the last factor fastest, i.e. the same visiting
# order as nesting the five loops by hand.
search_space = itertools.product(
    param_grid['hidden_layer_sizes'],
    param_grid['activation'],
    param_grid['early_stopping'],
    param_grid['momentum'],
    param_grid['learning_rate'],
)
for i, (hidden_layer_size, activation, early_stopping, momentum, learning_rate) in enumerate(search_space, start=1):
    # solver='sgd' is required for `momentum` and the `learning_rate` schedule
    # to have any effect; random_state keeps every configuration repeatable.
    clf = MLPClassifier(
        hidden_layer_sizes=(hidden_layer_size,) * 3,
        activation=activation,
        early_stopping=early_stopping,
        momentum=momentum,
        learning_rate=learning_rate,
        solver='sgd',
        random_state=42,
    ).fit(X_train, y_train)
    # scikit-learn metrics take (y_true, y_pred) in that order.
    acc = accuracy_score(y_test, clf.predict(X_test)) * 100
    result_grid.append({
        'hidden_layer_size': hidden_layer_size,
        'activation': activation,
        'early_stopping': early_stopping,
        'momentum': momentum,
        'learning_rate': learning_rate,
        'accuracy_score': acc
    })
    print("Model trained: {}".format(i))

In [None]:
# Rank by accuracy before truncating, so the best configurations are always
# among the 50 rows shown (the grid produces more rows than that).
pd.DataFrame(result_grid).sort_values('accuracy_score', ascending=False).head(50)

In [None]:
# Persist every classification grid-search row (with a header, without the index).
pd.DataFrame(result_grid).to_csv(
    "classification_results.csv",
    header=True,
    index=False,
)

# Part B: MLP for Regression

In [None]:
class HousingDataLoader:
    """Load the housing CSV and serve a train/test split for regression.

    Parameters
    ----------
    csv_path : str, default '../input/california-housing-prices/housing.csv'
        Location of the CSV file. It must contain the columns used by
        `get_data` plus ``longitude`` and ``latitude``.
    """

    def __init__(self, csv_path='../input/california-housing-prices/housing.csv'):
        raw_data = pd.read_csv(csv_path)
        # NOTE(review): missing bedroom counts are replaced by the constant 1;
        # confirm this is intended rather than e.g. a median imputation.
        raw_data['total_bedrooms'] = raw_data.total_bedrooms.fillna(1)
        self.raw_data = raw_data.drop(columns=['longitude', 'latitude'])

    def get_data(self, test_size=0.25, random_state=42):
        """Return ``X_train, X_test, y_train, y_test``.

        Features are the six numeric columns listed below and the target is
        ``median_house_value``. `random_state` seeds both the row shuffle and
        the split, so the same value always yields the same partition.
        """
        X_cols = ['housing_median_age', 'total_rooms', 'total_bedrooms', 'population', 'households', 'median_income']
        Y_col = 'median_house_value'
        # Seed the shuffle as well: left unseeded it would make the split
        # differ between runs even though train_test_split has a fixed seed.
        data = self.raw_data.sample(frac=1, random_state=random_state).reset_index(drop=True)
        return train_test_split(data[X_cols], data[Y_col], test_size=test_size, random_state=random_state)

In [None]:
# Part B data: from here on X_train / X_test / y_train / y_test hold the
# housing split (they replace the MNIST arrays of Part A).
dataset = HousingDataLoader()
X_train, X_test, y_train, y_test = dataset.get_data()
# NOTE(review): features and target are fed to the network unscaled; consider
# standardising them, since MLP training is sensitive to feature scale.
for hidden_layers_count in range(1, 5):
    layer_sizes = (100,) * hidden_layers_count
    # Fixed random_state so the depth comparison is repeatable between runs.
    clf = MLPRegressor(hidden_layer_sizes=layer_sizes, random_state=42).fit(X_train, y_train)
    # scikit-learn metrics take (y_true, y_pred) in that order.
    mse = mean_squared_error(y_test, clf.predict(X_test))
    print("Squared error for MLP regressor with {} hidden layers: {:.2f}".format(hidden_layers_count, mse))

In [None]:
# Search space for the regression grid search (replaces the Part A grid).
# 'hidden_layer_sizes' holds the width used for every hidden layer.
param_grid = dict(
    hidden_layer_sizes=[50, 100],
    activation=['logistic', 'tanh'],
    early_stopping=[True, False],
    momentum=[0.25, 0.5, 0.75],
    learning_rate=['constant', 'adaptive'],
)

In [None]:
# Exhaustive grid search over `param_grid` for a 3-hidden-layer regressor.
# One dict per trained model is collected in `reg_result_grid`.
reg_result_grid = []
i = 0
# itertools.product varies the last factor fastest, i.e. the same visiting
# order as nesting the five loops by hand.
search_space = itertools.product(
    param_grid['hidden_layer_sizes'],
    param_grid['activation'],
    param_grid['early_stopping'],
    param_grid['momentum'],
    param_grid['learning_rate'],
)
for i, (hidden_layer_size, activation, early_stopping, momentum, learning_rate) in enumerate(search_space, start=1):
    # solver='sgd' is required for `momentum` and the `learning_rate` schedule
    # to have any effect; random_state keeps every configuration repeatable.
    clf = MLPRegressor(
        hidden_layer_sizes=(hidden_layer_size,) * 3,
        activation=activation,
        early_stopping=early_stopping,
        momentum=momentum,
        learning_rate=learning_rate,
        solver='sgd',
        random_state=42,
    ).fit(X_train, y_train)
    # scikit-learn metrics take (y_true, y_pred) in that order.
    score = mean_squared_error(y_test, clf.predict(X_test))
    reg_result_grid.append({
        'hidden_layer_size': hidden_layer_size,
        'activation': activation,
        'early_stopping': early_stopping,
        'momentum': momentum,
        'learning_rate': learning_rate,
        'squared_error': score
    })
    print("Model trained: {}".format(i))

In [None]:
# Rank by error (lowest first) before truncating, so the best configurations
# are always among the 50 rows shown.
pd.DataFrame(reg_result_grid).sort_values('squared_error', ascending=True).head(50)

In [None]:
# Persist every regression grid-search row (with a header, without the index).
pd.DataFrame(reg_result_grid).to_csv(
    "regression_results.csv",
    header=True,
    index=False,
)