In [1]:
# Standard scientific Python imports
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Import datasets, classifiers and performance metrics
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC as SVMClassifier

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve
from sklearn.preprocessing import StandardScaler

try:
    from sklearn.metrics import plot_confusion_matrix
except ImportError:  # helper was removed in scikit-learn 1.2
    plot_confusion_matrix = None

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
# Label font sizes: axis labels at 14pt, x/y tick labels at 12pt
for rc_group, label_size in (('axes', 14), ('xtick', 12), ('ytick', 12)):
    mpl.rc(rc_group, labelsize=label_size)


def sort_by_target(mnist, n_train=60000):
    """Sort the train and test partitions of a dataset by label, in place.

    The first ``n_train`` rows are treated as the training partition and the
    remaining rows as the test partition. Each partition is reordered
    independently so that its targets are ascending; rows with equal targets
    keep their original relative order.

    Parameters
    ----------
    mnist : object
        Must expose ``data`` and ``target`` attributes that support NumPy
        slicing, integer-array indexing and slice assignment
        (assumed to be NumPy arrays; confirm against the caller).
    n_train : int, default 60000
        Number of leading rows that form the training partition.

    Returns
    -------
    None
        ``mnist.data`` and ``mnist.target`` are modified in place.
    """
    # A stable argsort yields the same order as sorting (target, index) pairs,
    # and it also copes with an empty partition.
    train_order = np.argsort(mnist.target[:n_train], kind="stable")
    test_order = np.argsort(mnist.target[n_train:], kind="stable") + n_train

    # Fancy indexing on the right-hand side copies first, so the in-place
    # slice assignment never reads rows it has already overwritten.
    mnist.data[:n_train] = mnist.data[train_order]
    mnist.target[:n_train] = mnist.target[train_order]
    mnist.data[n_train:] = mnist.data[test_order]
    mnist.target[n_train:] = mnist.target[test_order]

In [3]:
# Download the MNIST handwritten digit dataset from OpenML.
# as_frame=False requests NumPy arrays: newer scikit-learn releases otherwise
# return a pandas DataFrame for this dataset, on which the positional row
# access used in later cells (e.g. X[1]) does not work.
mnist = fetch_openml('mnist_784', version=1, cache=True, as_frame=False)
mnist.target = mnist.target.astype(np.int8) # fetch_openml() returns targets as strings
#sort_by_target(mnist) # fetch_openml() returns an unsorted dataset


In [None]:
# Get data and labels of MNIST dataset
X, y = mnist["data"], mnist["target"]
assert X.shape[0] == y.shape[0], "every image needs exactly one label"

# MNIST holds 70,000 images, each stored as a flat row of 28x28 = 784 pixels
print("Image Data Shape:" , X.shape)

# One label per image (integers from 0-9), i.e. 70,000 labels
print("Label Data Shape:", y.shape)

In [None]:
# List the distinct label values that occur in the targets
class_labels = np.unique(y)
print("Classes:", class_labels)

In [None]:

# Render the sample at index 1 as a 28x28 picture using the binary colormap
some_digit = X[1]
some_digit_image = np.reshape(some_digit, (28, 28))
plt.imshow(some_digit_image, cmap=mpl.cm.binary, interpolation="nearest")
plt.axis("off")
plt.show()

In [None]:
# Plot the first num_row * num_col samples together with their labels
num_row = 2
num_col = 5
fig, axes = plt.subplots(num_row, num_col, figsize=(1.5*num_col, 2*num_row))
# Walk the axes grid in row-major order and pair each axis with the next
# sample; zip stops after the last axis, so exactly one image lands per axis.
# (Looping over the class labels and using them as sample indices only worked
# because the labels happen to be 0..9.)
for ax, pixels, label in zip(axes.flat, X, y):
    ax.imshow(pixels.reshape(28, 28), cmap=mpl.cm.binary,
              interpolation="nearest")
    ax.set_title('Label: {}'.format(label))
plt.tight_layout()
plt.show()

In [None]:
# Split the dataset into train (80%) and test (20%) partitions.
# A fixed random_state makes the shuffle, and therefore every downstream
# result, reproducible after a kernel restart.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

print("Train Data Size: {}".format(y_train.size))
print("Test Data Size: {}".format(y_test.size))

In [None]:
## BINARY CLASSIFICATION PROBLEM (WHETHER THE DIGIT IS 5 or NOT)
# Boolean targets: True where the label equals 5, False otherwise
y_train_5 = np.equal(y_train, 5)
y_test_5 = np.equal(y_test, 5)

In [None]:
# Three MLPs that differ only in their hidden-layer activation function.
# The shared settings live in one place so the comparison stays fair, and a
# fixed random_state makes weight initialisation, SGD shuffling and the
# early-stopping validation split reproducible.
mlp_binary_params = dict(
    solver='sgd',
    hidden_layer_sizes=(4, 3, 2),
    max_iter=30,
    verbose=True,
    early_stopping=True,
    random_state=42,
)
mlp_relu = MLPClassifier(activation='relu', **mlp_binary_params)
mlp_tanh = MLPClassifier(activation='tanh', **mlp_binary_params)
mlp_sigmoid = MLPClassifier(activation='logistic', **mlp_binary_params)

In [None]:
# Fit the three binary "is it a 5?" classifiers in turn on the raw pixels
for activation_name, model in (("RELU", mlp_relu),
                               ("TANH", mlp_tanh),
                               ("SIGMOID", mlp_sigmoid)):
    print("\nTRAINING for MLP with {} ACTIVATION STARTED:\n".format(activation_name))
    model.fit(x_train, y_train_5)

# Echo the last fitted estimator as the cell output
mlp_sigmoid


In [None]:
# Training loss per iteration for each activation function
plt.title("Loss Curves on MLP Training")
plt.plot(mlp_sigmoid.loss_curve_, label='MLP with Sigmoid Activation')
plt.plot(mlp_relu.loss_curve_, label='MLP with ReLU Activation')
plt.plot(mlp_tanh.loss_curve_, label='MLP with Tanh Activation')
plt.xlabel("Iteration")
plt.ylabel("Training loss")
plt.legend()
plt.show()  # render the figure without echoing the Legend repr


In [None]:
# Class-probability estimates on the test set, one array per classifier
y_scores_relu, y_scores_tanh, y_scores_sigmoid = (
    clf.predict_proba(x_test) for clf in (mlp_relu, mlp_tanh, mlp_sigmoid)
)


In [None]:
# Show the predicted probability of being a 5 on a few test samples.
# The samples are taken from x_test (not from the full X, which is mostly
# training data) so the displayed probabilities are genuine held-out results.
num_row = 2
num_col = 5
first_sample = 32  # offset of the first displayed sample within the test set
samples = x_test[first_sample:first_sample + num_row * num_col]
probs = mlp_tanh.predict_proba(samples)  # one batched call instead of one per image

fig, axes = plt.subplots(num_row, num_col, figsize=(1.5*num_col, 2*num_row))
for ax, some_digit, prob in zip(axes.flat, samples, probs):
    # column 1 of predict_proba is the positive ("is a 5") class
    ax.set_title('P(5): {:.2f}'.format(prob[1]))
    ax.imshow(some_digit.reshape(28, 28), cmap=mpl.cm.binary,
              interpolation="nearest")
plt.tight_layout()
plt.show()

In [None]:
# Comparison of ROC curves for different activation functions.
# Column 1 of each score array is the probability of the positive class.
fpr_tanh, tpr_tanh, thresholds_tanh = roc_curve(y_test_5, y_scores_tanh[:,1])
fpr_sigmoid, tpr_sigmoid, thresholds_sigmoid = roc_curve(y_test_5, y_scores_sigmoid[:,1])
fpr_relu, tpr_relu, thresholds_relu = roc_curve(y_test_5, y_scores_relu[:,1])

plt.title("ROC Curve of predicting 5 for different activation functions on MLP")
plt.plot(fpr_tanh, tpr_tanh, label='MLP with Tanh Activation')
plt.plot(fpr_sigmoid, tpr_sigmoid, label='MLP with Sigmoid Activation')
plt.plot(fpr_relu, tpr_relu, label='MLP with ReLU Activation')
plt.legend()
plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal, kept out of the legend
plt.axis([0, 1, 0, 1])
plt.xlabel('False Positive Rate', fontsize=16)
plt.ylabel('True Positive Rate', fontsize=16)
plt.show()  # render the figure without echoing the Text repr



In [None]:
# MEASURING EFFECT OF STANDARDIZATION
# Fit the scaler on the training data only and reuse its statistics for the
# test data, so no test-set information leaks into the preprocessing.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train.astype(np.float64))
x_test_scaled = scaler.transform(x_test.astype(np.float64))

print("\nTRAINING for MLP with TANH ACTIVATION STARTED:\n")
# Train a fresh estimator with the same hyper-parameters instead of refitting
# mlp_tanh in place: the model fitted on unscaled pixels stays intact, so
# re-running earlier cells still gives the same answers.
mlp_tanh_scaled = MLPClassifier(**mlp_tanh.get_params())
mlp_tanh_scaled.fit(x_train_scaled, y_train_5)

y_scaled_scores_tanh = mlp_tanh_scaled.predict_proba(x_test_scaled)

# NOTE: 'fpr_sclaed_tanh' (sic) is the name the comparison plot reads.
fpr_sclaed_tanh, tpr_scaled_tanh, thresholds_scaled_tanh = roc_curve(y_test_5, y_scaled_scores_tanh[:,1])

In [None]:

# ROC of the tanh MLP trained on raw pixels vs. on standardized pixels
fig, ax = plt.subplots()
ax.set_title("ROC Curve of predicting 5 with/without standardization on MLP")
ax.plot(fpr_tanh, tpr_tanh, label='MLP without standardization')
ax.plot(fpr_sclaed_tanh, tpr_scaled_tanh, label='MLP with standardization')
ax.legend()
ax.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
ax.axis([0, 1, 0, 1])
ax.set_xlabel('False Positive Rate', fontsize=16)
ax.set_ylabel('True Positive Rate', fontsize=16)

In [None]:
# Train a multi-class classifier (all ten digits) on the standardized pixels
print("\nTRAINING for MLP with TANH ACTIVATION STARTED:\n")
# random_state fixes weight initialisation, SGD shuffling and the
# early-stopping validation split so the reported metrics are reproducible.
mlp_tanh = MLPClassifier(solver='sgd', hidden_layer_sizes=(20, 15, 10), activation='tanh',
                         max_iter=60, verbose=True, early_stopping=True,
                         random_state=42)

mlp_tanh.fit(x_train_scaled, y_train)
y_pred_scaled_tanh = mlp_tanh.predict(x_test_scaled)


In [None]:
# Share of test samples whose predicted digit equals the true digit
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_scaled_tanh)
print("Test Accuracy: {}".format(accuracy))


In [None]:
# Confusion matrix of the multi-class MLP on the standardized test set.
# It is built from the stored predictions with ConfusionMatrixDisplay because
# the plot_confusion_matrix helper was removed in scikit-learn 1.2.
digit_labels = np.unique(y)
cm = confusion_matrix(y_test, y_pred_scaled_tanh, labels=digit_labels)
ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=digit_labels).plot()
plt.show()

In [None]:
def plot_digit(data):
    """Draw one flattened digit as a 28x28 image on the current axes.

    ``data`` is reshaped to (28, 28) and shown with the binary colormap and
    nearest-neighbour interpolation; the axis decorations are switched off.
    """
    plt.imshow(data.reshape(28, 28),
               cmap=mpl.cm.binary,
               interpolation="nearest")
    plt.axis("off")

# EXTRA
def plot_digits(instances, images_per_row=10, **options):
    """Draw several flattened digits as one tiled image on the current axes.

    Parameters
    ----------
    instances : sequence of array-like
        Each item must be reshapeable to (28, 28). An empty sequence draws a
        single blank tile instead of raising.
    images_per_row : int, default 10
        Maximum number of tiles per row; capped at ``len(instances)``.
    **options
        Extra keyword arguments forwarded to ``plt.imshow``.

    Returns
    -------
    None
        The tiled image is drawn with the binary colormap; the axis
        decorations are left visible.
    """
    size = 28
    n_images = len(instances)
    if n_images == 0:
        # Without this guard images_per_row would become 0 and the row count
        # below would divide by zero; show an empty tile so that a subplot
        # grid still gets a panel.
        plt.imshow(np.zeros((size, size)), cmap=mpl.cm.binary, **options)
        return

    images_per_row = min(n_images, images_per_row)
    images = [instance.reshape(size, size) for instance in instances]
    n_rows = (n_images - 1) // images_per_row + 1  # ceiling division
    n_empty = n_rows * images_per_row - n_images
    # One zero block as wide as all missing tiles pads the last row to full
    # width (it has zero width, and is never sliced, when the grid is full).
    images.append(np.zeros((size, size * n_empty)))
    row_images = [
        np.concatenate(images[row * images_per_row:(row + 1) * images_per_row], axis=1)
        for row in range(n_rows)
    ]
    image = np.concatenate(row_images, axis=0)
    plt.imshow(image, cmap=mpl.cm.binary, **options)
    
    
# Error analysis for the digit pair cl_a / cl_b.
# The images are taken from x_test (original pixel values) rather than
# x_test_scaled: standardized features no longer look like the digits.
# The boolean masks still come from the predictions made on the scaled data.
cl_a, cl_b = 7, 9
X_aa = x_test[(y_test == cl_a) & (y_pred_scaled_tanh == cl_a)]  # true a, predicted a
X_ab = x_test[(y_test == cl_a) & (y_pred_scaled_tanh == cl_b)]  # true a, predicted b
X_ba = x_test[(y_test == cl_b) & (y_pred_scaled_tanh == cl_a)]  # true b, predicted a
X_bb = x_test[(y_test == cl_b) & (y_pred_scaled_tanh == cl_b)]  # true b, predicted b

# 2x2 layout: rows are the true class, columns the predicted class
plt.figure(figsize=(28,28))
plt.subplot(221); plot_digits(X_aa[1:10], images_per_row=3)
plt.subplot(222); plot_digits(X_ab[1:10], images_per_row=3)
plt.subplot(223); plot_digits(X_ba[1:10], images_per_row=3)
plt.subplot(224); plot_digits(X_bb[1:10], images_per_row=3)
plt.show()

