# Imports and shared setup

In [None]:
import pathlib
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

from LoadData import LoadData

# Plot appearance shared by every figure produced in this file.
plt.style.use('ggplot')
dataset_name = "MNIST"

# Maps each dimensionality-reduced variant of the dataset to the CSV file that
# holds it. Insertion order matters: results are appended to `accuracy` and
# `run_time` in this order and read back by position further down.
reduced_datasets = {
    "PrincipalComponentReduced_MNIST": "PrincipalComponentReduced_MNIST.csv",
    "RandomForestReduced_MNIST": "RandomForestReduced_MNIST.csv",
    "IndependentComponentReduced_MNIST": "IndependentComponentReduced_MNIST.csv",
    "RandomProjectionReduced_MNIST": "RandomProjectionReduced_MNIST.csv",
}

# Result accumulators: one entry is appended per trained network.
accuracy, run_time = [], []

# Load one reduced dataset only to learn how many rows it has; `size` is later
# passed to LoadData when reading the original training data.
temp = np.loadtxt(reduced_datasets["PrincipalComponentReduced_MNIST"], delimiter=",")
size = np.shape(temp)[0]
print(size)

In [None]:
# Baseline: train the network on the data loaded from mnist-train-data.csv
# (presumably the un-reduced features -- confirm against LoadData) so the
# reduced datasets have a reference point for accuracy and training time.
cwd = pathlib.Path().absolute()

# `size` comes from the setup cell: only as many samples as the reduced
# datasets contain are requested.
training_labels, training_data, _ = LoadData("{}/mnist-train-data.csv".format(cwd), normalize=True, size=size)

# Fixed random_state so every run evaluates on the same 67/33 split.
X_train, X_test, y_train, y_test = train_test_split(training_data, training_labels, test_size=0.33, random_state=0)
print(training_data.shape)

start_time = time.time()

# NOTE(review): n_iter_no_change=30 is only set for the baseline; the
# reduced-dataset runs later in the file leave it at the library default, so
# the run times are not measured under identical stopping rules.
nn_estimator = MLPClassifier(hidden_layer_sizes=(100,), verbose=3, n_iter_no_change=30)
nn_estimator.fit(X_train, y_train)

end_time = time.time()

# Rounded to 3 decimals, matching the reduced-dataset runs.
elapsed_time = round((end_time - start_time), 3)
run_time.append(elapsed_time)
# Stored as a fraction in [0, 1]; converted to percent only when displayed.
testing_acc = round(nn_estimator.score(X_test, y_test), 2)
accuracy.append(testing_acc)

df = pd.DataFrame(nn_estimator.loss_curve_, columns=["Loss"])
df.plot()
# The score is a fraction, so scale it by 100 before labelling it with "%".
plt.title("{} Loss \n Testing Accuracy={:.0f}%\nElapsed Time={}sec".format("Baseline", testing_acc * 100,
                                                                           elapsed_time))
plt.xlabel("Iterations")
# Same fixed axis limits as the reduced-dataset loss plots.
plt.xlim(0, 105)
plt.ylim(0, 1.05)
plt.ylabel("Loss")
plt.tight_layout()
plt.savefig("{}_Loss.png".format("Baseline"))

print("Elapsed Time: {}".format(elapsed_time))
print("Training set score: %f" % nn_estimator.score(X_train, y_train))
print("Test set score: %f" % testing_acc)

In [None]:
# Train the same network on each dimensionality-reduced dataset, recording the
# test accuracy and training time and saving a loss-curve plot per dataset.
for key, val in reduced_datasets.items():
    print(key)

    # `val` is the CSV file holding the reduced features for this dataset.
    training_data = np.loadtxt(val, delimiter=",")
    # Only the labels are taken from the original file; the row count is
    # matched to the reduced data so features and labels line up.
    training_labels, _, _ = LoadData("{}/mnist-train-data.csv".format(cwd), normalize=False,
                                     size=training_data.shape[0])

    # Fixed random_state so every dataset is evaluated on the same split.
    X_train, X_test, y_train, y_test = train_test_split(training_data, training_labels, test_size=0.33, random_state=0)
    print(training_data.shape)
    start_time = time.time()

    # Per-dataset hyper-parameter overrides.
    # NOTE(review): 0.001 is already MLPClassifier's default
    # learning_rate_init, so the ICA override currently changes nothing --
    # confirm whether a different value was intended.
    extra_params = {"learning_rate_init": 0.001} if key == "IndependentComponentReduced_MNIST" else {}
    nn_estimator = MLPClassifier(hidden_layer_sizes=(100,), verbose=3, **extra_params)
    nn_estimator.fit(X_train, y_train)

    end_time = time.time()
    elapsed_time = round((end_time - start_time), 3)

    run_time.append(elapsed_time)
    # Stored as a fraction in [0, 1]; converted to percent only when displayed.
    testing_acc = round(nn_estimator.score(X_test, y_test), 2)
    accuracy.append(testing_acc)

    df = pd.DataFrame(nn_estimator.loss_curve_, columns=["Loss"])
    df.plot()
    # The score is a fraction, so scale it by 100 before labelling it with "%".
    plt.title("{} Loss \n Testing Accuracy={:.0f}%\nElapsed Time={}sec".format(key, testing_acc * 100, elapsed_time))
    plt.xlabel("Iterations")
    # Fixed axis limits so the per-dataset loss plots share one scale.
    plt.xlim(0, 105)
    plt.ylim(0, 1.05)
    plt.ylabel("Loss")
    plt.tight_layout()
    plt.savefig("{}_Loss.png".format(key))

    print("Elapsed Time: {}".format(elapsed_time))
    print("Training set score: %f" % nn_estimator.score(X_train, y_train))
    print("Test set score: %f" % testing_acc)

# All loss plots have been written to disk; release the figures.
plt.close('all')

def _save_summary_bar_chart(values, title, ylabel, filename):
    """Draw one labelled bar per entry of *values* and save the figure.

    values: pandas Series; its index supplies the tick label under each bar.
    title: chart title.
    ylabel: label for the y axis.
    filename: path the figure is written to.
    """
    # Use a dedicated figure/axes so the two summary charts can never end up
    # drawn on top of each other.
    fig, ax = plt.subplots()
    values.plot(kind="bar", ax=ax, title=title, rot=45)
    ax.set_xlabel("Dimensionality Reduction Method")
    ax.set_ylabel(ylabel)
    fig.tight_layout()
    fig.savefig(filename)


# Results were appended in this order: the baseline first, then the reduced
# datasets in the insertion order of `reduced_datasets`.
d = {"Baseline": accuracy[0], "PCA": accuracy[1], "RandomForest": accuracy[2], "ICA": accuracy[3],
     "RandomProjection": accuracy[4]}
testing_acc_df = pd.DataFrame(data=d, index=[0])
# Plot the single row as a Series so each bar gets its own tick label, and
# scale the stored fractions to match the "Accuracy(%)" axis.
_save_summary_bar_chart(testing_acc_df.iloc[0] * 100,
                        "Testing Accuracy on Dimensionality Reduced Dataset",
                        "Accuracy(%)",
                        "TestingAccuracyDimensionReduced.png")

d2 = {"Baseline": run_time[0], "PCA": run_time[1], "RandomForest": run_time[2], "ICA": run_time[3],
      "RandomProjection": run_time[4]}
run_time_df = pd.DataFrame(data=d2, index=[0])
_save_summary_bar_chart(run_time_df.iloc[0],
                        "Run Time on Dimensionality Reduced Dataset",
                        "Time(sec)",
                        "RunTimeDimensionReduced.png")
print()