# Problem 1: MLP Classifier

Import libraries

In [4]:
import os
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

In [5]:
import sklearn.neural_network
import sklearn.model_selection

Load the training and validation data

In [6]:
# Directory containing the CSV splits, resolved to an absolute path.
data_dir = os.path.abspath("data_fashion/")


def _read_csv_as_array(file_name):
    """Read `file_name` from `data_dir` with pandas and return it as a NumPy array."""
    csv_path = os.path.join(data_dir, file_name)
    return pd.read_csv(csv_path).to_numpy()


# The label files are read as 2-D tables; only the column at index 1 is kept
# and cast to 32-bit integers.
train_x = _read_csv_as_array("x_train.csv")
train_y = _read_csv_as_array("y_train.csv")[:, 1].astype(np.int32)

valid_x = _read_csv_as_array("x_valid.csv")
valid_y = _read_csv_as_array("y_valid.csv")[:, 1].astype(np.int32)

# Sanity check: show the shape of each split, then the training labels.
for split_array in (train_x, train_y, valid_x, valid_y):
    print(split_array.shape)

print(train_y)

(2102, 784)
(2102,)
(600, 784)
(600,)
[2 0 3 ... 5 5 5]


Count the number of examples of each class in the training and validation sets

In [34]:
# Integer label used for each garment class, listed in the order the counts
# are reported below.
class_label_ids = {
    "dress": 3,
    "pullover": 2,
    "top": 0,
    "trouser": 1,
    "sandal": 5,
    "sneaker": 7,
}

# For every class: (number of training examples, number of validation examples).
class_counts = {
    class_name: (
        np.count_nonzero(train_y == label_id),
        np.count_nonzero(valid_y == label_id),
    )
    for class_name, label_id in class_label_ids.items()
}

dress_count_tr, dress_count_va = class_counts["dress"]
pullover_count_tr, pullover_count_va = class_counts["pullover"]
top_count_tr, top_count_va = class_counts["top"]
trouser_count_tr, trouser_count_va = class_counts["trouser"]
sandal_count_tr, sandal_count_va = class_counts["sandal"]
sneaker_count_tr, sneaker_count_va = class_counts["sneaker"]

# One number per line: training count first, then validation count, per class.
for count_tr, count_va in class_counts.values():
    print(count_tr)
    print(count_va)

400
100
100
100
1
100
1
100
800
100
800
100


Build base model

In [8]:
# Settings of the base estimator: a single hidden layer of 800 units, ReLU
# activation, the L-BFGS solver, a very large iteration cap, and a fixed
# random seed.
base_mlp_settings = {
    "hidden_layer_sizes": (800,),
    "activation": "relu",
    "solver": "lbfgs",
    "max_iter": 1000000,
    "random_state": 1,
}

base_mlp = sklearn.neural_network.MLPClassifier(**base_mlp_settings)

# Author's note (presumably the hyperparameters considered for tuning):
# activation, solver, hidden layer, alpha

Create custom splitter for grid search

In [9]:
# Custom splitter: one fixed train/validation split for the grid search.

# Training rows first, validation rows after them, in a single array pair.
xall_LF = np.concatenate((train_x, valid_x), axis=0)
yall_L = np.concatenate((train_y, valid_y))

# Fold id per row, in the same order as above: -1 for every training row,
# 0 for every validation row.
# (Per the scikit-learn docs, -1 keeps a row out of every test fold.)
valid_indicators_L = np.concatenate((
    np.full(train_y.size, -1.0),
    np.zeros(valid_y.size),
))

my_splitter = sklearn.model_selection.PredefinedSplit(valid_indicators_L)

Define the hyperparameter grid and create the grid searcher

In [10]:
# Search space for the grid search:
#   - hidden_layer_sizes: one hidden layer whose width is a power of two, 8 .. 1024
#   - alpha: 11 log-spaced values from 1e-5 to 1e5
# NOTE(review): this grid has 8 x 11 = 88 combinations, but the saved results
# table further down reports 77 rows -- it looks like it came from a different
# version of the grid; re-run to confirm.
mlp_hyperparam_grid = {
    "hidden_layer_sizes": [(2 ** exponent,) for exponent in range(3, 11)],
    "alpha": np.logspace(-5, 5, num=11),
}

In [11]:
# Exhaustive search over `mlp_hyperparam_grid`, scored by balanced accuracy on
# the split produced by `my_splitter`. Training scores are recorded as well,
# and no final model is refit after the search (refit=False).
mlp_grid_searcher = sklearn.model_selection.GridSearchCV(
    estimator=base_mlp,
    param_grid=mlp_hyperparam_grid,
    scoring='balanced_accuracy',
    cv=my_splitter,
    refit=False,
    return_train_score=True,
)

Fit grid searcher

In [12]:

# Run the grid search on the stacked train + validation arrays.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, so this
# run apparently did not finish -- confirm where the results shown below came from.
mlp_grid_searcher.fit(X=xall_LF, y=yall_L)


KeyboardInterrupt: 

Get and format the Grid Search Results

In [18]:
# Tabulate the search results with the best-ranked configurations first so the
# table is easy to skim.
mlp_gsearch_results_df = (
    pd.DataFrame(mlp_grid_searcher.cv_results_)
    .sort_values(by='rank_test_score')
)

# Show only the validation score, its rank, and the two searched hyperparameters.
summary_columns = [
    'split0_test_score',
    'rank_test_score',
    'param_hidden_layer_sizes',
    'param_alpha',
]
print(mlp_gsearch_results_df[summary_columns])

    split0_test_score  rank_test_score param_hidden_layer_sizes param_alpha
28           0.725000                1                    (16,)         0.1
39           0.680000                2                   (256,)         1.0
22           0.678333                3                    (32,)        0.01
38           0.671667                4                   (128,)         1.0
8            0.666667                5                    (32,)      0.0001
..                ...              ...                      ...         ...
6            0.330000               69                  (1024,)     0.00001
34           0.330000               69                  (1024,)         0.1
41           0.330000               69                  (1024,)         1.0
27           0.330000               69                  (1024,)        0.01
48           0.330000               69                  (1024,)        10.0

[77 rows x 4 columns]


In [17]:
# Hyperparameter combination that the grid search ranked best.
best_mlp_params = mlp_grid_searcher.best_params_
print(best_mlp_params)

{'alpha': 0.1, 'hidden_layer_sizes': (16,)}


In [None]:
# Save the sorted results table to disk (written by `to_csv`, even though the
# file name ends in .txt).
mlp_gsearch_results_df.to_csv(path_or_buf="my_results.txt")

Add duplicates to the training data so that every class has the same number of examples

In [35]:
def _append_duplicates(features, labels, extra_copies_by_label):
    """Return copies of `features`/`labels` with duplicated rows appended.

    Every row whose label appears in `extra_copies_by_label` is appended that
    many extra times after the original rows. Rows with any other label are
    not duplicated. Duplicates are appended in original row order, with all
    copies of one row next to each other.

    Parameters
    ----------
    features : 2-D array with one row per example.
    labels : 1-D array with one label per row of `features`.
    extra_copies_by_label : dict mapping a label to the number of extra
        copies to append for each row carrying that label.

    Returns
    -------
    (features_with_dups, labels_with_dups) : new arrays; the inputs are left
        unmodified.
    """
    n_rows = labels.shape[0]

    # Number of extra copies wanted for each individual row (0 by default).
    extra_copies_per_row = np.zeros(n_rows, dtype=np.intp)
    for label, n_extra in extra_copies_by_label.items():
        extra_copies_per_row[labels == label] = n_extra

    # Row index repeated once per extra copy, e.g. [0, 0, 0, 4, 7, 7, ...].
    dup_row_ids = np.repeat(np.arange(n_rows), extra_copies_per_row)

    # Gather whole rows and stack them under the originals. The previous
    # version used np.append without `axis`, which flattens the feature matrix
    # to 1-D and therefore appended single values instead of rows.
    features_with_dups = np.concatenate([features, features[dup_row_ids]], axis=0)
    labels_with_dups = np.concatenate([labels, labels[dup_row_ids]])
    return features_with_dups, labels_with_dups


# Number of original training examples (before any duplicates are added).
size_tr = train_y.shape[0]

# Extra copies appended per original example of each class:
# dress (3) once, pullover (2) seven times, top (0) and trouser (1) 799 times.
extra_copies_by_label = {3: 1, 2: 7, 0: 799, 1: 799}

train_x_dups, train_y_dups = _append_duplicates(
    train_x, train_y, extra_copies_by_label
)

# Features and labels must stay aligned row-for-row.
assert train_x_dups.shape[0] == train_y_dups.shape[0]
assert train_x_dups.shape[1:] == train_x.shape[1:]

In [36]:
# Re-count the training examples per class after duplication.
# Labels, in reporting order: dress=3, pullover=2, top=0, trouser=1,
# sandal=5, sneaker=7.
(
    dress_count_tr,
    pullover_count_tr,
    top_count_tr,
    trouser_count_tr,
    sandal_count_tr,
    sneaker_count_tr,
) = (
    np.count_nonzero(train_y_dups == label_id)
    for label_id in (3, 2, 0, 1, 5, 7)
)

# One count per line, in the same class order as above.
for class_count in (
    dress_count_tr,
    pullover_count_tr,
    top_count_tr,
    trouser_count_tr,
    sandal_count_tr,
    sneaker_count_tr,
):
    print(class_count)

800
800
800
800
800
800
