# Homework 5

In [1]:
import pandas as pd
import numpy as np
import csv
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from scipy.stats import uniform
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV


## Download Data

In [2]:
df = pd.read_csv('train.csv')
print(df.head())
print(df.info())


   label  pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  \
0      1       0       0       0       0       0       0       0       0   
1      0       0       0       0       0       0       0       0       0   
2      1       0       0       0       0       0       0       0       0   
3      4       0       0       0       0       0       0       0       0   
4      0       0       0       0       0       0       0       0       0   

   pixel8  ...  pixel774  pixel775  pixel776  pixel777  pixel778  pixel779  \
0       0  ...         0         0         0         0         0         0   
1       0  ...         0         0         0         0         0         0   
2       0  ...         0         0         0         0         0         0   
3       0  ...         0         0         0         0         0         0   
4       0  ...         0         0         0         0         0         0   

   pixel780  pixel781  pixel782  pixel783  
0         0         0         

In [3]:
df_test = pd.read_csv('test.csv')
df_test.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Data Preparation

### Exporting first 10 rows as images

In [4]:
i = 0
with open('train.csv', 'r') as csv_file:
    csv_reader = csv.reader(csv_file)
    next(csv_reader)  # Skip header row if exists
    
    for data in csv_reader:
        # First column is label (0-9)
        label = int(data[0])
        
        # Remaining columns are pixels (0-255)
        pixels = data[1:]  # Skip first column
        pixels = np.array(pixels, dtype='uint8')
        
        # Reshape to 28x28
        pixels = pixels.reshape((28, 28))
        
        # Save image
        plt.imshow(pixels, cmap='pink')
        plt.title(f'Label: {label}')
        plt.savefig(f'image_{i}_label_{label}.png')
        plt.close()
        
        i += 1
        
        if i >= 10:  # Limit to first 10 images for testing
            break

### See if there are any missing values

In [5]:
print("\nMissing Values:")
print(df.isnull().sum()[df.isnull().sum() > 0])


Missing Values:
Series([], dtype: int64)


### Split data into $X$ and $y$

In [6]:
X = df.drop('label', axis=1)
y = df['label']

### Normalize pixel values

In [7]:
# for training set
X = X / 255

#for test set
df_test = df_test / 255


## Build and train model

### Train test split

In [8]:
# Split data for a train and validation set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Fitting

In [9]:
# initialize the model
mlp = MLPClassifier(activation='logistic') 

# fit the model
mlp.fit(X_train, y_train)

# make predictions
y_pred = mlp.predict(X_test)

# calculate accuracy
mlp.score(X_test, y_test)

0.9716666666666667

### Hyperparameter tuning

In [10]:
# set the hyperparameter distributions
param_dist = {
    'hidden_layer_sizes': [(50,), (100,), (150,)],
    'alpha': uniform(0.0001, 0.01),
}

# Setup RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=mlp,
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    random_state=42,
    scoring='accuracy',
    n_jobs=-1,
    verbose=2
)

# Perform Random Search Cross-Validation
random_search.fit(X_train, y_train)

# Get the best parameters from RandomSearchCV
best_params = random_search.best_params_

Fitting 3 folds for each of 100 candidates, totalling 300 fits
[CV] END alpha=0.001934347898661638, hidden_layer_sizes=(50,); total time= 3.2min
[CV] END alpha=0.001934347898661638, hidden_layer_sizes=(50,); total time= 3.5min
[CV] END alpha=0.0038454011884736248, hidden_layer_sizes=(50,); total time= 3.6min




[CV] END alpha=0.0038454011884736248, hidden_layer_sizes=(50,); total time= 3.8min
[CV] END alpha=0.001934347898661638, hidden_layer_sizes=(50,); total time= 3.8min




[CV] END alpha=0.0038454011884736248, hidden_layer_sizes=(50,); total time= 3.8min
[CV] END alpha=0.006068501579464871, hidden_layer_sizes=(100,); total time= 4.1min
[CV] END alpha=0.006068501579464871, hidden_layer_sizes=(100,); total time= 5.0min
[CV] END alpha=0.004692488919658672, hidden_layer_sizes=(50,); total time= 7.5min




[CV] END alpha=0.004692488919658672, hidden_layer_sizes=(50,); total time= 7.9min




[CV] END alpha=0.004692488919658672, hidden_layer_sizes=(50,); total time= 7.9min
[CV] END alpha=0.0016599452033620266, hidden_layer_sizes=(150,); total time= 8.3min
[CV] END alpha=0.006068501579464871, hidden_layer_sizes=(100,); total time=10.0min
[CV] END alpha=0.0016599452033620266, hidden_layer_sizes=(150,); total time=20.5min
[CV] END alpha=0.0016599452033620266, hidden_layer_sizes=(150,); total time=20.2min
[CV] END alpha=0.0061111501174320884, hidden_layer_sizes=(150,); total time=60.5min
[CV] END alpha=0.0003058449429580245, hidden_layer_sizes=(100,); total time=54.9min
[CV] END alpha=0.0003058449429580245, hidden_layer_sizes=(100,); total time=55.2min
[CV] END alpha=0.0003058449429580245, hidden_layer_sizes=(100,); total time=55.5min
[CV] END alpha=0.007319987722668248, hidden_layer_sizes=(100,); total time=63.9min
[CV] END alpha=0.0061111501174320884, hidden_layer_sizes=(150,); total time=76.7min
[CV] END alpha=0.007319987722668248, hidden_layer_sizes=(100,); total time=65.5m



[CV] END alpha=0.005347564316322379, hidden_layer_sizes=(50,); total time=11.0min
[CV] END alpha=0.0062748150962771655, hidden_layer_sizes=(100,); total time=11.9min
[CV] END alpha=0.005347564316322379, hidden_layer_sizes=(50,); total time= 9.7min
[CV] END alpha=0.005347564316322379, hidden_layer_sizes=(50,); total time=10.4min




[CV] END alpha=0.0062748150962771655, hidden_layer_sizes=(100,); total time=15.3min
[CV] END alpha=0.003012291401980419, hidden_layer_sizes=(150,); total time=15.2min
[CV] END alpha=0.003012291401980419, hidden_layer_sizes=(150,); total time=15.7min
[CV] END alpha=0.004098609717152555, hidden_layer_sizes=(150,); total time=14.9min
[CV] END alpha=0.003012291401980419, hidden_layer_sizes=(150,); total time=16.9min
[CV] END alpha=0.004098609717152555, hidden_layer_sizes=(150,); total time=16.6min
[CV] END alpha=0.004660699842170359, hidden_layer_sizes=(150,); total time=14.5min
[CV] END alpha=0.004098609717152555, hidden_layer_sizes=(150,); total time=15.9min
[CV] END alpha=0.004660699842170359, hidden_layer_sizes=(150,); total time=17.6min
[CV] END alpha=0.004660699842170359, hidden_layer_sizes=(150,); total time=58.7min




[CV] END alpha=0.009932308858067881, hidden_layer_sizes=(50,); total time=69.6min




[CV] END alpha=0.009932308858067881, hidden_layer_sizes=(50,); total time=69.6min
[CV] END alpha=0.006283860093330873, hidden_layer_sizes=(150,); total time=76.7min




[CV] END alpha=0.009932308858067881, hidden_layer_sizes=(50,); total time=69.6min
[CV] END alpha=0.006283860093330873, hidden_layer_sizes=(150,); total time=73.6min
[CV] END alpha=0.0005645041271999773, hidden_layer_sizes=(150,); total time=69.6min
[CV] END alpha=0.006283860093330873, hidden_layer_sizes=(150,); total time=74.3min
[CV] END alpha=0.0005645041271999773, hidden_layer_sizes=(150,); total time=33.3min
[CV] END alpha=0.0005645041271999773, hidden_layer_sizes=(150,); total time=17.3min




[CV] END alpha=0.006903075385877797, hidden_layer_sizes=(50,); total time=17.6min




[CV] END alpha=0.006903075385877797, hidden_layer_sizes=(50,); total time=17.6min
[CV] END alpha=0.0007505159298527952, hidden_layer_sizes=(50,); total time=15.6min




[CV] END alpha=0.006903075385877797, hidden_layer_sizes=(50,); total time=17.5min
[CV] END alpha=0.0007505159298527952, hidden_layer_sizes=(50,); total time=15.5min
[CV] END alpha=0.0007505159298527952, hidden_layer_sizes=(50,); total time=15.2min
[CV] END alpha=0.009756320330745593, hidden_layer_sizes=(100,); total time=12.5min
[CV] END alpha=0.009756320330745593, hidden_layer_sizes=(100,); total time=11.6min
[CV] END alpha=0.0010767211400638387, hidden_layer_sizes=(150,); total time=10.6min
[CV] END alpha=0.003954165025399161, hidden_layer_sizes=(100,); total time=12.0min
[CV] END alpha=0.003954165025399161, hidden_layer_sizes=(100,); total time=11.9min
[CV] END alpha=0.0010767211400638387, hidden_layer_sizes=(150,); total time=11.2min
[CV] END alpha=0.003954165025399161, hidden_layer_sizes=(100,); total time=12.9min




[CV] END alpha=0.009756320330745593, hidden_layer_sizes=(100,); total time=14.4min
[CV] END alpha=0.006932635188254582, hidden_layer_sizes=(50,); total time= 9.6min




[CV] END alpha=0.006932635188254582, hidden_layer_sizes=(50,); total time=10.3min
[CV] END alpha=0.0010767211400638387, hidden_layer_sizes=(150,); total time=11.7min




[CV] END alpha=0.006932635188254582, hidden_layer_sizes=(50,); total time=10.1min
[CV] END alpha=0.008431949117361642, hidden_layer_sizes=(150,); total time=13.4min
[CV] END alpha=0.008431949117361642, hidden_layer_sizes=(150,); total time=14.5min
[CV] END alpha=0.008431949117361642, hidden_layer_sizes=(150,); total time=14.3min
[CV] END alpha=0.009193204020787821, hidden_layer_sizes=(100,); total time=14.3min
[CV] END alpha=0.009193204020787821, hidden_layer_sizes=(100,); total time=12.9min
[CV] END alpha=0.00672522284353982, hidden_layer_sizes=(100,); total time=13.6min
[CV] END alpha=0.00672522284353982, hidden_layer_sizes=(100,); total time=14.1min
[CV] END alpha=0.009193204020787821, hidden_layer_sizes=(100,); total time=15.0min
[CV] END alpha=0.004351558744912447, hidden_layer_sizes=(100,); total time=14.3min
[CV] END alpha=0.00672522284353982, hidden_layer_sizes=(100,); total time=15.9min
[CV] END alpha=0.004351558744912447, hidden_layer_sizes=(100,); total time=16.4min
[CV] END



[CV] END alpha=0.007851328233611145, hidden_layer_sizes=(100,); total time=34.7min
[CV] END alpha=0.004051502360018145, hidden_layer_sizes=(100,); total time=12.6min
[CV] END alpha=0.004051502360018145, hidden_layer_sizes=(100,); total time=13.5min
[CV] END alpha=0.00737271995856421, hidden_layer_sizes=(150,); total time=15.7min
[CV] END alpha=0.00737271995856421, hidden_layer_sizes=(150,); total time=17.1min
[CV] END alpha=0.000984925020519195, hidden_layer_sizes=(150,); total time=12.4min
[CV] END alpha=0.000984925020519195, hidden_layer_sizes=(150,); total time=11.4min
[CV] END alpha=0.000984925020519195, hidden_layer_sizes=(150,); total time=11.7min
[CV] END alpha=0.00737271995856421, hidden_layer_sizes=(150,); total time=18.5min
[CV] END alpha=0.005308342600258237, hidden_layer_sizes=(100,); total time=12.2min
[CV] END alpha=0.005308342600258237, hidden_layer_sizes=(100,); total time=13.1min
[CV] END alpha=0.005308342600258237, hidden_layer_sizes=(100,); total time=11.6min




[CV] END alpha=0.0033533033076326435, hidden_layer_sizes=(50,); total time=10.4min




[CV] END alpha=0.0033533033076326435, hidden_layer_sizes=(50,); total time=10.7min
[CV] END alpha=0.0033533033076326435, hidden_layer_sizes=(50,); total time=10.5min
[CV] END alpha=0.002813490317738959, hidden_layer_sizes=(50,); total time=11.2min
[CV] END alpha=0.002813490317738959, hidden_layer_sizes=(50,); total time=12.6min
[CV] END alpha=0.002813490317738959, hidden_layer_sizes=(50,); total time=30.1min




[CV] END alpha=0.005967511656638482, hidden_layer_sizes=(50,); total time=30.5min




[CV] END alpha=0.005967511656638482, hidden_layer_sizes=(50,); total time=30.3min




[CV] END alpha=0.005967511656638482, hidden_layer_sizes=(50,); total time=30.2min
[CV] END alpha=0.0029093450968738074, hidden_layer_sizes=(50,); total time=29.5min
[CV] END alpha=0.0029093450968738074, hidden_layer_sizes=(50,); total time=29.3min




[CV] END alpha=0.0029093450968738074, hidden_layer_sizes=(50,); total time=43.4min
[CV] END alpha=0.0015092422497476265, hidden_layer_sizes=(150,); total time=44.3min
[CV] END alpha=0.0015092422497476265, hidden_layer_sizes=(150,); total time=26.6min
[CV] END alpha=0.001752669390630025, hidden_layer_sizes=(50,); total time=25.1min
[CV] END alpha=0.001752669390630025, hidden_layer_sizes=(50,); total time=24.9min
[CV] END alpha=0.001752669390630025, hidden_layer_sizes=(50,); total time=24.7min
[CV] END alpha=0.0015092422497476265, hidden_layer_sizes=(150,); total time=28.1min




[CV] END alpha=0.009968869366005172, hidden_layer_sizes=(50,); total time=25.4min




[CV] END alpha=0.009968869366005172, hidden_layer_sizes=(50,); total time=11.1min




[CV] END alpha=0.009968869366005172, hidden_layer_sizes=(50,); total time=11.9min
[CV] END alpha=0.008254614284548342, hidden_layer_sizes=(50,); total time=12.2min




[CV] END alpha=0.008254614284548342, hidden_layer_sizes=(50,); total time=12.3min
[CV] END alpha=0.002087156815341724, hidden_layer_sizes=(150,); total time=14.5min




[CV] END alpha=0.008254614284548342, hidden_layer_sizes=(50,); total time=12.4min
[CV] END alpha=0.002087156815341724, hidden_layer_sizes=(150,); total time=15.0min
[CV] END alpha=0.002087156815341724, hidden_layer_sizes=(150,); total time=15.3min
[CV] END alpha=0.0072134195274865, hidden_layer_sizes=(150,); total time=14.2min




[CV] END alpha=0.007812703466859458, hidden_layer_sizes=(50,); total time=10.4min




[CV] END alpha=0.007812703466859458, hidden_layer_sizes=(50,); total time=10.8min




[CV] END alpha=0.007812703466859458, hidden_layer_sizes=(50,); total time=10.8min
[CV] END alpha=0.0072134195274865, hidden_layer_sizes=(150,); total time=33.3min
[CV] END alpha=0.009363008785133489, hidden_layer_sizes=(100,); total time=27.5min
[CV] END alpha=0.0072134195274865, hidden_layer_sizes=(150,); total time=31.0min




[CV] END alpha=0.009363008785133489, hidden_layer_sizes=(100,); total time=30.4min
[CV] END alpha=0.009363008785133489, hidden_layer_sizes=(100,); total time=30.0min
[CV] END alpha=0.0012586905952512973, hidden_layer_sizes=(150,); total time=27.0min
[CV] END alpha=0.0012586905952512973, hidden_layer_sizes=(150,); total time=27.2min
[CV] END alpha=0.0012586905952512973, hidden_layer_sizes=(150,); total time=29.5min
[CV] END alpha=0.008600385777897993, hidden_layer_sizes=(100,); total time=12.4min
[CV] END alpha=0.0010541011649041131, hidden_layer_sizes=(150,); total time=11.1min




[CV] END alpha=0.008600385777897993, hidden_layer_sizes=(100,); total time=14.6min
[CV] END alpha=0.008600385777897993, hidden_layer_sizes=(100,); total time=14.4min
[CV] END alpha=0.0032098232171566218, hidden_layer_sizes=(50,); total time= 8.2min
[CV] END alpha=0.0010541011649041131, hidden_layer_sizes=(150,); total time=11.6min
[CV] END alpha=0.0032098232171566218, hidden_layer_sizes=(50,); total time= 9.7min
[CV] END alpha=0.0010541011649041131, hidden_layer_sizes=(150,); total time=11.1min
[CV] END alpha=0.0032098232171566218, hidden_layer_sizes=(50,); total time= 9.5min
[CV] END alpha=0.007396061783380641, hidden_layer_sizes=(100,); total time=10.7min
[CV] END alpha=0.007396061783380641, hidden_layer_sizes=(100,); total time=11.5min
[CV] END alpha=0.007396061783380641, hidden_layer_sizes=(100,); total time=13.4min




[CV] END alpha=0.008972127425763265, hidden_layer_sizes=(50,); total time=12.6min




[CV] END alpha=0.008972127425763265, hidden_layer_sizes=(50,); total time=13.5min




[CV] END alpha=0.008972127425763265, hidden_layer_sizes=(50,); total time=14.0min
[CV] END alpha=0.003929268747537898, hidden_layer_sizes=(150,); total time=20.8min




[CV] END alpha=0.00723244787222995, hidden_layer_sizes=(50,); total time=18.9min
[CV] END alpha=0.00723244787222995, hidden_layer_sizes=(50,); total time=17.9min
[CV] END alpha=0.00723244787222995, hidden_layer_sizes=(50,); total time=17.5min
[CV] END alpha=0.003929268747537898, hidden_layer_sizes=(150,); total time=26.5min
[CV] END alpha=0.003929268747537898, hidden_layer_sizes=(150,); total time=26.4min
[CV] END alpha=0.007317295211648732, hidden_layer_sizes=(150,); total time=31.6min
[CV] END alpha=0.007317295211648732, hidden_layer_sizes=(150,); total time=34.4min
[CV] END alpha=0.0005043358953843135, hidden_layer_sizes=(150,); total time=24.8min
[CV] END alpha=0.0005043358953843135, hidden_layer_sizes=(150,); total time=24.5min
[CV] END alpha=0.007317295211648732, hidden_layer_sizes=(150,); total time=35.0min
[CV] END alpha=0.00780967179954561, hidden_layer_sizes=(150,); total time=37.5min
[CV] END alpha=0.00780967179954561, hidden_layer_sizes=(150,); total time=40.6min
[CV] END a



[CV] END alpha=0.0048537022318211175, hidden_layer_sizes=(50,); total time=24.3min
[CV] END alpha=0.0048537022318211175, hidden_layer_sizes=(50,); total time=22.4min
[CV] END alpha=0.005185706911647028, hidden_layer_sizes=(100,); total time=25.7min
[CV] END alpha=0.005185706911647028, hidden_layer_sizes=(100,); total time=22.1min
[CV] END alpha=0.001493314544058757, hidden_layer_sizes=(150,); total time=16.5min
[CV] END alpha=0.005185706911647028, hidden_layer_sizes=(100,); total time=19.7min
[CV] END alpha=0.001493314544058757, hidden_layer_sizes=(150,); total time=19.4min
[CV] END alpha=0.001493314544058757, hidden_layer_sizes=(150,); total time=10.7min
[CV] END alpha=0.002130612247347694, hidden_layer_sizes=(50,); total time= 4.8min
[CV] END alpha=0.0042038292303562975, hidden_layer_sizes=(150,); total time=12.2min
[CV] END alpha=0.0042038292303562975, hidden_layer_sizes=(150,); total time= 9.1min
[CV] END alpha=0.0042038292303562975, hidden_layer_sizes=(150,); total time=24.2min
[C



[CV] END alpha=0.002130612247347694, hidden_layer_sizes=(50,); total time=38.6min
[CV] END alpha=0.00086979909828793, hidden_layer_sizes=(150,); total time=66.0min
[CV] END alpha=0.00086979909828793, hidden_layer_sizes=(150,); total time=67.3min
[CV] END alpha=0.007047849330397046, hidden_layer_sizes=(100,); total time=68.0min
[CV] END alpha=0.00086979909828793, hidden_layer_sizes=(150,); total time=70.0min
[CV] END alpha=0.007047849330397046, hidden_layer_sizes=(100,); total time=71.2min
[CV] END alpha=0.007047849330397046, hidden_layer_sizes=(100,); total time=54.2min
[CV] END alpha=0.006343540481337933, hidden_layer_sizes=(100,); total time=56.6min
[CV] END alpha=0.006343540481337933, hidden_layer_sizes=(100,); total time=40.5min
[CV] END alpha=0.006343540481337933, hidden_layer_sizes=(100,); total time=14.2min
[CV] END alpha=0.006434037565104235, hidden_layer_sizes=(100,); total time=13.3min
[CV] END alpha=0.006434037565104235, hidden_layer_sizes=(100,); total time=12.0min
[CV] END



[CV] END alpha=0.008932802589188682, hidden_layer_sizes=(100,); total time=14.2min
[CV] END alpha=0.008932802589188682, hidden_layer_sizes=(100,); total time=13.4min
[CV] END alpha=0.0032800347497186385, hidden_layer_sizes=(50,); total time= 9.8min
[CV] END alpha=0.0032800347497186385, hidden_layer_sizes=(50,); total time= 8.8min
[CV] END alpha=0.0032800347497186385, hidden_layer_sizes=(50,); total time= 9.7min




[CV] END alpha=0.0043710778862625635, hidden_layer_sizes=(50,); total time=10.3min




[CV] END alpha=0.0043710778862625635, hidden_layer_sizes=(50,); total time=10.4min
[CV] END alpha=0.002821322493846353, hidden_layer_sizes=(150,); total time=13.5min




[CV] END alpha=0.0043710778862625635, hidden_layer_sizes=(50,); total time=10.2min
[CV] END alpha=0.002821322493846353, hidden_layer_sizes=(150,); total time=14.6min
[CV] END alpha=0.002821322493846353, hidden_layer_sizes=(150,); total time=14.4min
[CV] END alpha=0.008707305832563433, hidden_layer_sizes=(150,); total time=11.3min
[CV] END alpha=0.008707305832563433, hidden_layer_sizes=(150,); total time=12.3min
[CV] END alpha=0.0017465585314294175, hidden_layer_sizes=(150,); total time=44.1min
[CV] END alpha=0.00427411003148779, hidden_layer_sizes=(50,); total time=41.6min
[CV] END alpha=0.008707305832563433, hidden_layer_sizes=(150,); total time=45.7min




[CV] END alpha=0.00427411003148779, hidden_layer_sizes=(50,); total time=41.9min
[CV] END alpha=0.0017465585314294175, hidden_layer_sizes=(150,); total time=43.4min
[CV] END alpha=0.0017465585314294175, hidden_layer_sizes=(150,); total time=43.3min




[CV] END alpha=0.00427411003148779, hidden_layer_sizes=(50,); total time=41.9min
[CV] END alpha=0.007024360328902704, hidden_layer_sizes=(100,); total time=41.3min
[CV] END alpha=0.007024360328902704, hidden_layer_sizes=(100,); total time= 5.1min
[CV] END alpha=0.007024360328902704, hidden_layer_sizes=(100,); total time= 5.1min
[CV] END alpha=0.0017829104217293057, hidden_layer_sizes=(50,); total time= 3.7min
[CV] END alpha=0.0017829104217293057, hidden_layer_sizes=(50,); total time= 3.4min
[CV] END alpha=0.0034761517140362796, hidden_layer_sizes=(150,); total time= 5.5min
[CV] END alpha=0.0034761517140362796, hidden_layer_sizes=(150,); total time= 5.6min
[CV] END alpha=0.0017829104217293057, hidden_layer_sizes=(50,); total time= 3.4min
[CV] END alpha=0.0034761517140362796, hidden_layer_sizes=(150,); total time= 6.8min
[CV] END alpha=0.004138361710580409, hidden_layer_sizes=(100,); total time= 5.0min
[CV] END alpha=0.004138361710580409, hidden_layer_sizes=(100,); total time= 5.8min
[CV



[CV] END alpha=0.009817820827209606, hidden_layer_sizes=(100,); total time= 6.2min
[CV] END alpha=0.007063042728397884, hidden_layer_sizes=(150,); total time= 6.2min
[CV] END alpha=0.007063042728397884, hidden_layer_sizes=(150,); total time=25.9min
[CV] END alpha=0.0031087830981676966, hidden_layer_sizes=(50,); total time=23.0min




[CV] END alpha=0.0031087830981676966, hidden_layer_sizes=(50,); total time=23.1min
[CV] END alpha=0.0031087830981676966, hidden_layer_sizes=(50,); total time=22.9min
[CV] END alpha=0.007063042728397884, hidden_layer_sizes=(150,); total time=25.4min
[CV] END alpha=0.010077404850489418, hidden_layer_sizes=(150,); total time=24.6min
[CV] END alpha=0.010077404850489418, hidden_layer_sizes=(150,); total time=24.8min
[CV] END alpha=0.010077404850489418, hidden_layer_sizes=(150,); total time=24.9min
[CV] END alpha=0.006195643339798969, hidden_layer_sizes=(100,); total time= 4.4min
[CV] END alpha=0.006195643339798969, hidden_layer_sizes=(100,); total time= 4.6min
[CV] END alpha=0.006195643339798969, hidden_layer_sizes=(100,); total time= 5.0min
[CV] END alpha=0.004210370133182313, hidden_layer_sizes=(150,); total time= 5.4min
[CV] END alpha=0.004210370133182313, hidden_layer_sizes=(150,); total time= 4.0min
[CV] END alpha=0.0028864646423661143, hidden_layer_sizes=(50,); total time= 3.2min
[CV]



[CV] END alpha=0.0037965445606140446, hidden_layer_sizes=(50,); total time=13.2min
[CV] END alpha=0.009956504541106007, hidden_layer_sizes=(150,); total time=17.4min
[CV] END alpha=0.0037965445606140446, hidden_layer_sizes=(50,); total time=11.3min
[CV] END alpha=0.009956504541106007, hidden_layer_sizes=(150,); total time=20.9min
[CV] END alpha=0.006821355474058786, hidden_layer_sizes=(150,); total time=18.9min
[CV] END alpha=0.006821355474058786, hidden_layer_sizes=(150,); total time=21.5min
[CV] END alpha=0.006821355474058786, hidden_layer_sizes=(150,); total time=20.1min




[CV] END alpha=0.0037965445606140446, hidden_layer_sizes=(50,); total time=11.0min




[CV] END alpha=0.0073821634861185965, hidden_layer_sizes=(50,); total time=10.6min
[CV] END alpha=0.0073821634861185965, hidden_layer_sizes=(50,); total time=26.1min




[CV] END alpha=0.0073821634861185965, hidden_layer_sizes=(50,); total time=26.1min
[CV] END alpha=0.0064230583059357955, hidden_layer_sizes=(100,); total time=25.6min
[CV] END alpha=0.0064230583059357955, hidden_layer_sizes=(100,); total time=26.0min
[CV] END alpha=0.0064230583059357955, hidden_layer_sizes=(100,); total time=56.8min
[CV] END alpha=0.004088244424445531, hidden_layer_sizes=(150,); total time=53.7min
[CV] END alpha=0.001002897700544083, hidden_layer_sizes=(50,); total time=36.7min
[CV] END alpha=0.004088244424445531, hidden_layer_sizes=(150,); total time=58.4min
[CV] END alpha=0.001002897700544083, hidden_layer_sizes=(50,); total time=37.0min
[CV] END alpha=0.004088244424445531, hidden_layer_sizes=(150,); total time=56.1min
[CV] END alpha=0.001002897700544083, hidden_layer_sizes=(50,); total time=37.8min
[CV] END alpha=0.0016071754396542947, hidden_layer_sizes=(150,); total time=40.3min
[CV] END alpha=0.0019651851039985424, hidden_layer_sizes=(50,); total time= 7.8min
[CV

In [13]:
best_params
best_model = mlp.set_params(**best_params)
best_model.fit(X_train, y_train)
best_accuracy = best_model.score(X_test, y_test)


In [14]:
best_accuracy

0.9745238095238096

In [None]:
# set the hyperparameter distributions
param_grid = {
    'activation': ['logistic', 'tanh', 'relu'],
    'solver': ['sgd', 'adam'],
}


# Perform Grid Search Cross-Validation
grid_search = GridSearchCV(estimator=best_model, param_grid=param_grid, scoring='accuracy', cv=2, n_jobs = -1, verbose=2)
grid_search.fit(X, y)


# Get the best parameters from RandomSearchCV
best_params_3 = grid_search.best_params_


Fitting 2 folds for each of 6 candidates, totalling 12 fits
[CV] END .......................activation=tanh, solver=adam; total time= 2.5min
[CV] END .......................activation=tanh, solver=adam; total time= 3.4min
[CV] END ...................activation=logistic, solver=adam; total time= 4.2min
[CV] END ...................activation=logistic, solver=adam; total time= 5.6min




[CV] END ........................activation=tanh, solver=sgd; total time= 6.0min
[CV] END ........................activation=tanh, solver=sgd; total time= 6.0min




[CV] END ....................activation=logistic, solver=sgd; total time= 6.1min




[CV] END ....................activation=logistic, solver=sgd; total time= 6.2min
[CV] END .......................activation=relu, solver=adam; total time= 2.1min
[CV] END .......................activation=relu, solver=adam; total time= 1.5min




[CV] END ........................activation=relu, solver=sgd; total time= 4.8min




[CV] END ........................activation=relu, solver=sgd; total time= 4.2min


In [22]:
best_params
best_model2 = best_model.set_params(**best_params)
best_model2.fit(X_train, y_train)
best_accuracy_2 = best_model2.score(X_test, y_test)



In [23]:
best_accuracy_2

0.9770238095238095

### Predicting

In [24]:
predictions = best_model2.predict(df_test)

predictions_df = pd.DataFrame({
    'ImageId': df_test.index + 1,  # use row indexes and start from 1 (since it is like this in the sample submission)
    'Label': predictions  # Predictions from the model
})


predictions_df.to_csv('second_tuning.csv', index=False)




# Discussion



I had an extremely hard time with the hyperparameter-tuning. During the last homework, I had used randomized search with 20 iteration and 5 cross-validations, and I found out that my model had a lower accuracy than the one with no hyperpparameter-tuning, therefore, this time I wanted to be extensive with 100 iterations and 3 cross-validations. However, my code took a scarily long time to finish running and I don't really see why it should have taken this long (it took 880 minutes and 22 seconds). My thoughts for why it shouldn't have taken this long are the following:
1.  If the default value of 'hidden_layer_sizes' is 100, me setting three values : (50,),(100,), and (150,) should not have been a problem.
2.  Since I had only 2 hyperparameters (hidden_layer_sizes and alpha), it shouldn't have taken this long. (I had tried setting up around 4 hyperparameters to tune but stopped at around 600 minutes)

I decided to train hidden_layer_sizes and alpha because I intuitively felt that they could affect the model the most (hidden_layer_sizes to make the model accurate and alpha to control overfitting).

Furthermore, I realized too late that I shouldn't have used the training set after splitting the data into train,test to perform the hyperparameter-tuning but I would've ran out of time if I redid it.
