### Import libraries

In [1]:
# Ignore warnings
import warnings
# NOTE(review): this silences every warning category for the rest of the
# session (including deprecation and convergence warnings from any library).
warnings.filterwarnings('ignore')

In [2]:
# mlrose: created by Genevieve Hayes, modified by Andrew Rollings
import mlrose

# Numpy
import numpy as np

In [3]:
# Sklearn metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import make_scorer
from sklearn.metrics import roc_curve
from sklearn.metrics import auc

# Sklearn model selection
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import learning_curve
from sklearn.model_selection import validation_curve

# Preprocessing
from sklearn.preprocessing import StandardScaler

# Plotting
import matplotlib.pyplot as plt

### Load and preprocess data

In [4]:
# Load the Spambase split from comma-separated text files.
# Order of the paths matches the order of the unpacked names:
# train features, test features, train labels, test labels.
X_train, X_test, y_train, y_test = [
    np.genfromtxt(csv_path, delimiter=',')
    for csv_path in (
        './data/Spambase_X_train.csv',
        './data/Spambase_X_test.csv',
        './data/Spambase_y_train.csv',
        './data/Spambase_y_test.csv',
    )
]

In [5]:
# Standardize features with StandardScaler.
# The scaler is fitted on the training set only; the test set is transformed
# with those same training statistics so no test information leaks into the fit.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### AUC Scorer

In [6]:
# Wrap roc_auc_score in a scorer object for use with sklearn model-selection
# utilities (e.g. GridSearchCV); it is not used by the cells visible below.
# NOTE(review): with make_scorer's default arguments the metric is evaluated on
# predict() output (hard labels), not on probabilities — confirm this is intended.
scorer = make_scorer(roc_auc_score)

### Randomized Hill Climbing

In [13]:
# Neural network classifier whose weights are searched with
# randomized hill climbing instead of gradient descent.
nn_rhc = mlrose.NeuralNetwork(
    hidden_nodes=[120],        # presumably one hidden layer of 120 nodes — see mlrose docs
    activation='relu',
    algorithm='random_hill_climb',
    max_iters=20000,
    bias=True,
    is_classifier=True,
    learning_rate=0.001,       # mlrose docs: acts as the step size for randomized optimizers
    early_stopping=False,
    clip_max=5,
    max_attempts=10,           # NOTE(review): mlrose docs say this only matters when early_stopping=True — confirm
    restarts=5,
    random_state=42,           # fixed seed so the run is repeatable
)

In [14]:
%%time
# Train the RHC network on the standardized training set.
# Expensive cell: the recorded run below took 38 min 47 s of wall time.
nn_rhc.fit(X_train, y_train)

Wall time: 38min 47s


NeuralNetwork(activation='relu', algorithm='random_hill_climb', bias=True,
              clip_max=5, curve=False, early_stopping=False, hidden_nodes=[120],
              is_classifier=True, learning_rate=0.001, max_attempts=10,
              max_iters=20000, mutation_prob=0.1, pop_size=200, random_state=42,
              restarts=5,
              schedule=GeomDecay(init_temp=1.0, decay=0.99, min_temp=0.001))

In [15]:
%%time
y_pred = nn_rhc.predict(X_test)
print('Test Accuracy: %.3f' % accuracy_score(y_test, y_pred))
print('Test AUC: %.3f' % roc_auc_score(y_test, y_pred))

Test Accuracy: 0.550
Test AUC: 0.537
Wall time: 9.97 ms


### Simulated Annealing

In [19]:
# Neural network classifier whose weights are searched with
# simulated annealing under a geometric temperature decay schedule.
nn_sa = mlrose.NeuralNetwork(
    hidden_nodes=[120],        # presumably one hidden layer of 120 nodes — see mlrose docs
    activation='relu',
    algorithm='simulated_annealing',
    max_iters=20000,
    bias=True,
    is_classifier=True,
    learning_rate=0.001,       # mlrose docs: acts as the step size for randomized optimizers
    early_stopping=False,
    schedule=mlrose.GeomDecay(decay=0.99),
    clip_max=5,
    max_attempts=10,           # NOTE(review): mlrose docs say this only matters when early_stopping=True — confirm
    random_state=42,           # fixed seed so the run is repeatable
)

In [20]:
%%time
# Train the SA network on the standardized training set.
# Expensive cell: the recorded run below took 8 min 45 s of wall time.
nn_sa.fit(X_train, y_train)

Wall time: 8min 45s


NeuralNetwork(activation='relu', algorithm='simulated_annealing', bias=True,
              clip_max=5, curve=False, early_stopping=False, hidden_nodes=[120],
              is_classifier=True, learning_rate=0.001, max_attempts=10,
              max_iters=20000, mutation_prob=0.1, pop_size=200, random_state=42,
              restarts=0,
              schedule=GeomDecay(init_temp=1.0, decay=0.99, min_temp=0.001))

In [21]:
%%time
y_pred = nn_sa.predict(X_test)
print('Test Accuracy: %.3f' % accuracy_score(y_test, y_pred))
print('Test AUC: %.3f' % roc_auc_score(y_test, y_pred))

Test Accuracy: 0.545
Test AUC: 0.532
Wall time: 8.49 ms


#### Conclusions
Randomized Hill Climbing @20K iterations:
* Test Accuracy: 0.550
* Test AUC: 0.537

Simulated Annealing @20K iterations:
* Test Accuracy: 0.545
* Test AUC: 0.532

#### No significant improvements were observed even after 20K iterations.