In [1]:
import os
import time

import mlrose
import numpy as np
import pandas as pd
import traitlets.utils.bunch
from mlrose import NNGSRunner
from sklearn.datasets import load_iris
from sklearn.metrics import make_scorer, f1_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.preprocessing import StandardScaler, OneHotEncoder


def process_abalone_ternary(path='data/abalone.data'):
    """Load the abalone CSV and package it as a three-class dataset.

    Processing steps:
      * read the header-less CSV at ``path`` using the nine abalone column names;
      * drop every row whose Height is exactly 1.13 or 0.515
        (presumably the known height outliers -- confirm against the data notes);
      * one-hot encode Sex ('M', 'F', 'I') into float columns Male / Female / Infant;
      * replace any remaining missing value with 0;
      * replace Rings with a bucket label: 1 when Rings < 11, 2 when
        11 <= Rings < 21, and 3 otherwise.

    Parameters
    ----------
    path : str, optional
        Location of the abalone data file. Defaults to ``'data/abalone.data'``,
        the path that used to be hard-coded.

    Returns
    -------
    traitlets.Bunch
        ``data``          2-D array holding the ten feature columns,
        ``target``        2-D array of shape (n_rows, 1) with the bucket labels,
        ``target_names``  distinct bucket labels in order of first appearance,
        ``DESCR``         short description string,
        ``feature_names`` names of the columns in ``data``, in order.
    """
    columns = ["Sex", "Length", "Diameter", "Height",
               "Whole weight", "Shucked weight", "Viscera weight",
               "Shell weight", "Rings"]
    # Single source of truth for the feature order: the three one-hot columns
    # followed by every measurement column (everything between Sex and Rings).
    feature_names = ['Male', 'Female', 'Infant'] + columns[1:-1]

    raw = pd.read_csv(path, names=columns)
    keep = (raw["Height"] != 1.13) & (raw["Height"] != 0.515)
    # .copy() makes the filtered frame independent of `raw`, so the column
    # assignments below never write into a slice of another frame.
    df = raw[keep].copy()

    # deal with categorical data
    for code, column in (('M', 'Male'), ('F', 'Female'), ('I', 'Infant')):
        df[column] = (df["Sex"] == code).astype(float)
    # Any other missing value is filled with 0 as well (unchanged behaviour).
    df = df.fillna(0)

    # bucketize rings -- every mask is evaluated against the untouched `rings`
    # values, so the result does not depend on the order of the assignments.
    rings = df["Rings"]
    bucket = pd.Series(3, index=df.index)
    bucket[rings < 21] = 2
    bucket[rings < 11] = 1
    df["Rings"] = bucket

    return traitlets.Bunch(
        data=df[feature_names].values,
        target=df[['Rings']].values,
        target_names=df["Rings"].unique(),
        DESCR='abalone dataset...',
        feature_names=feature_names,
    )

# Load the cleaned abalone features and the bucketed ring labels.
data = process_abalone_ternary()

# Split data into training and test sets (fixed seed keeps the split reproducible).
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=3
)

# Normalize feature data: the scaler is fitted on the training split only and
# then applied unchanged to the test split.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# One hot encode target values
one_hot = OneHotEncoder()

# .toarray() yields a plain ndarray; .todense() would return the deprecated
# np.matrix type, which newer scikit-learn input validation refuses.
y_train_hot = one_hot.fit_transform(y_train.reshape(-1, 1)).toarray()
y_test_hot = one_hot.transform(y_test.reshape(-1, 1)).toarray()


In [None]:
# Sweep the genetic-algorithm iteration budget: for each budget, fit a fresh
# network, score it on the train and test splits, and collect one result row.
csv_path = 'out/nn_genetic_alg.csv'
cols = ['iter', 'train_accuracy', 'test_accuracy', 'test_f1', 'fit_time', 'loss']
iteration_budgets = [1, 50, 100, 200, 400, 800, 1600, 3200]

# Make sure the output folder exists BEFORE the sweep starts, so a missing
# directory cannot throw away the results once all the fits have finished.
out_dir = os.path.dirname(csv_path)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)

# Collected in `rows` rather than `data`, so the dataset Bunch named `data`
# from the preparation cell is not overwritten.
rows = []
for max_iters in iteration_budgets:
    # NOTE(review): `schedule` (and possibly `learning_rate`) look like
    # settings for other optimizers and are presumably ignored by
    # 'genetic_alg' -- confirm against the mlrose docs. Kept unchanged.
    nn_modelga = mlrose.NeuralNetwork(
        hidden_nodes=[10], activation='relu',
        algorithm='genetic_alg', max_iters=max_iters,
        bias=True, is_classifier=True, learning_rate=0.01,
        early_stopping=True, clip_max=5, max_attempts=100,
        random_state=3, schedule=mlrose.GeomDecay(init_temp=100),
    )

    # perf_counter is a monotonic clock intended for measuring intervals.
    fit_start = time.perf_counter()
    nn_modelga.fit(X_train_scaled, y_train_hot)
    fit_time = time.perf_counter() - fit_start

    # Predict labels for train set and assess accuracy
    y_train_pred = nn_modelga.predict(X_train_scaled)
    y_train_accuracy = accuracy_score(y_train_hot, y_train_pred)

    # Predict labels for test set and assess accuracy / macro-averaged F1
    y_test_pred = nn_modelga.predict(X_test_scaled)
    y_test_accuracy = accuracy_score(y_test_hot, y_test_pred)
    y_test_f1 = f1_score(y_test_hot, y_test_pred, average='macro')

    row = [max_iters, y_train_accuracy, y_test_accuracy, y_test_f1, fit_time, nn_modelga.loss]
    rows.append(row)
    print(row)

result = pd.DataFrame(rows, columns=cols)
result.to_csv(csv_path, index=False)

[1, 0.588622754491018, 0.6011976047904192, 0.35053095053095057, 0.9638581275939941, 1.055392472821766]


In [None]:
# Read the saved metrics back from disk; the resulting frame is this cell's output.
pd.read_csv(filepath_or_buffer=csv_path)