In [1]:
import six
import sys
# Register the standalone `six` package under the module path
# `sklearn.externals.six` *before* mlrose is imported; the order matters.
# NOTE(review): presumably mlrose imports `sklearn.externals.six`, which the
# installed scikit-learn no longer ships -- confirm against the pinned versions.
sys.modules['sklearn.externals.six'] = six
import mlrose

In [15]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [7]:
# Read the raw HR dataset from the working directory.
hr_data = pd.read_csv("hr_data.csv")

# Columns that hold category labels and must be turned into integer codes.
categorical_cols = [
    'city',
    'gender',
    'relevent_experience',
    'enrolled_university',
    'education_level',
    'major_discipline',
    'experience',
    'company_size',
    'company_type',
    'last_new_job',
]

# Replace every categorical column with its integer codes, using a fresh
# encoder per column so the code spaces stay independent.
for column in categorical_cols:
    encoder = preprocessing.LabelEncoder()
    hr_data[column] = encoder.fit_transform(hr_data[column])

# Name of the label column, and the ordered list of feature columns fed to the models.
target = 'target'
cols = [
    'city',
    'city_development_index',
    'gender',
    'relevent_experience',
    'enrolled_university',
    'education_level',
    'major_discipline',
    'experience',
    'company_size',
    'company_type',
    'last_new_job',
    'training_hours',
]

# Split into training and testing data sets (70% / 30%, fixed seed for repeatability).
train, test = train_test_split(hr_data, test_size=0.3, random_state=30)


In [None]:
# Iteration budgets shared by every sweep below: 50 values spread evenly from
# 1 to 1000, truncated to integers by the dtype conversion.
max_iter = np.linspace(start=1, stop=1000, num=50, dtype=int)


# Random Hill Climb

In [26]:
# Sweep the iteration budget for random hill climbing. For each budget in
# `max_iter`, record the wall-clock training time (seconds) and the accuracy
# on the held-out `test` split.
random_accuracy = []
random_runtime = []
for i in max_iter:
    nn_random = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                                     algorithm = 'random_hill_climb', max_iters = int(i),
                                     bias = True, is_classifier = True, learning_rate = 0.0001,
                                     early_stopping = True, clip_max = 5, max_attempts = 100,
                                     random_state = 3)

    # Time fit() rather than the constructor call, so the recorded runtime
    # reflects training. perf_counter() is the monotonic clock meant for intervals.
    start_time = time.perf_counter()
    nn_random.fit(train[cols], train[target])
    runtime = time.perf_counter() - start_time
    random_runtime.append(runtime)

    random_accuracy.append(accuracy_score(test[target], nn_random.predict(test[cols])))


In [27]:
# Show the list of held-out accuracies collected by the random hill climbing
# sweep (one entry per iteration budget that was run).
print(random_accuracy)

0.750347947112039


# Simulated Annealing

In [19]:
# Sweep the iteration budget for simulated annealing. For each budget in
# `max_iter`, record the wall-clock training time (seconds) and the accuracy
# on the held-out `test` split.
sa_accuracy = []
sa_runtime = []
for i in max_iter:
    nn_sa = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                                 algorithm = 'simulated_annealing', max_iters = int(i),
                                 bias = True, is_classifier = True, learning_rate = 0.0001,
                                 early_stopping = True, clip_max = 5, max_attempts = 100,
                                 random_state = 3)

    # Time fit() rather than the constructor call, so the recorded runtime
    # reflects training. perf_counter() is the monotonic clock meant for intervals.
    start_time = time.perf_counter()
    nn_sa.fit(train[cols], train[target])
    runtime = time.perf_counter() - start_time
    sa_runtime.append(runtime)

    sa_accuracy.append(accuracy_score(test[target], nn_sa.predict(test[cols])))


In [21]:
# Show the list of held-out accuracies collected by the simulated annealing
# sweep (one entry per iteration budget that was run).
print(sa_accuracy)

0.750347947112039


#  Genetic Algorithm

In [29]:
import warnings

# Sweep the iteration budget for the genetic algorithm. For each budget in
# the shared `max_iter` grid (the same grid the other sweeps iterate over),
# record the wall-clock training time (seconds) and the accuracy on the
# held-out `test` split. Every warning raised inside the block is silenced.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    ga_accuracy = []
    ga_runtime = []
    for i in max_iter:
        nn_ga = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                                     algorithm = 'genetic_alg', max_iters = int(i),
                                     bias = True, is_classifier = True, learning_rate = 0.0001,
                                     early_stopping = True, clip_max = 5, max_attempts = 100,
                                     random_state = 3)

        # Time fit() rather than the constructor call, so the recorded runtime
        # reflects training. perf_counter() is the monotonic clock meant for intervals.
        start_time = time.perf_counter()
        nn_ga.fit(train[cols], train[target])
        runtime = time.perf_counter() - start_time
        ga_runtime.append(runtime)

        ga_accuracy.append(accuracy_score(test[target], nn_ga.predict(test[cols])))


KeyboardInterrupt: 

In [25]:
# Show the list of held-out accuracies collected by the genetic algorithm
# sweep (one entry per iteration budget that was run).
print(ga_accuracy)

0.750347947112039


# Basic Backpropagation

In [23]:
# Sweep the iteration budget for gradient-descent (backpropagation) training.
# For each budget in `max_iter`, record the wall-clock training time (seconds)
# and the accuracy on the held-out `test` split.
accuracy = []
bp_runtime = []
for i in max_iter:
    # The algorithm must be named explicitly: mlrose's NeuralNetwork falls back
    # to 'random_hill_climb' when no algorithm is given, which would make this
    # sweep a duplicate of the random hill climbing experiment.
    nn = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                              algorithm = 'gradient_descent', max_iters = int(i),
                              bias = True, is_classifier = True, learning_rate = 0.0001,
                              early_stopping = True, clip_max = 5, max_attempts = 100,
                              random_state = 3)

    # Time fit() rather than the constructor call, so the recorded runtime
    # reflects training. perf_counter() is the monotonic clock meant for intervals.
    start_time = time.perf_counter()
    nn.fit(train[cols], train[target])
    runtime = time.perf_counter() - start_time
    bp_runtime.append(runtime)

    accuracy.append(accuracy_score(test[target], nn.predict(test[cols])))


In [24]:
# Show the list of held-out accuracies collected by the sweep in the
# "Basic Backpropagation" section (one entry per iteration budget that was run).
print(accuracy)

0.750347947112039


In [None]:
# Compare the held-out accuracy of the four training algorithms across the
# iteration budgets in `max_iter`.
fig, ax = plt.subplots()

# (scores, line style) for each algorithm, in drawing order.
series = [
    (sa_accuracy, dict(linestyle='--', color="b", label="Simulated Annealing")),
    (ga_accuracy, dict(color="r", label="Genetic")),
    (accuracy, dict(color="k", label="Back Propagation")),
    (random_accuracy, dict(linestyle='dotted', color="m", label="Random Hill Climb")),
]

for scores, style in series:
    # Trim the x values to the number of recorded scores so a sweep that was
    # interrupted part-way still plots instead of raising a length mismatch.
    ax.plot(max_iter[:len(scores)], scores, **style)

# The plotted values are accuracy_score results on the test split, so the
# axis is labelled as accuracy rather than an optimizer fitness value.
ax.set_title("Test Accuracy vs. Number of Iterations")
ax.set_xlabel("Number of Iterations")
ax.set_ylabel("Test Accuracy")
ax.legend(loc="best")
fig.tight_layout()
plt.show()