# ML Assignment 2: Optimization

## Overview

In developing this notebook, I referenced Hayes, G. (2019), *mlrose: Machine Learning, Randomized Optimization and SEarch package for Python* (https://github.com/gkhayes/mlrose), as well as the hiive fork (https://github.com/hiive/mlrose), which is the version imported below as `mlrose_hiive`.

mlrose is a Python package for applying some of the most common randomized optimization and search algorithms to a range of different optimization problems, over both discrete- and continuous-valued parameter spaces. This notebook contains the examples used in the mlrose tutorial.

### Import Libraries

In [1]:
import time

import mlrose_hiive as mlrose
import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt

### Problem 1:  Fill the Knapsack!

In [2]:
def _save_line_chart(x, y, title, ylabel, path):
    """Draw one red line chart against iteration, save it to ``path``, then clear the figure."""
    import os

    plt.title(title)
    plt.xlabel("Iteration")
    plt.ylabel(ylabel)
    plt.plot(x, y, color="red")
    # savefig does not create missing directories, so make sure the target exists.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    plt.savefig(path, bbox_inches='tight')
    # Reset the implicit pyplot figure so the next chart starts from a blank canvas.
    plt.clf()


def plot(fitness_curve, name="unknown"):
    """Save the fitness and evaluation-count charts for one optimizer run.

    Args:
        fitness_curve: 2-D array-like with one row per iteration. Column 0 is
            charted as "Fitness Score" and column 1 as "# Evaluations".
        name: Prefix of the output files; the charts are written to
            ``charts/{name}_fitness.png`` and ``charts/{name}_evaluations.png``.
            Calls that share a name overwrite each other's files.
    """
    # Accept plain nested lists as well as numpy arrays for the column slicing below.
    curve = np.asarray(fitness_curve)
    iterations = range(0, len(curve))

    _save_line_chart(iterations, curve[:, 0], "Line graph", "Fitness Score",
                     f'charts/{name}_fitness.png')
    _save_line_chart(iterations, curve[:, 1], "Evaluations per Iteration", "# Evaluations",
                     f'charts/{name}_evaluations.png')

    
def do_it_all(algorithm, name="Unknown", weight=None):
    """Run one optimizer, print a short report, and chart its fitness curve.

    Args:
        algorithm: Zero-argument callable that returns the tuple
            ``(best_state, best_fitness, fitness_curve)``.
        name: Label used in the printed report and as the chart name passed
            to ``plot``.
        weight: Optional per-item weights, indexed like ``best_state``. When
            given, the total weight of the items whose state entry is 1 is
            printed. When omitted, the weight summary is skipped (the old
            ``[]`` default raised IndexError as soon as any item was selected).

    Returns:
        The ``(best_state, best_fitness, fitness_curve)`` tuple produced by
        ``algorithm``, so callers can inspect the result further.

    Raises:
        ValueError: If ``weight`` is given and ``best_state`` contains an
            entry other than 0 or 1.
    """
    tic = time.perf_counter()
    best_state, best_fitness, fitness_curve = algorithm()
    toc = time.perf_counter()
    print(f"{name} completed in {toc - tic:0.4f} seconds")
    print(f'{name} thinks best fitness/value is ', best_fitness)

    if weight is not None:
        total_weight = 0
        for index, item in enumerate(best_state):
            if item == 1:
                total_weight += weight[index]
            elif item != 0:
                # The weight summary only makes sense for a 0/1 selection vector.
                raise ValueError(
                    f"{name}: unexpected state value {item!r} at index {index}; expected 0 or 1"
                )
        print(f'{name} total weight packed is {total_weight}')

    plot(fitness_curve, f'{name}')
    return best_state, best_fitness, fitness_curve
    

In [None]:
# Knapsack experiment: for several problem sizes, pick the subset of n items
# with the highest total value whose total weight fits in the knapsack.
# https://en.wikipedia.org/wiki/Knapsack_problem
#
# The pre-defined mlrose Knapsack fitness class scores a state vector
# x = [x_0, x_1, ..., x_{n-1}] as
#     Fitness(x) = sum_i v_i * x_i   if sum_i w_i * x_i <= W
#                  0                 otherwise
# where w_i and v_i are the weight and value of item i, and the capacity W is
# derived from max_weight_pct times the total weight of all items.
import os

# Seed used both for generating the problem instances and for every optimizer,
# so the whole experiment is repeatable from run to run.
init_state = 5
instance_rng = np.random.RandomState(init_state)

# We can only hold 60% of the total weight of the items we are trying to fit.
max_weight_pct = 0.6

# The charts below are written to charts/; create it if it is missing.
os.makedirs('charts', exist_ok=True)

ns = [5, 10, 50, 100, 200]  # number of candidate items per experiment
for n in ns:
    print('\n\n=========================')
    print(f'Using {n} Items...')
    print('=========================')
    weights = instance_rng.randint(1, 50, size=n)
    values = instance_rng.randint(1, 50, size=n)
    print(f'Total weight of items: {np.sum(weights)}')
    print('Max Weight Allowed: ', np.sum(weights) * max_weight_pct)
    fitness = mlrose.Knapsack(weights, values, max_weight_pct)

    # Scatter of the generated instance; one file per problem size so later
    # sizes do not overwrite earlier ones.
    plt.title("Item Value vs. Weight")
    plt.xlabel("Weights")
    plt.ylabel("Value")
    plt.scatter(weights, values)
    plt.savefig(f'charts/knapsack_problem_{n}.png', bbox_inches='tight')
    plt.clf()

    problem = mlrose.DiscreteOpt(length=n, fitness_fn=fitness, maximize=True, max_val=2)

    # Now that we've defined the problem, let's see which algorithm solves it best.
    schedule = mlrose.ExpDecay()
    runs = [
        ('random_hill_climbing',
         lambda: mlrose.random_hill_climb(problem,
                                          max_attempts=100,
                                          restarts=100,
                                          curve=True,
                                          random_state=init_state)),
        # NOTE(review): max_iters=50 gives simulated annealing far fewer
        # iterations than the other algorithms get; confirm this is intended.
        ('simulated_annealing',
         lambda: mlrose.simulated_annealing(problem,
                                            schedule=schedule,
                                            max_attempts=100,
                                            curve=True,
                                            max_iters=50,
                                            random_state=init_state)),
        ('genetic_algorithm',
         lambda: mlrose.genetic_alg(problem,
                                    pop_size=200,
                                    pop_breed_percent=0.75,
                                    elite_dreg_ratio=0.99,
                                    minimum_elites=0,
                                    minimum_dregs=0,
                                    mutation_prob=0.1,
                                    max_attempts=100,
                                    max_iters=1000,
                                    curve=True,
                                    random_state=init_state,
                                    state_fitness_callback=None,
                                    callback_user_info=None,
                                    hamming_factor=0.0,
                                    hamming_decay_factor=None)),
        ('mimic',
         lambda: mlrose.mimic(problem,
                              pop_size=200,
                              keep_pct=0.2,
                              max_attempts=5,
                              curve=True,
                              random_state=init_state,
                              state_fitness_callback=None,
                              callback_user_info=None,
                              noise=0.0)),
    ]

    # Each callable is invoked inside this iteration, so it sees this
    # iteration's problem and schedule.
    for label, alg in runs:
        do_it_all(alg, f'knapsack_{label}_{n}', weights)




Using 5 Items...
Total weight of items: 68
Max Weight Allowed:  40.8
knapsack_random_hill_climbing_5 completed in 0.2558 seconds
knapsack_random_hill_climbing_5 thinks best fitness/value is  88.0
knapsack_random_hill_climbing_5 total weight packed is 34
knapsack_simulated_annealing_5 completed in 0.0017 seconds
knapsack_simulated_annealing_5 thinks best fitness/value is  44.0
knapsack_simulated_annealing_5 total weight packed is 32
knapsack_genetic_algorithm_5 completed in 1.2126 seconds
knapsack_genetic_algorithm_5 thinks best fitness/value is  88.0
knapsack_genetic_algorithm_5 total weight packed is 34
knapsack_mimic_5 completed in 0.0699 seconds
knapsack_mimic_5 thinks best fitness/value is  88.0
knapsack_mimic_5 total weight packed is 34


Using 10 Items...
Total weight of items: 204
Max Weight Allowed:  122.39999999999999
knapsack_random_hill_climbing_10 completed in 0.2702 seconds
knapsack_random_hill_climbing_10 thinks best fitness/value is  224.0
knapsack_random_hill_climbing

### Problem 2: Six Peaks Using a Pre-defined Fitness Function

In [None]:
# Six Peaks experiment: a length-50 state vector (max_val=2) scored with the
# pre-defined mlrose SixPeaks fitness class, solved by four algorithms.
n = 50
fitness = mlrose.SixPeaks(t_pct=0.15)

problem = mlrose.DiscreteOpt(length=n, fitness_fn=fitness, maximize=True, max_val=2)

# Now that we've defined the problem, let's see which algorithm solves it best.
# The same seed is passed to every optimizer so the runs can be reproduced.
init_state = 5

# Each plot() call below gets its own chart name: with the default name every
# algorithm would overwrite the same pair of files.

# Randomized Hill Climbing
best_state, best_fitness, fitness_curve = mlrose.random_hill_climb(
    problem, max_attempts=100, restarts=50, curve=True, random_state=init_state)

print('Random Hill Climb thinks best fitness is ', best_fitness)
plot(fitness_curve, 'six_peaks_random_hill_climbing')

# Simulated Annealing
schedule = mlrose.ExpDecay()
best_state, best_fitness, fitness_curve = mlrose.simulated_annealing(
    problem, schedule=schedule, max_attempts=100, curve=True, max_iters=50,
    random_state=init_state)

print('Simulated Annealing thinks best fitness is ', best_fitness)
plot(fitness_curve, 'six_peaks_simulated_annealing')

# Genetic Algorithm
best_state, best_fitness, fitness_curve = mlrose.genetic_alg(
    problem, pop_size=200, pop_breed_percent=0.75, elite_dreg_ratio=0.99,
    minimum_elites=0, minimum_dregs=0, mutation_prob=0.1, max_attempts=100,
    max_iters=1000, curve=True, random_state=init_state,
    state_fitness_callback=None, callback_user_info=None,
    hamming_factor=0.0, hamming_decay_factor=None)

print('Genetic Algorithm thinks best fitness is ', best_fitness)
plot(fitness_curve, 'six_peaks_genetic_algorithm')

# MIMIC
best_state, best_fitness, fitness_curve = mlrose.mimic(
    problem, pop_size=200, keep_pct=0.2, max_attempts=10, curve=True,
    random_state=init_state, state_fitness_callback=None,
    callback_user_info=None, noise=0.0)

print('MIMIC thinks best fitness is ', best_fitness)
plot(fitness_curve, 'six_peaks_mimic')


### Problem 3: Travelling Salesperson Using a Coordinate-Defined Fitness Function

In [None]:
# Travelling Salesperson experiment: 40 cities on a 50x50 integer grid, solved
# by four algorithms.

# The same seed drives the city generator and every optimizer so the runs can
# be reproduced.
init_state = 5

# Create list of city coordinates
n = 40
coord_rng = np.random.RandomState(init_state)
coords_list = [(coord_rng.randint(50), coord_rng.randint(50)) for _ in range(n)]

# Initialize fitness function object using coords_list
fitness_coords = mlrose.TravellingSales(coords=coords_list)
# Define optimization problem object. The TravellingSales fitness is the length
# of the tour, so the problem has to be posed as a minimization; with
# maximize=True the optimizers would search for the longest tour instead.
problem = mlrose.TSPOpt(length=len(coords_list), fitness_fn=fitness_coords, maximize=False)

# Now that we've defined the problem, let's see which algorithm solves it best.
# Each plot() call below gets its own chart name: with the default name every
# algorithm would overwrite the same pair of files.

# Randomized Hill Climbing
best_state, best_fitness, fitness_curve = mlrose.random_hill_climb(
    problem, max_attempts=100, restarts=50, curve=True, random_state=init_state)

print('Random Hill Climb thinks best fitness is ', best_fitness)
plot(fitness_curve, 'tsp_random_hill_climbing')

# Simulated Annealing
schedule = mlrose.ExpDecay()
best_state, best_fitness, fitness_curve = mlrose.simulated_annealing(
    problem, schedule=schedule, max_attempts=100, curve=True, max_iters=50,
    random_state=init_state)

print('Simulated Annealing thinks best fitness is ', best_fitness)
plot(fitness_curve, 'tsp_simulated_annealing')

# Genetic Algorithm
best_state, best_fitness, fitness_curve = mlrose.genetic_alg(
    problem, pop_size=200, pop_breed_percent=0.75, elite_dreg_ratio=0.99,
    minimum_elites=0, minimum_dregs=0, mutation_prob=0.1, max_attempts=100,
    max_iters=1000, curve=True, random_state=init_state,
    state_fitness_callback=None, callback_user_info=None,
    hamming_factor=0.0, hamming_decay_factor=None)

print('Genetic Algorithm thinks best fitness is ', best_fitness)
plot(fitness_curve, 'tsp_genetic_algorithm')

# MIMIC
best_state, best_fitness, fitness_curve = mlrose.mimic(
    problem, pop_size=200, keep_pct=0.2, max_attempts=10, curve=True,
    random_state=init_state, state_fitness_callback=None,
    callback_user_info=None, noise=0.0)

print('MIMIC thinks best fitness is ', best_fitness)
plot(fitness_curve, 'tsp_mimic')

### Problem 4: Fitting a Neural Network to the Heart Disease Dataset

In [None]:
# Load the heart-disease data set from a local CSV file.
# Source: https://www.kaggle.com/ronitf/heart-disease-uci
# (The unused load_iris() call that used to sit here was dropped: nothing read
# its result, and the data actually modelled below is heart.csv.)
df = pd.read_csv("data/heart.csv")

# Every column except the last is a feature; the last column is the label.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]


In [None]:
# Hold out 20% of the rows as the test set; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=3,
)

In [None]:
# Rescale the features with a MinMaxScaler that is fitted on the training
# split only; the test split reuses the already-fitted scaler.
scaler = MinMaxScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# One-hot encode the target values. The encoder is fitted on the training
# labels only and then reused for the test labels.
one_hot = OneHotEncoder()

# toarray() yields a regular numpy ndarray. The previous todense() returned an
# np.matrix, which recent scikit-learn releases refuse as input.
y_train_hot = one_hot.fit_transform(np.array(y_train).reshape(-1, 1)).toarray()
y_test_hot = one_hot.transform(np.array(y_test).reshape(-1, 1)).toarray()

In [None]:
# Attempt 1: build the classifier and fit its weights with randomized hill
# climbing on the scaled training data.
nn_model1 = mlrose.NeuralNetwork(
    hidden_nodes=[2],
    activation='relu',
    algorithm='random_hill_climb',
    max_iters=1000,
    bias=True,
    is_classifier=True,
    learning_rate=0.0001,
    early_stopping=True,
    clip_max=5,
    max_attempts=100,
    random_state=3,
)

nn_model1.fit(X_train_scaled, y_train_hot)

In [None]:
# Score the first model on the data it was trained on.
y_train_pred = nn_model1.predict(X_train_scaled)
y_train_accuracy = accuracy_score(y_true=y_train_hot, y_pred=y_train_pred)

print(y_train_accuracy)

In [None]:
# Score the first model on the held-out test split.
y_test_pred = nn_model1.predict(X_test_scaled)
y_test_accuracy = accuracy_score(y_true=y_test_hot, y_pred=y_test_pred)

print(y_test_accuracy)

In [None]:
# Attempt 2: same network settings as attempt 1, but the weights are fitted
# with gradient descent instead of randomized hill climbing.
nn_model2 = mlrose.NeuralNetwork(
    hidden_nodes=[2],
    activation='relu',
    algorithm='gradient_descent',
    max_iters=1000,
    bias=True,
    is_classifier=True,
    learning_rate=0.0001,
    early_stopping=True,
    clip_max=5,
    max_attempts=100,
    random_state=3,
)

nn_model2.fit(X_train_scaled, y_train_hot)

In [None]:
# Score the second model on the data it was trained on.
y_train_pred = nn_model2.predict(X_train_scaled)
y_train_accuracy = accuracy_score(y_true=y_train_hot, y_pred=y_train_pred)

print(y_train_accuracy)

In [None]:
# Score the second model on the held-out test split.
y_test_pred = nn_model2.predict(X_test_scaled)
y_test_accuracy = accuracy_score(y_true=y_test_hot, y_pred=y_test_pred)

print(y_test_accuracy)