In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report 

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier

In [None]:
import sys
import subprocess
def install(package):
    """Install ``package`` with pip, using the interpreter that runs this kernel.

    Parameters
    ----------
    package : str
        A pip requirement string, e.g. ``"name"`` or ``"name==1.2.3"``.

    Raises
    ------
    subprocess.CalledProcessError
        If the pip process exits with a non-zero status.
    """
    # Going through `sys.executable -m pip` targets the kernel's own environment.
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])


# Pin the release so a fresh "Restart & Run All" always resolves the same version.
install("mlrose==1.3.0")

In [None]:
import six
import sys
# Workaround: register the standalone `six` package under the module name
# `sklearn.externals.six` BEFORE mlrose is imported, so that an import of that
# name resolves to `six`. Presumably mlrose imports that path and the installed
# scikit-learn no longer ships it -- TODO confirm. Order matters: this line
# must stay above `import mlrose`.
sys.modules['sklearn.externals.six'] = six # hack: some library issue
import mlrose
import numpy as np

In [None]:
# Load the drug-classification table from the Kaggle input directory.
data = pd.read_csv("../input/drug-classification/drug200.csv")

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder


def label_encoder(y):
    """Overwrite column ``y`` of the global ``data`` frame with integer codes.

    A fresh ``LabelEncoder`` is fitted on the column each time it is called.
    The frame is modified in place, the fitted encoder is not kept, and the
    function returns ``None``.
    """
    data[y] = LabelEncoder().fit_transform(data[y])


# Disabled experiment: an extra binary feature flagging Na_to_K >= 15.015.
#data['Na_to_K_Bigger_Than_15'] = [1 if i >=15.015 else 0 for i in data.Na_to_K]
#label_list = ["Sex","BP","Cholesterol","Na_to_K","Na_to_K_Bigger_Than_15","Drug"]

# Columns that get integer-encoded, including the "Drug" target.
# NOTE(review): "Na_to_K" appears to be numeric (the disabled code above compares
# it with 15.015); label-encoding it swaps each value for its code among the
# unique values -- confirm this is intended.
label_list = ["Sex","BP","Cholesterol","Na_to_K","Drug"]

for column_name in label_list:
    label_encoder(column_name)
    
# Separate the feature columns from the (already integer-encoded) target and
# hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X, y = data.drop(['Drug'], axis=1), data['Drug']
X_train, X_test, y_train, y_test= train_test_split(X,y, test_size=0.2, random_state=101)


# Normalize feature data. The scaler is fitted on the training split only and
# then re-used on the test split, so no test statistics leak into training.
scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# OneHotEncoder works on 2-D input, so turn each target Series into a one-column frame.
y_train = y_train.to_frame()
y_test = y_test.to_frame()

# One hot encode target values.
# `.toarray()` yields a plain ndarray; `.todense()` would return a numpy.matrix,
# which NumPy no longer recommends and which behaves differently from ndarrays.
one_hot = OneHotEncoder()
y_train_hot = one_hot.fit_transform(y_train).toarray()
y_test_hot = one_hot.transform(y_test).toarray()

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, multilabel_confusion_matrix, classification_report
import time

# Accumulators filled by the training loop below:
#   results   -> metrics per "<algorithm> train" / "<algorithm> test" key
#   time_map  -> fit time per algorithm
#   curve_map -> fitted model per algorithm
results, time_map, curve_map = {}, {}, {}

def decode_one_hot(data):
    """Collapse each row of ``data`` to the label of its largest column.

    ``data`` is wrapped in a ``pandas.DataFrame``; the result is a one-column
    DataFrame holding, for every row, the column label where the maximum sits.
    """
    table = pd.DataFrame(data)
    winning_column = table.idxmax(axis=1)
    return winning_column.to_frame()

def get_report(true_data, pred):
    """Print a classification report and return its weighted-average summary.

    Both arguments are decoded with ``decode_one_hot`` before scoring.

    Returns
    -------
    dict
        The ``'weighted avg'`` entry of the report dictionary, with the
        report's ``'accuracy'`` value added under the key ``'accuracy'``.
    """
    # Decode once and reuse the labels for both the dict and the text report.
    labels_true = decode_one_hot(true_data)
    labels_pred = decode_one_hot(pred)

    report = classification_report(labels_true, labels_pred, output_dict=True, zero_division=0)
    print(classification_report(labels_true, labels_pred, zero_division=0))

    summary = report['weighted avg']
    summary['accuracy'] = report['accuracy']
    return summary

# Train one network per randomized-optimization algorithm with an otherwise
# identical configuration, then record its metrics, fit time and fitted model.
for algorithm in ('random_hill_climb', 'simulated_annealing', 'genetic_alg'):
    # Initialize neural network object and fit object

    nn_model1 = mlrose.NeuralNetwork(hidden_nodes = [8], activation = 'relu',
                                     algorithm = algorithm, max_iters = 200000,
                                     bias = True, is_classifier = True, learning_rate = 0.001,
                                     early_stopping = True, clip_max = 5, max_attempts = 100, restarts=0, schedule = mlrose.ExpDecay(exp_const=0.001), random_state = 3, curve=True)
    start = time.time()
    # Fit on the MinMax-scaled features: predictions below are made on the
    # scaled matrices, so training must see the same feature scale.
    nn_model1.fit(X_train_scaled, y_train_hot)
    end = time.time()

    # Predict labels for the train and test sets and assess accuracy
    y_train_pred = nn_model1.predict(X_train_scaled)
    y_test_pred = nn_model1.predict(X_test_scaled)

    results[(f'{algorithm} train')] = get_report(y_train_hot, y_train_pred)
    results[(f'{algorithm} test')] = get_report(y_test_hot, y_test_pred)
    # Fit time is stored truncated to whole seconds.
    time_map[algorithm] = int(end - start)
    # Keep the fitted model so its weights and fitness curve can be inspected later.
    curve_map[algorithm] = nn_model1

print(time_map)
print(results)
#print(y_test_hot)
#print(pd.DataFrame(y_test_hot))
#print(pd.DataFrame(y_test_pred))
#print(pd.DataFrame(y_test_hot).idxmax(axis=1).to_frame().corrwith(pd.DataFrame(y_test_pred).idxmax(axis=1), axis =0))

In [None]:
# Dump the fitted random-hill-climb model's weights, predicted probabilities
# and fitness curve, in that order.
rhc_model = curve_map['random_hill_climb']
for attribute_name in ('fitted_weights', 'predicted_probs', 'fitness_curve'):
    print(getattr(rhc_model, attribute_name))

In [None]:
# Dump the fitted simulated-annealing model's weights, predicted probabilities
# and fitness curve, in that order.
sa_model = curve_map['simulated_annealing']
for attribute_name in ('fitted_weights', 'predicted_probs', 'fitness_curve'):
    print(getattr(sa_model, attribute_name))

In [None]:
# Display the metrics dictionary filled by the training loop
# (one entry per "<algorithm> train" / "<algorithm> test" key).
results

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
def plot_results(data, title, xlabel, ylabel):
    """Draw ``data`` as a marker line plot against its 0-based position.

    ``title`` is used both as the axes title and as the legend label;
    ``xlabel`` / ``ylabel`` label the axes. The figure is shown immediately.

    NOTE: a later cell defines another function with this same name; once that
    cell has run, this definition is shadowed.
    """
    fig, ax = plt.subplots()
    ax.set(title=title, xlabel=xlabel, ylabel=ylabel)

    positions = range(len(data))
    ax.plot(positions, data, marker='o', label=title)
    ax.legend()
    plt.show()

In [None]:
# One fitness-curve plot per trained model, in the order RHC, SA, GA.
for model_key, curve_title in (
    ('random_hill_climb', 'RHC fitness_curve'),
    ('simulated_annealing', 'SA fitness_curve'),
    ('genetic_alg', 'GA fitness_curve'),
):
    plot_results(curve_map[model_key].fitness_curve, curve_title, 'iteration', 'fitness')



In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
def plot_results(results, title, xlabel, ylabel, skipped_algorithms=None):
    """Plot either a per-key metric bar chart or a single curve.

    This definition replaces the earlier line-plot ``plot_results`` as soon as
    its cell has run, yet later cells still call ``plot_results`` with a
    fitness curve. To keep both call styles working, the function dispatches
    on the type of ``results``:

    * ``dict`` -- one bar per key (keys sorted), with height
      ``results[key][ylabel]``; ``ylabel`` therefore doubles as the metric name.
    * anything else -- treated as a sequence and drawn as a marker line plot
      against its 0-based index, with ``title`` as the legend label.

    Parameters
    ----------
    results : dict or sequence
        Metrics keyed by name, or the values of a single curve.
    title, xlabel, ylabel : str
        Axes title and axis labels.
    skipped_algorithms : optional
        Accepted for backward compatibility; currently not used. The default
        is ``None`` rather than a shared mutable list.
    """
    fig, ax = plt.subplots()
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)

    if isinstance(results, dict):
        keys = sorted(results.keys())
        values = [results[key][ylabel] for key in keys]
        ax.bar(keys, values)
        # Rotate the existing category labels instead of overwriting them.
        ax.tick_params(axis='x', labelrotation=90)
    else:
        ax.plot(range(len(results)), results, marker='o', label=title)
        ax.legend()
    plt.show()

In [None]:
# Bar chart of the weighted-average precision for every algorithm/split.
plot_results(results, 'precision', 'algorithm', 'precision')

In [None]:
# Bar chart of the weighted-average recall for every algorithm/split.
plot_results(results, 'recall', 'algorithm', 'recall')

In [None]:
# Bar chart of the weighted-average f1-score for every algorithm/split.
plot_results(results, 'f1-score', 'algorithm', 'f1-score')

In [None]:
# Bar chart of the accuracy for every algorithm/split.
plot_results(results, 'accuracy', 'algorithm', 'accuracy')

In [None]:
# Stand-alone re-run of the random-hill-climb network with the same
# configuration as the comparison loop.
algorithm = 'random_hill_climb'

nn_model1 = mlrose.NeuralNetwork(hidden_nodes = [8], activation = 'relu',
                                 algorithm = algorithm, max_iters = 200000,
                                 bias = True, is_classifier = True, learning_rate = 0.001,
                                 early_stopping = True, clip_max = 5, max_attempts = 100, restarts=0, schedule = mlrose.ExpDecay(exp_const=0.001), random_state = 3, curve=True)
start = time.time()
# Fit on the MinMax-scaled features: predictions below are made on the scaled
# matrices, so training must see the same feature scale.
nn_model1.fit(X_train_scaled, y_train_hot)
end = time.time()

# Predict labels for the train and test sets and assess accuracy
y_train_pred = nn_model1.predict(X_train_scaled)
y_test_pred = nn_model1.predict(X_test_scaled)

# NOTE: rebinding `results` discards the metrics gathered earlier; from here on
# it holds only this run's train/test entries.
results= {}
results[(f'{algorithm} train')] = get_report(y_train_hot, y_train_pred)
results[(f'{algorithm} test')] = get_report(y_test_hot, y_test_pred)

plot_results(nn_model1.fitness_curve, 'RHC fitness_curve 2', 'iteration', 'fitness', )

In [None]:
# Display the metrics of the random_hill_climb re-run; `results` was reset in
# the cell above, so only that run's train/test entries are present.
results


In [None]:
# Stand-alone re-run of the simulated-annealing network with the same
# configuration as the comparison loop.
algorithm = 'simulated_annealing'

nn_model1 = mlrose.NeuralNetwork(hidden_nodes = [8], activation = 'relu',
                                 algorithm = algorithm, max_iters = 200000,
                                 bias = True, is_classifier = True, learning_rate = 0.001,
                                 early_stopping = True, clip_max = 5, max_attempts = 100, restarts=0, schedule = mlrose.ExpDecay(exp_const=0.001), random_state = 3, curve=True)
start = time.time()
# Fit on the MinMax-scaled features: predictions below are made on the scaled
# matrices, so training must see the same feature scale.
nn_model1.fit(X_train_scaled, y_train_hot)
end = time.time()

# Predict labels for the train and test sets and assess accuracy
y_train_pred = nn_model1.predict(X_train_scaled)
y_test_pred = nn_model1.predict(X_test_scaled)

# NOTE: rebinding `results` discards the metrics gathered earlier; from here on
# it holds only this run's train/test entries.
results= {}
results[(f'{algorithm} train')] = get_report(y_train_hot, y_train_pred)
results[(f'{algorithm} test')] = get_report(y_test_hot, y_test_pred)

plot_results(nn_model1.fitness_curve, 'SA fitness_curve 2', 'iteration', 'fitness', )

In [None]:
# Display the metrics of the simulated_annealing re-run; `results` was reset in
# the cell above, so only that run's train/test entries are present.
results