In [None]:
import pandas as pd
from pandas.plotting import scatter_matrix
# Read the dataset from disk, then separate it into the model inputs (the
# first four columns, by position) and the target (the 'Name' column).
data = pd.read_csv('New_Data.csv')
X, Y = data.iloc[:, :4], data['Name']
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split is the same on every run.
X_train_org, X_test_org, y_train, y_test = train_test_split(X, Y, random_state = 0, test_size = 0.2)

# Min-max scale the features because their ranges can vary a lot (check X.describe()).
# The scaler is fitted on the training split only, and that same fitted scaling
# is then applied to the test split. Re-fitting on the test rows would scale the
# two splits with different min/max values and leak test-set statistics into
# the preprocessing step.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_org)
X_test = scaler.transform(X_test_org)

### Check this Keras documentation for everything about the Model :https://keras.io/models/model/ or Sequential which is the most basic type: https://keras.io/getting-started/sequential-model-guide/

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras import optimizers

# Creating the model
### First, we must create the model and specify the number of layers using model.add(Dense()). We also specify the type of activation which is how the layer will convert input to an output that the next layer will use as input. Read this for more on activations: https://keras.io/activations/
#### Dense(units, ...): The first argument is the number of units (neurons) in that layer. Each model.add(Dense(...)) call adds one layer to the model.
#### Activation: The activation parameter in Dense() specifies the type of activation function being used. The purpose of an activation function is to convert a node of input to an output signal. This output signal will now be used as input in the next layer. One common type is sigmoid: S-shaped range of 0 to 1. Another is tanh: Range of -1 to 1. Suffers from vanishing gradient problem. A POPULAR ONE is ReLu: Avoids vanishing gradient problem. Should be used within hidden layers. 
#### input_dim is the number of features(columns).


In [None]:
# Two-layer network: a Dense layer of 8 units with softmax activation fed by
# the 4 input features, followed by a single-unit output layer.
# An extra 2-unit sigmoid layer between them is left disabled:
#model.add(Dense(2, activation = "sigmoid"))
model = Sequential([
    Dense(8, activation="softmax", input_dim=4),  # input_dim is the number of features
    Dense(1),  # no activation given, so the default (linear) is used
])

# Compilation
### Before training a model, we must compile the model by configuring the optimization process. We use .compile() and pass in 3 arguments: optimizer function type, loss function we are trying to minimize with our model, (optional) list of metrics to calculate. (The specified metrics are returned in a list after evaluation).

#### Learning rate is essentially how quickly a model abandons old beliefs for new ones. It is also how fast the weights change. For example, if the model sees 10 orange cats it will think all cats are orange. If it sees a black cat and has a low learning rate, it will be slow to learn that cats can also be black and will think that it is an outlier. With a very high learning rate, it will change its mind very quickly for the new data. A good learning rate is low enough that the network converges to something useful, but high enough that it doesn't take too long to train.
#### Optimization procedures (optimizers) are the algorithms that adjust the model's weights in order to minimize the loss (cost) function. They are used to minimize loss in the training process. The optimizer parameter specifies the type of optimizer being used. To see all the optimizer parameters and their information, check this documentation by Keras: https://keras.io/optimizers/ 
#### The loss parameter specifies how the loss is calculated. Loss is calculated on training and validation. Loss is NOT a percentage. It is the summation of the errors made for each example in the training and validation sets. Therefore, the goal of a model is to reduce the loss by changing weight values through different optimization methods. Loss value implies how well or poorly a certain model behaves after each iteration of optimization. Ideally, one would expect the reduction of loss after each, or several, iteration(s). The accuracy of a model is usually determined after the model parameters are learned and fixed and no learning is taking place. It is done by dividing correctly classified samples by all samples.  For more about loss, check this SO question: https://stackoverflow.com/questions/34518656/how-to-interpret-loss-and-accuracy-for-a-machine-learning-model For the TYPES of loss functions, check this KERAS DOCUMENTATION: https://keras.io/losses/
#### metrics: List of metrics to be evaluated by the model during training and testing. Typically you will use metrics=['accuracy']. To specify different metrics for different outputs of a multi-output model, you could also pass a dictionary, such as metrics={'output_a': 'accuracy'}. 


In [None]:
# Configure training: Adamax optimizer, mean-absolute-error loss, and accuracy
# reported as an additional metric.
opt = optimizers.Adamax(
    lr=0.002,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=None,
    decay=0.0,
)
model.compile(loss="mean_absolute_error", optimizer=opt, metrics=['accuracy'])
# Note kept from earlier experiments:
#rmsprop optimizer: 47.62%, mean_absolute error, softplus

# Training
### We use model.fit() to train the model on numpy arrays for a given number of epochs (iterations over the data) and a given batch_size. The model learns from previous iterations and uses its optimizer to try to reduce the loss. Possible parameters are below. 

#### fit(x=None, y=None, batch_size=None, epochs=1, verbose=1, callbacks=None, validation_split=0.0, validation_data=None, shuffle=True, class_weight=None, sample_weight=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None)
#### Epochs: the number of epochs to train for. One epoch is a single pass over the ENTIRE x and y data provided.
#### Batch size: the number of samples (from the data) per gradient update. If unspecified, batch_size will default to 32. Whatever the batch size is, the model takes that number of rows from the data and trains on them; then it goes to the next batch of that size and trains on that. Small batch sizes are good because the neural network gets to update the weights after running each batch. It also requires less memory and often trains faster. The disadvantage is that the gradient estimate will be less accurate. The gradient is the derivative of the loss with respect to the weights; calculating it from all of the training data before each update gives the most accurate estimate, while a small batch only approximates it. For more about batch sizes, check this question: https://stats.stackexchange.com/questions/153531/what-is-batch-size-in-neural-network
#### class_weight: Optional dictionary mapping class indices (integers) to a weight (float) value, used for weighting the loss function (during training only). This can be useful to tell the model to "pay more attention" to samples from an under-represented class.

In [None]:
# Train on the scaled training split: 2500 epochs, one weight update per
# batch of 10 samples, and no per-class weighting.
model.fit(
    X_train,
    y_train,
    batch_size=10,
    epochs=2500,
    class_weight=None,
)

# Evaluation
### Now that we have trained the model, we feed it testing data and evaluate it. model.evaluate(X_test, y_test) returns us the accuracy (it is the second and last element in the array). We can also use model.metrics_names[] to get an array with the metrics and their values that we specified in compilation.

In [None]:
# Score the trained model on the held-out split. Index 1 of both
# metrics_names and the evaluate() result is the accuracy entry.
scores = model.evaluate(X_test, y_test)
metric_label, metric_value = model.metrics_names[1], scores[1]
print("\n%s: %.2f%%" % (metric_label, metric_value*100))

# Automated Testing - Activation, Optimizer, Loss
### The code below runs 40 different tests: every combination of 5 activations (ReLU, SELU, ELU, Softplus, Softmax), 4 optimizers (RMSProp, Adam, Adagrad, Adamax) and 2 loss functions (mean_squared_error, mean_absolute_error).

In [None]:
# Search space for the automated sweep: hidden-layer activations and loss
# functions to try.
activations = ["relu", "selu", "elu", "softplus", "softmax"]
loss = ['mean_squared_error', 'mean_absolute_error']
optimizern = [1, 2, 3, 4]

# One optimizer object per algorithm under test.
opt1 = optimizers.RMSprop(
    lr=0.001, rho=0.9, epsilon=None, decay=0.0,
)
opt2 = optimizers.Adam(
    lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False,
)
opt3 = optimizers.Adagrad(
    lr=0.01, epsilon=None, decay=0.0,
)
opt4 = optimizers.Adamax(
    lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0,
)

In [None]:
# Grid search over every activation x optimizer x loss combination.
# Each run trains a fresh two-layer model and appends
# [test accuracy in percent, "activation: optimizer , loss"] to info_list.
info_list = []
count = 0  # number of completed runs, used for the progress message

# Factories rather than shared instances: every model gets its own optimizer
# object, so optimizer state from one run never carries over into the next.
# The hyperparameters match opt1..opt4 from the configuration cell.
optimizer_factories = [
    ("RMSProp", lambda: optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)),
    ("Adam", lambda: optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)),
    ("Adagrad", lambda: optimizers.Adagrad(lr=0.01, epsilon=None, decay=0.0)),
    ("Adamax", lambda: optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)),
]
total_runs = len(activations) * len(optimizer_factories) * len(loss)

for activename in activations:
    for optimname, make_optimizer in optimizer_factories:
        for lossname in loss:
            description = activename + ": " + optimname + " , " + lossname
            model = Sequential()
            model.add(Dense(8, activation = activename, input_dim = 4)) #input_dim is # of features
            model.add(Dense(1))
            opt = make_optimizer()
            print(description)
            model.compile(optimizer = opt, loss = lossname, metrics = ['accuracy'])
            model.fit(X_train, y_train, epochs = 2500, batch_size = 10, class_weight = None)
            scores = model.evaluate(X_test, y_test)
            info_list.append([scores[1]*100, description])
            print(scores[1]*100,"%")
            count += 1
            print(100 * count / total_runs, "% Complete!")
print(info_list)

In [None]:
# Show every recorded [accuracy, description] entry on its own line.
for result_row in info_list:
    print(result_row)

## Visualizing Data

In [None]:
import itertools
import numpy as np
import matplotlib.pyplot as plt
# Grouped bar chart of recorded test accuracy: one group per hidden-layer
# activation, one bar per optimizer, plus a line for the per-activation average.
# All numbers below are hard-coded results from earlier runs.
objects = ('ReLu', 'SELU', 'ELU', 'Softplus', 'Softmax')
y_pos = np.arange(len(objects))
performance = [90.47,95.24,76.19, 76.19, 90]

## IMPORTANT DATA
# ELU AVERAGE - 29.5
# ADAM AVERAGE - 25.7
# -------
# ReLU - 23.81
# SELU - 26.19
# Softplus - 19.05
# Softmax - 26.19

#plt.bar(y_pos, performance, color = 'g', align='center', alpha=0.5)
#plt.xticks(y_pos, objects)
n_groups = 5
# The default figure size must be set before the figure is created; setting it
# afterwards only affects figures created later.
plt.rcParams["figure.figsize"] = [16, 9]
fig, ax = plt.subplots()
index = np.arange(n_groups)
bar_width = 0.2
opacity = 0.8
# The four bars of a group are centred at index-0.2, index, index+0.2 and
# index+0.4, so the middle of each group sits at index + bar_width/2.
group_centre = index + bar_width / 2

rms_vals = (33.33, 23.81, 33.33, 19.05, 14.29) #Average - 24.762
rects1 = plt.bar(index-0.2, rms_vals, bar_width,
                 alpha=opacity,
                 color='r',
                 label='RMSProp')
adam_vals = (19.05, 33.33, 33.33, 14.29, 28.57) #Average - 25.714
rects2 = plt.bar(index, adam_vals, bar_width,
                 alpha=opacity,
                 color='g',
                 label='Adam')
adagrad_vals = (23.81, 23.81, 33.33, 23.81, 23.81) #Average - 25.714
rects3 = plt.bar(index + 0.2, adagrad_vals, bar_width,
                 alpha=opacity,
                 color='c',
                 label='Adagrad')
adamax_vals = (19.05, 23.81, 23.81, 19.05, 38.1) #Average - 24.764
rects4 = plt.bar(index + 0.4, adamax_vals, bar_width,
                 alpha=opacity,
                 color='m',
                 label='Adamax')
# Plot the averages at numeric positions (not the label strings) so the x axis
# stays purely numeric and the line passes through the middle of each group.
plt.plot(group_centre, (23.81, 26.19, 29.532, 19.05, 26.19) , label = "Average", c= "k")
plt.xlabel('Layer Activation Functions', size = 22)
#plt.ylabel('Accuracy (Unoptimized Learning Rate, Batch Size, Epoch)', size = 17)
#plt.suptitle('Keras Neural Network Sequential Model Accuracy Under Different\nOptimizers, Activations and Mean_Absolute_Error Loss', size = 25)
plt.ylabel('Accuracy (Unoptimized Hyperparameters)', size = 23)

plt.suptitle('Keras Neural Network Sequential Model Accuracy Under\nDifferent Optimizers and Activation Functions', size = 25)

# Activation names go on the x axis as tick labels, one per group centre.
plt.xticks(group_centre, objects, fontsize=18)
plt.yticks([0,5,10,15,20,25,30,35,40], [0,5,10,15,20,25,30,35,40], fontsize=18)
leg = plt.legend(title = "Optimization Algorithms", loc = 1, prop={'size': 18})
#plt.tight_layout()
plt.show()

### Average Accuracies for relu, selu, elu, softplus, softmax

In [None]:
import matplotlib.pyplot as plt
# Line chart of the learning-rate sweep results: accuracy for each of seven
# x positions, one line per configuration.
# NOTE(review): relu_score and softmax_score are first assigned in the
# "Automated Testing - Learning Rate" cell further down, so on a fresh kernel
# (Restart & Run All) this cell raises NameError. It is identical to the
# plotting cell that follows that test -- consider removing this copy.
x_axis = range(1, 8) # x_axis values
%matplotlib inline
#x-values, y-values, Name for legend, color
plt.plot(x_axis, relu_score , label = "ReLu RMSProp", c= "g") #Plots a green line
plt.plot(x_axis, softmax_score, label = "Softmax Adamax", c= "b")  #Plots a blue line
# The tick values are written into the axis label; positions 1..7 on the axis
# correspond to these learning rates in order.
plt.xlabel('[0.0001,      0.001,      0.002,      0.005,      0.01,      0.05,      0.1]')
plt.ylabel('Accuracy')
plt.title("Optimal learning rate for Adamax Optimizer and RMSProp Optimizer.")
plt.legend()
# Hard-coded conclusion text; it is not computed from the scores plotted above.
print("The best is: Softmax, Adamax, mean_absolute_error with learning_rate of 0.05.\nadamax = optimizers.Adamax(lr= 0.05, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)")

In [None]:
#Optimal values were softmax activation, adamax optimizer, and mean_absolute_error.
#optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)

# Automated Testing - Learning Rate
### This automated test is for ReLu, RMSProp, mean_squared_error and SoftMax, Adamax, mean_absolute_error for different learning rates.

In [None]:
# Learning-rate sweep. For every candidate rate a fresh model is trained and
# its test accuracy (in percent) is appended to:
#   relu_score    -- relu activation, RMSprop, mean_squared_error
#   softmax_score -- softmax activation, Adamax, mean_absolute_error
relu_score = []
softmax_score = []
learning_rates = [0.0001, 0.001, 0.002, 0.005, 0.01, 0.05, 0.1]
total_runs = 2 * len(learning_rates)  # every rate is tried once per configuration
counter = 0  # completed-run counter for the progress message

for learning_rate in learning_rates:
    model = Sequential()
    model.add(Dense(8, activation = "relu", input_dim = 4)) #input_dim is # of features
    model.add(Dense(1))
    counter += 1
    print("ReLu", learning_rate, "{:.4f}% Complete".format(100*counter/total_runs),"------------------------------------------------------------------")
    # A new optimizer per model, built with the rate under test.
    rms = optimizers.RMSprop(lr= learning_rate, rho=0.9, epsilon=None, decay=0.0)
    model.compile(optimizer = rms, loss = 'mean_squared_error', metrics = ['accuracy'])
    model.fit(X_train, y_train, epochs = 2500, batch_size = 10, class_weight = None)
    scores = model.evaluate(X_test, y_test)
    relu_score.append(scores[1]*100)
    print(scores[1]*100, learning_rate)

for learning_rate in learning_rates:
    model = Sequential()
    model.add(Dense(8, activation = "softmax", input_dim = 4)) #input_dim is # of features
    model.add(Dense(1))
    counter += 1
    print("SoftMax", learning_rate, "{:.4f}% Complete".format(100*counter/total_runs),"------------------------------------------------------------------")
    adamax = optimizers.Adamax(lr= learning_rate, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)
    model.compile(optimizer = adamax, loss = 'mean_absolute_error', metrics = ['accuracy'])
    model.fit(X_train, y_train, epochs = 2500, batch_size = 10, class_weight = None)
    scores = model.evaluate(X_test, y_test)
    softmax_score.append(scores[1]*100)
    print(scores[1]*100, learning_rate)

print(relu_score)
print(softmax_score)

In [None]:
# Re-display both learning-rate result lists, one per line.
print(relu_score, softmax_score, sep="\n")

In [None]:
import matplotlib.pyplot as plt
x_axis = range(1, 8) # x_axis values
%matplotlib inline
#x-values, y-values, Name for legend, color
plt.plot(x_axis, relu_score , label = "ReLu RMSProp", c= "g") #Plots a green line
plt.plot(x_axis, softmax_score, label = "Softmax Adamax", c= "b")  #Plots a blue line
plt.xlabel('[0.0001,      0.001,      0.002,      0.005,      0.01,      0.05,      0.1]')
plt.ylabel('Accuracy')
plt.title("Optimal learning rate for Adamax Optimizer and RMSProp Optimizer.")
plt.legend()
print("The best is: Softmax, Adamax, mean_absolute_error with learning_rate of 0.05.\nadamax = optimizers.Adamax(lr= 0.05, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)")

# Automated Testing - Batch Size
### This automated test is for ReLu, RMSProp, mean_squared_error and SoftMax, Adamax, mean_absolute_error for different batch sizes.

In [None]:
# Batch-size sweep. For every candidate batch size a fresh model is trained and
# its test accuracy (in percent) is appended to:
#   rms_score    -- relu activation, RMSprop (lr=0.01), mean_squared_error
#   adamax_score -- softmax activation, Adamax (lr=0.05), mean_absolute_error
rms_score = []
adamax_score = []
batch_sizes = [5, 10, 15, 20, 30, 50, 75, 100]
total_runs = 2 * len(batch_sizes)  # every size is tried once per configuration
counter = 0  # completed-run counter for the progress message

for batch_size in batch_sizes:
    model = Sequential()
    model.add(Dense(8, activation = "relu", input_dim = 4)) #input_dim is # of features
    model.add(Dense(1))
    counter += 1
    print("ReLu", batch_size, "{:.4f}% Complete".format(100*counter/total_runs),"------------------------------------------------------------------")
    # A new optimizer per model so no optimizer state is shared between runs.
    rms = optimizers.RMSprop(lr= 0.01, rho=0.9, epsilon=None, decay=0.0)
    model.compile(optimizer = rms, loss = 'mean_squared_error', metrics = ['accuracy'])
    model.fit(X_train, y_train, epochs = 2500, batch_size = batch_size, class_weight = None)
    scores = model.evaluate(X_test, y_test)
    rms_score.append(scores[1]*100)
    print(scores[1]*100, batch_size)

for batch_size in batch_sizes:
    model = Sequential()
    model.add(Dense(8, activation = "softmax", input_dim = 4)) #input_dim is # of features
    model.add(Dense(1))
    counter += 1
    print("SoftMax", batch_size, "{:.4f}% Complete".format(100*counter/total_runs),"------------------------------------------------------------------")
    adamax = optimizers.Adamax(lr= 0.05, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)
    model.compile(optimizer = adamax, loss = 'mean_absolute_error', metrics = ['accuracy'])
    model.fit(X_train, y_train, epochs = 2500, batch_size = batch_size, class_weight = None)
    scores = model.evaluate(X_test, y_test)
    adamax_score.append(scores[1]*100)
    print(scores[1]*100, batch_size)

print(rms_score)
print(adamax_score)

In [None]:
# Re-display both batch-size result lists, one per line.
print(rms_score, adamax_score, sep="\n")

In [None]:
import matplotlib.pyplot as plt
x_axis = range(1, 9) # x_axis values
%matplotlib inline
#x-values, y-values, Name for legend, color
plt.plot(x_axis, rms_score , label = "ReLu RMSProp", c= "g") #Plots a green line
plt.plot(x_axis, adamax_score, label = "Softmax Adamax", c= "b")  #Plots a blue line
plt.xlabel('[5        10         15           20          30           50          75      100]')
plt.ylabel('Accuracy')
plt.title("Optimal batch size for Adamax Optimizer and RMSProp Optimizer.")
plt.legend()
print("The best is: Softmax, Adamax, mean_absolute_error with batch size of 5 and learning_rate of 0.05.\nadamax = optimizers.Adamax(lr= 0.05, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)")

# Automated Testing - Epoch Size
### This test is for improving the accuracy by finding the best epoch value

In [None]:
# Epoch-count sweep. For every candidate number of epochs a fresh model is
# trained (batch size 5) and its test accuracy (in percent) is appended to:
#   erms_score    -- relu activation, RMSprop (lr=0.01), mean_squared_error
#   eadamax_score -- softmax activation, Adamax (lr=0.05), mean_absolute_error
erms_score = []
eadamax_score = []
epoches = [500, 1000, 2500, 5000, 8000]
total_runs = 2 * len(epoches)  # every epoch count is tried once per configuration
counter = 0  # completed-run counter for the progress message

for n_epochs in epoches:
    model = Sequential()
    model.add(Dense(8, activation = "relu", input_dim = 4)) #input_dim is # of features
    model.add(Dense(1))
    counter += 1
    print("ReLu", n_epochs, "{:.1f}% Complete".format(100*counter/total_runs),"------------------------------------------------------------------")
    # A new optimizer per model so no optimizer state is shared between runs.
    rms = optimizers.RMSprop(lr= 0.01, rho=0.9, epsilon=None, decay=0.0)
    model.compile(optimizer = rms, loss = 'mean_squared_error', metrics = ['accuracy'])
    model.fit(X_train, y_train, epochs = n_epochs, batch_size = 5, class_weight = None)
    scores = model.evaluate(X_test, y_test)
    erms_score.append(scores[1]*100)
    print(scores[1]*100, n_epochs)

for n_epochs in epoches:
    model = Sequential()
    model.add(Dense(8, activation = "softmax", input_dim = 4)) #input_dim is # of features
    model.add(Dense(1))
    counter += 1
    print("SoftMax", n_epochs, "{:.1f}% Complete".format(100*counter/total_runs),"------------------------------------------------------------------")
    adamax = optimizers.Adamax(lr= 0.05, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)
    model.compile(optimizer = adamax, loss = 'mean_absolute_error', metrics = ['accuracy'])
    model.fit(X_train, y_train, epochs = n_epochs, batch_size = 5, class_weight = None)
    scores = model.evaluate(X_test, y_test)
    eadamax_score.append(scores[1]*100)
    print(scores[1]*100, n_epochs)

print(erms_score)
print(eadamax_score)

In [None]:
# Re-display both epoch-sweep result lists, one per line.
print(erms_score, eadamax_score, sep="\n")

In [None]:
import matplotlib.pyplot as plt
x_axis = range(1, 6) # x_axis values
%matplotlib inline
#x-values, y-values, Name for legend, color
plt.plot(x_axis, erms_score , label = "ReLu RMSProp", c= "g") #Plots a green line
plt.plot(x_axis, eadamax_score, label = "Softmax Adamax", c= "b")  #Plots a blue line
plt.xlabel('[500               1000               2500                 5000               8000]')
plt.ylabel('Accuracy')
plt.rcParams["figure.figsize"] = [15, 9]
plt.title("Optimal Epoch Number for Adamax Optimizer and RMSProp Optimizer.")
plt.legend()
plt.figure(num=None, figsize=(15, 9), dpi=80, facecolor='w', edgecolor='k')
plt.show()
print("The best is: Softmax, Adamax, mean_absolute_error with batch size of 5 and learning_rate of 0.05.\nadamax = optimizers.Adamax(lr= 0.05, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)")

# Results for Accuracy based on Softmax Activated Layers

In [None]:
layer_scores = [23.80952388048172, 19.0476194024086, 42.85714328289032, 23.80952388048172, 57.14285969734192, 47.61904776096344, 76.1904776096344, 52.3809552192688, 71.42857313156128, 61.90476417541504, 71.42857313156128, 71.42857313156128, 76.1904776096344, 42.85714328289032, 71.42857313156128]
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
x_axis = range(1, 16) # x_axis values
%matplotlib inline
#x-values, y-values, Name for legend, color
plt.plot(x_axis, layer_scores , label = "Softmax, Adamax", c= "b") #Plots a green line
plt.xlabel('Number of Layers', size = 13)
plt.ylabel('Accuracy', size = 13)
plt.title("Accuracy of the Sequential Model with Different Numbers of Softmax-Activated Layers", size = 10)
plt.legend()
#plt.figure(num=None, figsize=(15, 9), dpi=500, facecolor='w', edgecolor='k')
plt.rcParams["figure.figsize"] = [16, 9]
#plt.tight_layout()
plt.show()

# Optimal Model out of All Tests

In [None]:
# Final configuration: 7 softmax units on the 4 input features, a single-unit
# output layer, Adamax at lr=0.05 with mean-absolute-error loss, trained for
# 5000 epochs with batch size 5. The test accuracy is printed as a percentage.
model = Sequential([
    Dense(7, activation="softmax", input_dim=4),  # input_dim is the number of features
    Dense(1),
])
adamax = optimizers.Adamax(
    lr=0.05,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=None,
    decay=0.0,
)
model.compile(loss='mean_absolute_error', optimizer=adamax, metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=5, epochs=5000, class_weight=None)
scores = model.evaluate(X_test, y_test)
accuracy_percent = scores[1]*100
print("ACCURACY: {:.2f}".format(accuracy_percent))

In [None]:
# Second, independent training run of the same final configuration (7 softmax
# units, Adamax lr=0.05, MAE loss, 5000 epochs, batch size 5). The result is
# kept separately in scores2 so it does not overwrite the first run's scores.
model = Sequential([
    Dense(7, activation="softmax", input_dim=4),  # input_dim is the number of features
    Dense(1),
])
adamax = optimizers.Adamax(
    lr=0.05,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=None,
    decay=0.0,
)
model.compile(loss='mean_absolute_error', optimizer=adamax, metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=5, epochs=5000, class_weight=None)
scores2 = model.evaluate(X_test, y_test)
repeat_accuracy_percent = scores2[1]*100
print("ACCURACY: {:.2f}".format(repeat_accuracy_percent))