This version includes code copied from the website, especially the code that builds the model itself.

In [1]:
from numpy import loadtxt #allows to load data from a text file. Documentation here: https://numpy.org/doc/stable/reference/generated/numpy.loadtxt.html

# Source CSV for the dataset; each row holds comma-separated numeric values.
DATA_URL = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv'

# delimiter is the character that separates the different values on a row
dataset = loadtxt(DATA_URL, delimiter=',')

# Show the (truncated) array followed by its (rows, columns) shape.
print(dataset)
print(dataset.shape)

[[  6.    148.     72.    ...   0.627  50.      1.   ]
 [  1.     85.     66.    ...   0.351  31.      0.   ]
 [  8.    183.     64.    ...   0.672  32.      1.   ]
 ...
 [  5.    121.     72.    ...   0.245  30.      0.   ]
 [  1.    126.     60.    ...   0.349  47.      1.   ]
 [  1.     93.     70.    ...   0.315  23.      0.   ]]
(768, 9)


In [2]:
# Split the loaded array column-wise: the first eight columns (indices 0-7)
# become the inputs x, and the last column (index 8) becomes the target y.
x = dataset[:, :8]
y = dataset[:, 8]

# Plain strings where nothing is interpolated; real f-string placeholders
# where values are. The printed text is the same as before.
print('Output')
print('------')
print(f'x shape is: {x.shape} while x type is: {type(x)}')
print(f'y shape is: {y.shape} while y type is: {type(y)}')

Output
------
x shape is: (768, 8) while x type is: <class 'numpy.ndarray'>
y shape is: (768,) while y type is: <class 'numpy.ndarray'>


In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import talos
#from talos.utils.metrics import f1score 
# For some reason this import does not work, so the full path (talos.utils.metrics.f1score) is used directly instead. It might be worth looking into why it didn't work.
#from talos.utils.callbacks import TrainingPlot
# same with this

def diabetes(x_train, y_train, x_val, y_val, params):
    """Build, compile and fit one candidate network for a hyperparameter scan.

    The network is a single hidden Dense layer, a Dropout layer, and a
    one-unit output layer. Only the keys listed under ``params`` are read;
    any other key present in the dictionary is ignored by this function.

    Args:
        x_train: Training features; ``x_train.shape[1]`` sets the input width.
        y_train: Training labels.
        x_val: Validation features.
        y_val: Validation labels.
        params: Dictionary holding one hyperparameter permutation. Keys read:
            'first_neuron', 'first_activation', 'kernel_initializer',
            'first_dropout', 'last_activation', 'optimizer', 'batch_size'
            and 'epochs' (optional; 100 is used when it is missing).

    Returns:
        A tuple ``(out, model)``: the value returned by ``model.fit`` and the
        fitted model itself.
    """
    # Model definition: hidden layer -> dropout -> single output unit.
    model = Sequential()
    model.add(Dense(
        params['first_neuron'],
        input_dim=x_train.shape[1],
        activation=params['first_activation'],
        kernel_initializer=params['kernel_initializer'],
    ))
    model.add(Dropout(params['first_dropout']))
    model.add(Dense(1, activation=params['last_activation']))

    # Compile. The optimizer is the scanned value; the loss is fixed.
    # Metrics are accuracy and Talos' f1score (referenced by its full path).
    model.compile(
        loss='binary_crossentropy',
        optimizer=params['optimizer'],
        metrics=['accuracy', talos.utils.metrics.f1score],
    )

    # Fit. batch_size and epochs both come from the scanned parameters, so
    # that each 'epochs' permutation really trains for a different length.
    # No TrainingPlot callback is attached, which keeps the scan's progress
    # bar visible. validation_data is passed as a tuple, the documented form.
    out = model.fit(
        x_train,
        y_train,
        validation_data=(x_val, y_val),
        batch_size=params['batch_size'],
        epochs=params.get('epochs', 100),
        verbose=0,
    )

    return out, model

In [4]:
from tensorflow.keras.activations import relu, elu, sigmoid, tanh #since these are only available in the library , we have to import the library itself for the model to interpret the dictionary accordingly
#The model can interpret the functions on its own if it's fed them directly. Here, however, we are not feeding them from its own internal dictionary but from the dictionary that we created, hence the need to import them for our dictionary first
# Hyperparameter grid: each key maps to the list of candidate values to try.
# The number of permutations is the product of the list lengths (93312 here).
#
# 'epochs' is defined exactly once. It previously appeared twice, and Python
# keeps only the last value for a repeated dict key, so the earlier
# [20,30,40] entry never took effect. The effective value [50,100] is kept,
# in the key's original position, so the dictionary is unchanged.
#
# NOTE(review): 'lr', 'hidden_layers', 'second_neuron', 'second_activation',
# 'second_dropout' and 'losses' are not read by diabetes() as written, yet
# every multi-valued one still multiplies the permutation count. Consider
# wiring them into the model or dropping them from the grid.
p = {
    'lr':[0.5,5,10],
    'hidden_layers': [0,1,2],
    'epochs':[50,100],
    'first_dropout':[0,0.2,0.25],
    'first_neuron': [12, 24, 36],
    'first_activation': ['relu', 'elu'],
    'kernel_initializer': ['uniform', 'normal'],
    'second_neuron': [12, 24, 36],
    'second_activation': ['relu', 'elu'],
    'second_dropout':[0,0.2,0.4],
    'last_activation': ['sigmoid', 'tanh'],
    'optimizer': ['Adam', 'Nadam'],
    'losses': ['binary_crossentropy'],
    'batch_size':[20,40]
}

In [5]:
# Run the hyperparameter scan (the .scan method written about in the paper).
#   x, y            - the input features and the labels
#   params          - the dictionary with the parameters
#   model           - the model to scan; ours is wrapped in a function, so the
#                     function name (diabetes) is passed rather than a model object
#   experiment_name - used to create the experiment's log folder (where the data is stored)
t = talos.Scan(
    x=x,
    y=y,
    params=p,
    model=diabetes,
    experiment_name='diabetes',
)

# Write the scan's data attribute out as a CSV file.
t.data.to_csv('analysis.csv')

  0%|          | 1/93312 [00:15<392:55:31, 15.16s/it]

KeyboardInterrupt: 