# Lesson 9.03 Deep Learning Optimization (Solution Code)

## Import Libraries

In [1]:
# !pip install scikeras

In [2]:
import numpy
import pandas as pd
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
import warnings
warnings.filterwarnings('ignore')
from scikeras.wrappers import KerasClassifier



## Load Data

In [3]:
# training data
# outcome value = 1 means diabetic while Outcome value = 0 means non-diabetic
data = pd.read_csv('diabetes.csv')
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
# split into predictors and response
X = data.iloc[:,0:8] #predictors
y = data.iloc[:,8] #target

## Create Model

In [5]:
# function to create model for KerasClassifier
def create_model():
    #define model
    model = Sequential() # keras sequential model
    model.add(Input(shape=(8,)))
    model.add(Dense(12, activation='relu')) # input layer contains 8 Independent Variables
    model.add(Dense(1, activation='sigmoid')) # binary classification use sigmoid
    # more info on which activation function to use at machinelearningmastery.com/choose-an-activation-function-for-deep-learning/
    
    # compile model
    # use binary_crossentropy as loss function for binary classification problems
    # on the other hand use categorical_crossentropy as loss function for multi-classification problems
    # use adam optimizer as default
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [6]:
# create model
model = KerasClassifier(model=create_model, verbose=0)

## Optimizing Batch and Epoch

In [7]:
# determine grid search parameters

# The batch size defines the number of samples that will be propagated through the network.
# More info at https://stats.stackexchange.com/questions/153531/what-is-batch-size-in-neural-network
batchSize = [10, 20, 40, 60, 80, 100]

# An epoch is an iteration over the entire x and y data provided.
# More info at https://www.tensorflow.org/api_docs/python/tf/keras/Sequential
epochs = [10, 30, 50]

In [8]:
# store parameters and values in a dictionary
parameter_grid = dict(batch_size=batchSize, epochs=epochs)

# n_jobs = -1 means use all resources / processors avaiable in CPU
grid = GridSearchCV(estimator=model, param_grid=parameter_grid, n_jobs=-1)
grid_result = grid.fit(X, y)



In [9]:
# summarize accuracy results for best set of parameter values
print('Best: %f using %s' % (grid_result.best_score_, grid_result.best_params_))

Best: 0.692743 using {'batch_size': 10, 'epochs': 50}


## Tuning Optimizer Function
Find the best optimizer that fits the model

In [10]:
# create an updated function to tune optimizer 
def create_model():
    #define model
    model = Sequential() # keras sequential model
    model.add(Input(shape=(8,)))
    model.add(Dense(12, activation='relu')) # input layer contains 8 Independent Variables
    model.add(Dense(1, activation='sigmoid')) # binary classification use sigmoid
    # more info on which activation function to use at machinelearningmastery.com/choose-an-activation-function-for-deep-learning/
    
    # compile model
    # use binary_crossentropy as loss function for binary classification problems
    # on the other hand use categorical_crossentropy as loss function for multi-classification problems
    # model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [11]:
# create model using updated function and best param values for epochs and batch_size as found above
model = KerasClassifier(model=create_model, loss='binary_crossentropy', metrics=['accuracy'], epochs=50, batch_size=10)

In [12]:
# define grid search parameters
optimizer = ['adam', 'SGD']
parameter_grid = dict(optimizer=optimizer)

In [13]:
# n_jobs = -1 means use all resources / processors avaiable in CPU
grid = GridSearchCV(estimator=model, param_grid=parameter_grid, n_jobs=-1)
grid_result = grid.fit(X, y)

Epoch 1/50
Epoch 1/50
Epoch 1/50
Epoch 1/50
Epoch 1/50
Epoch 1/50
Epoch 1/50
Epoch 1/50
Epoch 1/50
Epoch 1/50
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5765 - loss: 4.92467 
Epoch 2/50
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6180 - loss: 3.9633  
Epoch 2/50
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5775 - loss: 6.0492
Epoch 2/50
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5811 - loss: 12.2081
Epoch 2/50
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6169 - loss: 6.0890
Epoch 2/50
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 894us/step - accuracy: 0.6604 - loss: 0.7177
Epoch 3/50
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 956us/step - accuracy: 0.6877 - loss: 0.6879
Epoch 3/50
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

In [14]:
# summarize accuracy results for best set of parameter values
print('Best: %f using %s' % (grid_result.best_score_, grid_result.best_params_))

Best: 0.730515 using {'optimizer': 'adam'}
