## NEURALNETWORKSANDDEEPLEARNING/TUNINGNEURALNETWORKS/NEURALNETWORKSANDDEEPLEARNING TUNINGNEURALNETWORKS 2 EXERCISE ANSWERS ##
#### Please refer to module 1 of NeuralNetworksAndDeepLearning-TuningNeuralNetworks for Tasks 1-9
#### Task 1 
##### Load the libraries that are used in this module.
#### Result:


In [None]:
# Helper packages.
import os
import pickle
import pandas as pd
import numpy as np
# Scikit-learn packages.
from sklearn import metrics
from sklearn.model_selection import train_test_split
# TensorFlow and supporting packages.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dropout
from kerastuner.tuners import RandomSearch
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
import neptune.new as neptune
from neptune.new.integrations.tensorflow_keras import NeptuneCallback


#### Task 2
##### Set the working directory to the data directory.
##### Print the working directory.
#### Result:


In [None]:
# Derive the project folder ('main_dir') and its data folder ('data_dir')
# from the current working directory: the project root sits two levels up.
from pathlib import Path
home_dir = Path.cwd().resolve()
main_dir = home_dir.parent.parent
print(main_dir)
# 'data_dir' is kept as a plain string because later cells append file names to it.
data_dir = f"{main_dir}/data"
print(data_dir)


#### Task 3
##### Load the dataset `bank_marketing.csv` and save it to `bank_marketing`.
##### Print the first few rows of `bank_marketing`.
#### Result:


In [None]:
# Read the bank marketing data from the data folder and preview the first rows.
bank_marketing = pd.read_csv(f"{data_dir}/bank_marketing.csv")
bank_marketing.head()


#### Task 4
##### Define a convenience function `ex_data_prep` to perform the data cleaning steps mentioned below.


In [None]:
1. Replace the column `y` in the dataframe, by setting it to 1 if `y` is 'yes', otherwise set `y` to 0.
2. Replace the missing values in variable `pdays` with the mean value of the column
3. Perform one hot encoding on the variables with data type object (i.e `job`, `marital`, `education`, `default`, `housing`, `loan`, `contact`, `month`, `day_of_week` and `poutcome`) except the target variable `y`
4. Drop the original variables and concatenate the dummies to the original dataset
5. Select the predictors by dropping variable `y` and save the result to a dataframe `X_ex`
6. Save the target variable `y` column to `y_ex` variable
7. Set the seed to 1
8. Split the data into training, test, and validation sets with 70:15:15 ratio and save respective variables to `X_train_ex`, `X_test_ex`, `X_val_ex`, `y_train_ex`, `y_test_ex`, `y_val_ex`
9. Scale the train, test, and validation datasets using a min-max scaler and save them as `X_train_scaled_ex`, `X_test_scaled_ex`, and `X_val_scaled_ex`, respectively


#### Result:


In [None]:
def ex_data_prep(df):
    """Clean, encode, split, and scale the bank marketing data.

    Steps performed:
      1. Recode the target `y` to 1 where it equals 'yes' and 0 otherwise.
      2. Fill missing `pdays` values with the mean of that column.
      3. One-hot encode the categorical predictors, dropping the first level
         of each to avoid redundant columns.
      4. Split the rows 70/15/15 into train, test, and validation sets.
      5. Min-max scale the predictors; the scaler is fitted on the training
         set only and then applied to the test and validation sets.

    The dataframe passed in is left unchanged, so the function can be
    re-run on the same input and give the same result.

    Args:
        df: DataFrame containing the target column `y`, the column `pdays`,
            and the categorical columns listed in `categorical_cols` below.

    Returns:
        Tuple `(X_train_scaled_ex, X_test_scaled_ex, X_val_scaled_ex,
        y_train_ex, y_test_ex, y_val_ex)`.
    """
    # Imported here because the notebook header only imports `metrics`
    # and `train_test_split` from scikit-learn.
    from sklearn.preprocessing import MinMaxScaler

    categorical_cols = ['job', 'marital', 'education', 'default', 'housing',
                        'loan', 'contact', 'month', 'day_of_week', 'poutcome']

    # Work on a copy: recoding `y` in place on the caller's dataframe would
    # turn every label into 0 the second time this function is called.
    df = df.copy()

    # Convert `y` to 0/1 values.
    df['y'] = np.where(df['y'] == 'yes', 1, 0)

    # Replace missing values in `pdays` only, using that column's own mean.
    df['pdays'] = df['pdays'].fillna(df['pdays'].mean())

    # One-hot encode: the original columns are removed and the dummy columns
    # (prefixed with the source column name) are appended in the listed order.
    df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

    # Separate predictors from target variable.
    X_ex = df.drop(['y'], axis=1)
    y_ex = df['y']

    # Set the seed to 1.
    np.random.seed(1)

    # Split data into train, test, and validation sets with a 70-15-15 split.
    # First hold out 30% of the rows ...
    X_train_ex, X_test_ex, y_train_ex, y_test_ex = train_test_split(
        X_ex.values, y_ex, test_size=.3, random_state=1)
    # ... then cut the held-out rows into two halves: test and validation.
    X_test_ex, X_val_ex, y_test_ex, y_val_ex = train_test_split(
        X_test_ex, y_test_ex, test_size=.5, random_state=1)
    print("Train shape:", X_train_ex.shape, "Test shape:", X_test_ex.shape, "Val shape:", X_val_ex.shape)

    # Scale each feature to the default [0, 1] range. Fitting on the training
    # set only keeps test/validation information out of the scaler.
    min_max_scaler = MinMaxScaler()
    X_train_scaled_ex = min_max_scaler.fit_transform(X_train_ex)
    X_test_scaled_ex = min_max_scaler.transform(X_test_ex)
    X_val_scaled_ex = min_max_scaler.transform(X_val_ex)

    return X_train_scaled_ex, X_test_scaled_ex, X_val_scaled_ex, y_train_ex, y_test_ex, y_val_ex
  
# Run the full preparation pipeline on the loaded data and keep all six pieces.
(
    X_train_scaled_ex,
    X_test_scaled_ex,
    X_val_scaled_ex,
    y_train_ex,
    y_test_ex,
    y_val_ex,
) = ex_data_prep(bank_marketing)


#### Task 5
##### Define a list `METRICS` with the performance metrics - true positives, false positives, true negatives, false negatives, accuracy, precision, recall and auc.
#### Result:


In [None]:
# Performance metrics tracked during training and evaluation:
# confusion-matrix counts first, then the summary scores.
METRICS = [
    keras.metrics.TruePositives(name='tp'),
    keras.metrics.FalsePositives(name='fp'),
    keras.metrics.TrueNegatives(name='tn'),
    keras.metrics.FalseNegatives(name='fn'),
    keras.metrics.BinaryAccuracy(name='accuracy'),
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
    keras.metrics.AUC(name='auc'),
]


#### Task 6
##### Define the function to create a simple sequential neural network model with 32 neurons for the 1st hidden layer, 32 neurons for the second layer, and appropriate input and output layers, default learning rate as 0.75 and name the model `ex_create_model`
##### Compile the model using "adam" optimizer, "binary_crossentropy" loss, and using the `METRICS` list as metrics
#### Result:


In [None]:
# Build and compile the baseline network.
def ex_create_model(lr = .75):
    """Return a compiled baseline classifier with two 32-unit hidden layers.

    The input width is read from the global `X_train_scaled_ex`, and the
    metrics come from the global `METRICS` list.

    Args:
        lr: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    # Fix the TensorFlow seed so that results can be reproduced.
    tf.random.set_seed(1)
    adam_optimizer = Adam(learning_rate=lr)

    n_features = X_train_scaled_ex.shape[1]

    network = Sequential()
    # Input and 1st hidden layer.
    network.add(Dense(32, activation='relu', input_dim=n_features))
    # 2nd hidden layer.
    network.add(Dense(32, activation='relu'))
    # Output layer: a single sigmoid unit for the binary target.
    network.add(Dense(1, activation='sigmoid'))

    network.compile(loss='binary_crossentropy',
                    optimizer=adam_optimizer,
                    metrics=METRICS)
    return network


#### Task 7
##### Create a new Exercise project in your Neptune account
##### Initialize Neptune client using the init function
#### Result:


In [None]:
# Initialize the Neptune client. Replace both placeholders with your own
# project path and API token before running this cell.
run = neptune.init(
    api_token='API_TOKEN',
    project='USER_NAME/PROJECT_NAME',
)


#### Task 8
##### Create a Neptune callback and save it as `neptune_callback`; add it to `callbacks` list.
##### Using `ex_create_model` function defined above, instantiate a keras model, and save it as `model`.
##### Fit the model using the same parameters as above, but also adding the `callbacks` argument to it.
##### Go to Neptune website to check the result
#### Result:


In [None]:
# Wrap the Neptune run in a Keras callback and collect it in the callbacks list.
neptune_callback = NeptuneCallback(run=run)
callbacks = [neptune_callback]

# Instantiate the base model and train it for 25 epochs, checking the
# validation set after each epoch and reporting through the callbacks.
model = ex_create_model()
model_default_ex = model.fit(
    x=X_train_scaled_ex,
    y=y_train_ex,
    validation_data=(X_val_scaled_ex, y_val_ex),
    epochs=25,
    callbacks=callbacks,
)


#### Task 9
##### Predict and evaluate the model on test data. Save the result in the variable `result`.
##### Print the result.
#### Result:


In [None]:
# Score the base model on the held-out test set and show the returned values.
result = model.evaluate(x=X_test_scaled_ex, y=y_test_ex)
print(result)


#### Please refer to module 2 of NeuralNetworksAndDeepLearning-TuningNeuralNetworks for Tasks 10-14
#### Task 10
##### Define the function `ex_tune_model` to tune the following hyperparameters:


In [None]:
1. activation function among `softmax, softplus, softsign, relu, tanh, sigmoid, hard_sigmoid, linear`
2. number of neurons with the min_value as 8, max_value as 128 and step size as 16 in all hidden layers.
3. Add a dropout layer with the min_value set to 0.0, max_value as 0.5, default value as 0.25 and step size of 0.05.
4. Optimizer among `adam, sgd, rmsprop`.
5. Learning rate among 1e-2, 1e-3, 1e-4.


##### Compile the model and set the metric to `accuracy`.
#### Result:


In [None]:
def ex_tune_model(hp):
    """Build and compile one candidate model for the hyperparameter search.

    Samples from `hp`:
      * `units`         - neurons per hidden layer (8 to 128, step 16)
      * `activation`    - activation used in both hidden layers
      * `dropout_1`     - dropout rate (0.0 to 0.5, step 0.05, default 0.25)
      * `optimizer`     - one of 'adam', 'sgd', 'rmsprop'
      * `learning_rate` - one of 1e-2, 1e-3, 1e-4

    The input width is read from the global `X_train_scaled_ex`.

    Args:
        hp: Hyperparameter container supplied by the tuner.

    Returns:
        The compiled model, tracking `accuracy`.
    """
    units = hp.Int('units',
                   min_value=8,
                   max_value=128,
                   step=16)

    activation = hp.Choice('activation',
                           [
                               'softmax', 'softplus', 'softsign', 'relu',
                               'tanh', 'sigmoid', 'hard_sigmoid', 'linear'
                           ])

    dropout_1 = hp.Float('dropout_1',
                         min_value=0.0,
                         max_value=0.5,
                         default=0.25,
                         step=0.05)

    optimizer = hp.Choice('optimizer', ['adam', 'sgd', 'rmsprop'])

    lr = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])

    # Passing the optimizer to `compile` by name would leave it at its
    # default learning rate and make the `learning_rate` search a no-op,
    # so build the optimizer object with the sampled rate instead.
    optimizer_classes = {
        'adam': keras.optimizers.Adam,
        'sgd': keras.optimizers.SGD,
        'rmsprop': keras.optimizers.RMSprop,
    }
    opt = optimizer_classes[optimizer](learning_rate=lr)

    model = keras.Sequential()

    # Input and 1st hidden layer.
    model.add(keras.layers.Dense(units=units,
                                 activation=activation,
                                 input_dim=X_train_scaled_ex.shape[1]))

    # 2nd hidden layer.
    model.add(keras.layers.Dense(units=units,
                                 activation=activation))

    model.add(Dropout(rate=dropout_1))

    # Output layer: a single sigmoid unit for the binary target.
    model.add(keras.layers.Dense(1, activation='sigmoid'))

    model.compile(
        optimizer=opt,
        loss='binary_crossentropy',
        metrics=['accuracy'])

    return model


#### Task 11
##### Set the MAX_TRIALS to 10 and EXECUTIONS_PER_TRIAL to 5.
##### Define the Random search tuner.
##### Set the directory and project_name to `ex_final_tuned_model`.
##### Look up the search space and tune the model using the search function.
#### Result:


In [None]:
# Search budget: number of hyperparameter combinations to try, and how many
# times each combination is trained.
MAX_TRIALS = 10
EXECUTIONS_PER_TRIAL = 5

# Random-search tuner that ranks trials by validation accuracy.
tuner = RandomSearch(
    ex_tune_model,
    objective='val_accuracy',
    seed=1,
    max_trials=MAX_TRIALS,
    executions_per_trial=EXECUTIONS_PER_TRIAL,
    project_name='ex_final_tuned_model',
    directory='ex_final_tuned_model',
)

# Show the search space, then run the search.
tuner.search_space_summary()
tuner.search(
    x=X_train_scaled_ex,
    y=y_train_ex,
    validation_data=(X_val_scaled_ex, y_val_ex),
    epochs=25,
)
    


#### Task 12
##### View the optimal parameters using get_best_trials.
#### Result:


In [None]:
# Hyperparameter values of the single best trial found by the tuner.
opt_model_result = (
    tuner.oracle
    .get_best_trials(num_trials=1)[0]
    .hyperparameters
    .values
)
opt_model_result


#### Task 13
##### Define the function `ex_create_optimized_model` and set the parameters to optimal values obtained earlier.
#### Result:


In [None]:
def ex_create_optimized_model(units,
                              activation,
                              dropout_1,
                              optimizer,
                              learning_rate,
                              dropout_seed = 1):
    """Build and compile the model with the tuned hyperparameter values.

    The input width is read from the global `X_train_scaled_ex`, and the
    metrics come from the global `METRICS` list.

    Args:
        units: Number of neurons in each of the two hidden layers.
        activation: Activation used in both hidden layers.
        dropout_1: Dropout rate; pass None to leave the dropout layer out.
        optimizer: Optimizer name. 'adam', 'sgd' and 'rmsprop' (any case)
            are built with `learning_rate`; any other value is handed to
            `compile` unchanged.
        learning_rate: Learning rate applied to the optimizer.
        dropout_seed: Seed for the dropout layer.

    Returns:
        The compiled Sequential model.
    """
    # Set up model.
    model = Sequential()
    model.add(Dense(units,
                    input_dim = X_train_scaled_ex.shape[1],
                    activation = activation))
    model.add(Dense(units,
                    activation = activation))

    if dropout_1 is not None:
        model.add(Dropout(rate = dropout_1, seed = dropout_seed))

    model.add(Dense(1, activation = 'sigmoid'))

    # Compiling with the optimizer's name alone would silently discard
    # `learning_rate`, so build the optimizer object with the tuned rate.
    optimizer_classes = {
        'adam': keras.optimizers.Adam,
        'sgd': keras.optimizers.SGD,
        'rmsprop': keras.optimizers.RMSprop,
    }
    if isinstance(optimizer, str):
        optimizer_class = optimizer_classes.get(optimizer.lower())
        if optimizer_class is not None:
            optimizer = optimizer_class(learning_rate = learning_rate)

    # Compile model.
    model.compile(loss = 'binary_crossentropy', optimizer = optimizer, metrics = METRICS)
    return model
  


#### Task 14
##### Fit the model using the same parameters as above, and also add the `callbacks` argument to it.
##### Open Neptune and view the results.
#### Result:


In [None]:
# Build the model from the best hyperparameters found by the tuner.
tb_model = ex_create_optimized_model(**opt_model_result)

# Train it like the base model; verbose=0 silences the per-epoch console
# output (optional), and the callbacks report the run.
tb_model.fit(
    x=X_train_scaled_ex,
    y=y_train_ex,
    epochs=25,
    validation_data=(X_val_scaled_ex, y_val_ex),
    callbacks=callbacks,
    verbose=0,
)


#### Task 15
##### Evaluate the optimized model on test data using `tb_model.evaluate`  and compare the results with the base model.
#### Result:


In [None]:
# Evaluate the tuned model first, then the base model, on the same test set
# so that their metrics can be compared.
tb_model.evaluate(X_test_scaled_ex, y_test_ex)
model.evaluate(X_test_scaled_ex, y_test_ex)
