In [1]:
# Import dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import sklearn as skl
import pandas as pd
import tensorflow as tf

In [2]:
# Import Hg dataset
Hg_All = pd.read_csv('../Data/Hg_All.csv')
Hg_All.head()

Unnamed: 0.1,Unnamed: 0,Hg0_pM,HgT_pM,DMHg_pM,MMHg_pM,PRES,TEMP,PSAL,DOXY,NITRAT,NITRIT,SILCAT,PHSPHT
0,0,0.16,1.39,,0.012,20.2,-0.7891,29.412,433.4,0.33,0.0,3.53,0.7
1,1,0.16,1.39,,0.012,20.2,-0.7891,29.412,433.4,0.01,0.0,2.06,0.63
2,2,0.04,0.94,,0.026,44.3,-1.2086,31.7411,399.2,6.8,0.13,25.65,1.34
3,3,0.04,0.94,,0.026,44.3,-1.2086,31.7411,398.4,5.1,0.12,23.21,1.26
4,4,0.11,1.73,,0.045,80.7,-1.5536,32.3635,318.8,11.79,0.02,28.68,1.57


In [3]:
# Drop unwanted columns
Hg_All.drop(columns=['Unnamed: 0','HgT_pM','DMHg_pM','MMHg_pM','NITRIT','SILCAT','PHSPHT'], inplace=True)
Hg_All.shape

(1443, 6)

In [4]:
Hg_All = Hg_All.dropna()
Hg_All.shape

(873, 6)

In [5]:
# Split our preprocessed data into our features and target arrays
X = Hg_All.drop(['Hg0_pM'], axis=1)
y = Hg_All['Hg0_pM']

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=5)

In [6]:
# Create scaler instance
X_scaler = skl.preprocessing.StandardScaler()

# Fit the scaler
X_scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [7]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=5,
        step=1), activation=activation, input_dim=5))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation="relu"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [8]:
# Import the kerastuner library
import kerastuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

INFO:tensorflow:Reloading Oracle from existing project .\untitled_project\oracle.json


  


INFO:tensorflow:Reloading Tuner from .\untitled_project\tuner0.json


In [9]:
# Run the kerastuner search for best hyperparameters
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))

INFO:tensorflow:Oracle triggered exit


In [10]:
# Get best model hyperparameters
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'tanh',
 'first_units': 7,
 'num_layers': 2,
 'units_0': 5,
 'units_1': 3,
 'units_2': 3,
 'units_3': 9,
 'units_4': 7,
 'units_5': 5,
 'tuner/epochs': 7,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 1,
 'tuner/round': 0}

In [11]:
# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

7/7 - 0s - loss: 1.5790 - accuracy: 0.1233
Loss: 1.5790430307388306, Accuracy: 0.12328767031431198
