In [1]:
# Import our dependencies
import pickle
from pathlib import Path

import keras_tuner as kt
import pandas as pd
import tensorflow as tf
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam




In [2]:
# Location of the rain data CSV, relative to this notebook
file_path = "../../Resources/raindata.csv"

# Load the CSV into a DataFrame and display it
rain_df = pd.read_csv(file_path)
rain_df

Unnamed: 0.1,Unnamed: 0,stationCode,stationName,latitude,longitude,altitude,owner,ownerCode,startDate,endDate,...,wind2,wind3,apparentTemperature,etoShort,etoTall,frostCondition,heatCondition,windErosionCondition,richardsonUnit,chillHour
0,0,AN001,Allanooka,-29.063612,114.997161,131.0,WA Department of Primary Industries and Region...,DPIRD,2012-06-19,,...,False,False,True,True,True,True,True,True,True,True
1,1,AM001,Amelup,-34.270827,118.268523,200.0,WA Department of Primary Industries and Region...,DPIRD,2019-10-09,,...,False,False,True,True,True,True,True,True,True,True
2,2,SH002,Babakin,-32.125480,118.004060,313.0,WA Department of Primary Industries and Region...,DPIRD,2016-06-22,,...,False,False,True,True,True,True,True,True,True,True
3,3,BA,Badgingarra,-30.338049,115.539491,284.0,WA Department of Primary Industries and Region...,DPIRD,2008-11-19,,...,False,False,True,True,True,True,True,True,True,True
4,4,BP001,Balingup,-33.796200,116.063980,227.0,WA Department of Primary Industries and Region...,DPIRD,2014-10-24,,...,False,False,True,True,True,True,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220,220,MA003,Yanmah,-34.225653,116.017767,295.0,WA Department of Primary Industries and Region...,DPIRD,2012-12-20,,...,False,False,True,True,True,True,True,True,True,True
221,221,YS,Yilgarn,-31.915622,119.256055,468.0,WA Department of Primary Industries and Region...,DPIRD,2008-11-01,,...,False,False,True,True,True,True,True,True,True,True
222,222,YE001,York East,-31.835878,116.921077,229.0,WA Department of Primary Industries and Region...,DPIRD,2013-11-08,,...,False,False,True,True,True,True,True,True,True,True
223,223,YU001,Yuna,-28.337630,114.989800,329.0,WA Department of Primary Industries and Region...,DPIRD,2012-06-21,,...,False,False,True,True,True,True,True,True,True,True


In [None]:
# Separate the target column (y) from the remaining feature columns (X)
# NOTE(review): "IS_SUCCESSFUL" does not appear among the columns shown in the
# rain_df preview (which is truncated) - confirm it is the intended target.
y = rain_df["IS_SUCCESSFUL"].to_numpy()
X = rain_df.drop(columns=["IS_SUCCESSFUL"]).to_numpy()

# Hold out a test set; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [None]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit on the training split only, so the test split does not influence the scaling
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to the training split, then to the test split
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
# Model-building function handed to KerasTuner
def create_model(hp):
    """Build and compile a Sequential binary classifier from tuner-chosen values.

    Args:
        hp: Hyperparameter object supplied by the tuner. Its ``Choice`` and
            ``Int`` methods are called to pick the activation function, the
            width of the first layer, the number of additional hidden layers
            and the width of each of those layers.

    Returns:
        A compiled Sequential model ending in a single sigmoid unit, using
        binary cross-entropy loss, the 'adam' optimizer and accuracy as metric.
    """
    dense = tf.keras.layers.Dense
    network = tf.keras.models.Sequential()

    # One activation function is chosen and shared by every hidden layer
    hidden_activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])

    # First layer: tunable width; input size is the length of the first row
    # of the module-level X_train_scaled array
    first_units = hp.Int('first_units', min_value=64, max_value=512, step=2)
    network.add(dense(units=first_units,
                      activation=hidden_activation,
                      input_dim=len(X_train_scaled[0])))

    # Additional hidden layers: tunable count, each with its own tunable width
    hidden_layer_count = hp.Int('num_layers', 1, 20)
    for layer_index in range(hidden_layer_count):
        layer_units = hp.Int(f'units_{layer_index}',
                             min_value=64,
                             max_value=256,
                             step=2)
        network.add(dense(units=layer_units, activation=hidden_activation))

    # Single sigmoid output unit for the binary target
    network.add(dense(units=1, activation="sigmoid"))

    # Compile the model
    network.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

    return network

In [None]:
# Set up a Hyperband search over create_model, ranked by validation accuracy
tuner = kt.Hyperband(
    hypermodel=create_model,
    objective="val_accuracy",
    max_epochs=100,
    hyperband_iterations=2,
)

In [None]:
# Stop a trial once validation accuracy has not improved for 10 epochs,
# and roll the model back to the weights from its best epoch
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy', patience=10, restore_best_weights=True
)

In [None]:
# Run the kerastuner search for the best hyperparameters.
# Validation data is carved out of the training split (20%) rather than taken
# from X_test_scaled / y_test, so the test set is not used during tuning.
tuner.search(
    X_train_scaled,
    y_train,
    epochs=50,
    validation_split=0.2,
    callbacks=[early_stopping],
)

In [None]:
# Take the top-ranked hyperparameter set found by the search
best_hyper = tuner.get_best_hyperparameters(num_trials=1)[0]

# Build a model from those hyperparameters (it is fitted in a later cell)
model = tuner.hypermodel.build(best_hyper)

# Show the layer-by-layer summary of that model
model.summary()

In [None]:
# Train the model built from the best hyperparameters on the scaled training data
model.fit(x=X_train_scaled, y=y_train, epochs=120)

In [None]:
# Score the trained network on the held-out test split
model_loss, model_accuracy = model.evaluate(
    x=X_test_scaled,
    y=y_test,
    verbose=2,
)

# Report loss and accuracy; model_accuracy is also read by the comparison cell
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Random Forest model: fit on the scaled training data, then score on the test split
rf_model = RandomForestClassifier(n_estimators=100, random_state=78)
rf_model.fit(X_train_scaled, y_train)
y_pred_rf = rf_model.predict(X_test_scaled)

# accuracy_rf is also read by the model-comparison cell further down
accuracy_rf = accuracy_score(y_test, y_pred_rf)

# Print the Random Forest evaluation results
print(f"Random Forest Accuracy: {accuracy_rf}")

In [None]:
# Instantiate the Decision Tree model (fixed random_state for repeatable results)
decision_tree_model = DecisionTreeClassifier(random_state=78)

# Fit the model to the training data
decision_tree_model.fit(X_train_scaled, y_train)

# Make predictions on the test data
y_pred = decision_tree_model.predict(X_test_scaled)

# Evaluate the model accuracy; accuracy_dt is also read by the comparison cell
accuracy_dt = accuracy_score(y_test, y_pred)

# Print the accuracy
print(f"Decision Tree Model Accuracy: {accuracy_dt}")

In [None]:
# Compare test accuracies and select the final model.
# The neural network is chosen only when it strictly beats both tree models;
# otherwise the decision tree is chosen if it strictly beats the random
# forest, and the random forest is chosen in every remaining case.
if model_accuracy > accuracy_dt and model_accuracy > accuracy_rf:
    final_model = model
    print("Using Neural Network as the final model.")
elif accuracy_dt > accuracy_rf:
    final_model = decision_tree_model
    print("Using Decision Tree as the final model.")
else:
    # The fitted random forest estimator is stored under the name rf_model
    final_model = rf_model
    print("Using Random Forest as the final model.")

In [None]:
# Persist the selected model to disk.
# Only Keras models provide .save(); the scikit-learn estimators that can also
# be selected as final_model do not, so they are serialised with pickle.
if isinstance(final_model, tf.keras.Model):
    # Export the neural network to an HDF5 file
    final_model.save("AlphabetSoupCharity_Optimisation.h5")
else:
    # scikit-learn estimator: write a pickle file under the same base name
    with open("AlphabetSoupCharity_Optimisation.pkl", "wb") as model_file:
        pickle.dump(final_model, model_file)