In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
import tensorflow as tf
import keras_tuner as kt
from pathlib import Path




In [2]:
# Save rain_df into csv for optimisasation file
file_path = '../../Resources/raindata.csv'

# Export rain_df to CSV
rain_df=pd.read_csv(file_path)
rain_df

Unnamed: 0.1,Unnamed: 0,date,stationCode,stationName,rainfall,relativeHumidity,airTemperature_avg,wind_avg_speed
0,0,2023-01-01,AN001,Allanooka,0.0,66.1,21.0,17.93
1,1,2023-01-02,AN001,Allanooka,0.0,52.2,25.2,16.49
2,2,2023-01-03,AN001,Allanooka,0.0,19.1,32.2,24.77
3,3,2023-01-04,AN001,Allanooka,0.0,22.9,32.1,21.67
4,4,2023-01-05,AN001,Allanooka,0.0,17.8,33.3,18.12
...,...,...,...,...,...,...,...,...
47384,47715,2023-08-09,YU002,Yuna NE,0.2,74.2,14.2,5.72
47385,47716,2023-08-10,YU002,Yuna NE,0.0,72.1,14.8,5.71
47386,47717,2023-08-11,YU002,Yuna NE,0.0,72.8,16.8,6.62
47387,47718,2023-08-12,YU002,Yuna NE,0.0,75.7,15.9,7.51


In [5]:
# Split our preprocessed data into our features and target arrays
y = rain_df["rainfall"].values
# X = rain_df.drop(["rainfall","0","stationName"],axis=1).values
X = rain_df.drop(["rainfall", "Unnamed", "stationName"], axis=1).reset_index(drop=True).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

KeyError: "['Unnamed'] not found in axis"

In [None]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])
    
    # Allow kerastuner to decide number of neurons in the first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=64,
        max_value=512,
        step=2), activation=activation, input_dim=len(X_train_scaled[0])))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 20)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=64,
            max_value=256,
            step=2),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
    
    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [None]:
# Initialize the Keras Tuner
tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=100,
    hyperband_iterations=2)

In [None]:
# Define a callback to stop training early if there's no improvement in validation accuracy
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True
)

In [None]:
# Run the kerastuner search for best hyperparameters
# tuner.search(X_train_scaled,y_train,epochs=50,validation_data=(X_test_scaled,y_test))
tuner.search(X_train_scaled, y_train, epochs=50, validation_split=0.2, callbacks=[early_stopping])

In [None]:
# Get best model hyperparameters
best_hyper = tuner.get_best_hyperparameters(1)[0]
# Build the model with the best hyperparameters and train it on the data
model = tuner.hypermodel.build(best_hyper)
# Display the summary of the best model
model.summary()

In [None]:
model.fit(X_train_scaled, y_train, epochs=120)

In [None]:
# Evaluate the model using the test data
model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test, verbose=2)

# Print the evaluation results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=78)
rf_model.fit(X_train_scaled, y_train)
y_pred_rf = rf_model.predict(X_test_scaled)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
# Print the Random Forest evaluation results
print(f"Random Forest Accuracy: {rf_accuracy}")

In [None]:
# Instantiate the Decision Tree model
decision_tree_model = DecisionTreeClassifier(random_state=78)

# Fit the model to the training data
decision_tree_model.fit(X_train_scaled, y_train)

# Make predictions on the test data
y_pred = decision_tree_model.predict(X_test_scaled)

# Evaluate the model accuracy
accuracy_dt = accuracy_score(y_test, y_pred)
# Print the accuracy
print(f"Decision Tree Model Accuracy: {accuracy}")

In [None]:
# Compare accuracies and select the final model
if model_accuracy > accuracy_dt and model_accuracy > accuracy_rf:
    final_model = model
    print("Using Neural Network as the final model.")
elif accuracy_dt > accuracy_rf:
    final_model = decision_tree_model
    print("Using Decision Tree as the final model.")
else:
    final_model = random_forest_model
    print("Using Random Forest as the final model.")

In [None]:
# Export our model to HDF5 file
final_model.save("AlphabetSoupCharity_Optimisation.h5")