In [None]:
# Install kerastuner
!pip install keras-tuner

In [2]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

In [3]:
#  Import and read the charity_data.csv.
url = "https://raw.githubusercontent.com/kevogil/charity-funding-predictor/main/Resources/charity_data.csv"
application_df = pd.read_csv(url)

# Drop the non-beneficial ID columns, 'EIN' and 'NAME'.
application_df.drop(columns=['EIN', 'NAME', 'ORGANIZATION', 'SPECIAL_CONSIDERATIONS'], inplace=True)

# Look at APPLICATION_TYPE value counts for binning
app_cnt = application_df['APPLICATION_TYPE'].value_counts()

# Choose a cutoff value and create a list of application types to be replaced
# use the variable name `application_types_to_replace`
cutoff_point = 500
application_types_to_replace = app_cnt[app_cnt < cutoff_point].index.tolist()


# Replace in dataframe
for app in application_types_to_replace:
    application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(app,"Other")

# Look at CLASSIFICATION value counts for binning
class_cnt = application_df['CLASSIFICATION'].value_counts()

# Choose a cutoff value and create a list of classifications to be replaced
# use the variable name `classifications_to_replace`
cutoff_point = 1000
classifications_to_replace = class_cnt[class_cnt < cutoff_point].index.tolist()

# Replace in dataframe
for cls in classifications_to_replace:
    application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(cls,"Other")
    
# Convert categorical data to numeric with `pd.get_dummies`
numeric_application_df = pd.get_dummies(application_df)

# Split our preprocessed data into our features and target arrays
y = numeric_application_df['IS_SUCCESSFUL']
x = numeric_application_df.drop(columns=['IS_SUCCESSFUL'])

# Split the preprocessed data into a training and testing dataset
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)

# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
x_scaler = scaler.fit(x_train)

# Scale the data
x_train_scaled = x_scaler.transform(x_train)
x_test_scaled = x_scaler.transform(x_test)

In [4]:
### Compile, Train and Evaluate the Model

In [5]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=10,
        step=2), activation=activation, input_dim=len(x_train_scaled[0])))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [6]:
# Import the kerastuner library
import kerastuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

INFO:tensorflow:Reloading Oracle from existing project ./untitled_project/oracle.json
INFO:tensorflow:Reloading Tuner from ./untitled_project/tuner0.json


  


In [7]:
# Run the kerastuner search for best hyperparameters
tuner.search(x_train_scaled,y_train,epochs=20,validation_data=(x_test_scaled,y_test))

Trial 61 Complete [00h 00m 30s]
val_accuracy: 0.7248979806900024

Best val_accuracy So Far: 0.7271137237548828
Total elapsed time: 00h 01m 02s
INFO:tensorflow:Oracle triggered exit


In [8]:
# Get best model hyperparameters
best_hyper = tuner.get_best_hyperparameters(3)[0]
best_hyper.values

{'activation': 'tanh',
 'first_units': 5,
 'num_layers': 2,
 'tuner/bracket': 1,
 'tuner/epochs': 20,
 'tuner/initial_epoch': 7,
 'tuner/round': 1,
 'tuner/trial_id': 'df65570c8935180e15bed8bd93f1df2f',
 'units_0': 5,
 'units_1': 9,
 'units_2': 3,
 'units_3': 9,
 'units_4': 7}

In [9]:
# Evaluate best model against full test data
best_model = tuner.get_best_models(3)[0]
model_loss, model_accuracy = best_model.evaluate(x_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - loss: 0.5659 - accuracy: 0.7271 - 404ms/epoch - 2ms/step
Loss: 0.5658575892448425, Accuracy: 0.7271137237548828
