### Import Dependencies

In [None]:
# for machine learning/neural network
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import keras_tuner as kt

# for data handling
import pandas as pd

# general use
from os.path import join

### Preprocessing

In [None]:
# load the raw charity dataset from the resources folder
raw_data_path = join("resources", "charity_data.csv")
df0 = pd.read_csv(raw_data_path)

# show the first rows of the raw data
df0.head()

In [None]:
# the EIN and NAME columns are not needed by the neural net, so leave them out
df1 = df0.drop(columns=["EIN", "NAME"])

# show the first rows of the reduced dataset
df1.head()

In [None]:
# define unique item threshold: object columns with more distinct values than
# this are candidates for binning their rare categories
unique_item_count = 10

# check for columns that require modification
# modify_columns    -> names of the columns to be binned
# value_count_lists -> the value counts of each of those columns (same order)
modify_columns = []
value_count_lists = []
for col_name, n_unique in df1.nunique().items():
    # compare against the threshold defined above so it can be tuned in one place
    if (n_unique > unique_item_count) and (df1[col_name].dtype == "object"):
        # compute the counts once and reuse them for storage and display
        counts = df1[col_name].value_counts()
        modify_columns.append(col_name)
        value_count_lists.append(counts)
        print(counts)
        print()

In [None]:
# specify cutoff values for the relevant columns
# (one per entry of modify_columns, in the same order)
cutoff_values = [200, 1000]

# fail before touching any data if a column has no cutoff assigned; otherwise
# the loop would stop half-way with some columns already modified
if len(cutoff_values) < len(modify_columns):
    raise ValueError(
        f"{len(modify_columns)} columns need a cutoff value, "
        f"but only {len(cutoff_values)} were given")

# modify the specified columns
for column, counts, cutoff in zip(modify_columns, value_count_lists, cutoff_values):

    # categories that occur fewer than `cutoff` times
    items_to_replace = counts[counts < cutoff].index

    # bin all rare categories of the column into "other" in a single pass
    is_rare = df1[column].isin(items_to_replace)
    df1[column] = df1[column].mask(is_rare, "other")

    # display the modified column
    print(df1[column].value_counts())
    print()

In [None]:
# one-hot encode the categorical columns into dummy/indicator columns
df2 = pd.get_dummies(data=df1)

# show the first rows of the encoded dataset
df2.head()

In [None]:
# separate the target column (y) from the feature columns (X)
target_column = "IS_SUCCESSFUL"
X = df2.drop(columns=target_column).values
y = df2[target_column].values

# hold out a test set; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# fit the scaler on the training features only, then apply the same
# transformation to both the training and the testing features
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Neural Net Builder

In [None]:
# model-building function handed to the tuner
def build_model(hp):
    """Build and compile a binary classifier from tuner-supplied hyperparameters.

    Hyperparameters requested from ``hp``:
      * "activation"  - one of "relu", "tanh", "sigmoid", shared by every
        layer except the output layer
      * "first_units" - width of the first dense layer (2 to 10, step 2)
      * "num_layers"  - number of additional dense layers (1 to 10)
      * "units_<i>"   - width of additional layer ``i`` (2 to 10, step 2)

    The input width is read from the module-level ``X_train_scaled``.

    Returns the compiled ``Sequential`` model (binary cross-entropy loss,
    adam optimizer, accuracy metric).
    """
    net = tf.keras.models.Sequential()

    # a single activation is picked per trial and reused for all dense layers
    chosen_activation = hp.Choice("activation", ["relu", "tanh", "sigmoid"])

    # first dense layer; it also declares the number of input features
    first_width = hp.Int("first_units", min_value=2, max_value=10, step=2)
    n_features = X_train_scaled.shape[1]
    net.add(tf.keras.layers.Dense(
        units=first_width,
        activation=chosen_activation,
        input_dim=n_features))

    # additional dense layers, each with its own tunable width
    extra_layer_count = hp.Int("num_layers", 1, 10)
    for layer_idx in range(extra_layer_count):
        layer_width = hp.Int(
            "units_" + str(layer_idx), min_value=2, max_value=10, step=2)
        net.add(tf.keras.layers.Dense(
            units=layer_width, activation=chosen_activation))

    # single sigmoid unit for the binary output
    net.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    net.compile(
        loss="binary_crossentropy",
        optimizer="adam",
        metrics=["accuracy"])

    return net

### Compile, Train, and Evaluate

In [None]:
# create the Hyperband tuner that searches over build_model's hyperparameters,
# ranking trials by validation accuracy
tuner = kt.Hyperband(
    hypermodel=build_model,
    objective="val_accuracy",
    max_epochs=50,
    hyperband_iterations=3,
    overwrite=True,
)

In [None]:
# run the tuner
# NOTE: trials are scored on a slice held out from the training data
# (validation_split) rather than on the test set, so the test set stays unseen
# during hyperparameter selection and the final evaluation is not
# optimistically biased
tuner.search(X_train_scaled, y_train, epochs = 50, validation_split = 0.2)

In [None]:
# fetch the hyperparameter set of the top-ranked trial
top_hyperparameter_sets = tuner.get_best_hyperparameters(num_trials=1)
best_hps = top_hyperparameter_sets[0]

# show the chosen hyperparameter values
best_hps.values

In [None]:
# score the top-ranked model on the testing data
top_models = tuner.get_best_models(num_models=1)
best_model = top_models[0]
evaluation = best_model.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_acc = evaluation
print(f"Loss: {model_loss:,.4f}, Accuracy: {model_acc:,.4f}")

# OUTPUT (recorded from an earlier run)...
# 268/268 - 0s - loss: 0.5727 - accuracy: 0.7341 - 375ms/epoch - 1ms/step
# Loss: 0.5727, Accuracy: 0.7341

In [None]:
# write the best model to the output folder in HDF5 format
model_path = join("output", "alphabet_soup_charity_optimized.h5")
best_model.save(model_path)