### Deliverable 1: Preprocessing the Data for a Neural Network

In [30]:
!pip install keras_tuner



In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf

#  Import and read the charity_data.csv.
import pandas as pd 
application_df = pd.read_csv("Resources/charity_data.csv")
#application_df.head()

In [2]:
# Drop the non-beneficial ID columns, 'EIN' and 'NAME'.
application_drop_column = application_df.drop(columns=["EIN", "NAME"])
#application_drop_column

In [3]:
# Determine the number of unique values in each column.
application_drop_column.nunique()

APPLICATION_TYPE            17
AFFILIATION                  6
CLASSIFICATION              71
USE_CASE                     5
ORGANIZATION                 4
STATUS                       2
INCOME_AMT                   9
SPECIAL_CONSIDERATIONS       2
ASK_AMT                   8747
IS_SUCCESSFUL                2
dtype: int64

In [4]:
# Look at APPLICATION_TYPE value counts for binning
app_count=application_drop_column["APPLICATION_TYPE"].value_counts()
#app_count

In [5]:
# # Visualize the value counts of APPLICATION_TYPE
# app_count.plot.density()

In [6]:
# Determine which values to replace if counts are less than ...?
replace_application = list(app_count[app_count < 500].index)

# Replace in dataframe
for app in replace_application:
    application_drop_column.APPLICATION_TYPE = application_drop_column.APPLICATION_TYPE.replace(app,"Other")
    
# Check to make sure binning was successful
#application_drop_column.APPLICATION_TYPE.value_counts()

In [7]:
# Look at CLASSIFICATION value counts for binning
classification_count=application_drop_column["CLASSIFICATION"].value_counts()
#classification_count

In [8]:
# Visualize the value counts of CLASSIFICATION
#classification_count.plot.density()

In [9]:
# Determine which values to replace if counts are less than ..?
replace_class = list(classification_count[classification_count < 1882].index)

# Replace in dataframe
for cls in replace_class:
    application_drop_column.CLASSIFICATION = application_drop_column.CLASSIFICATION.replace(cls,"Other")
    
# Check to make sure binning was successful
#application_drop_column.CLASSIFICATION.value_counts()

In [10]:
# Generate our categorical variable lists
application_cat = application_drop_column.dtypes[application_drop_column.dtypes == "object"].index.tolist()
#application_cat

In [11]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(application_drop_column[application_cat]))

# Add the encoded variable names to the dataframe
encode_df.columns = enc.get_feature_names(application_cat)
#encode_df.head()



In [12]:
# Merge one-hot encoded features and drop the originals
application_drop_column = application_drop_column.merge(encode_df,left_index=True, right_index=True)
application_drop_column = application_drop_column.drop(application_cat,1)
#application_drop_column.head()

  This is separate from the ipykernel package so we can avoid doing imports until


In [13]:
# Split our preprocessed data into our features and target arrays
y = application_drop_column["IS_SUCCESSFUL"].values
X = application_drop_column.drop(["IS_SUCCESSFUL"],1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

  This is separate from the ipykernel package so we can avoid doing imports until


In [14]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

### Deliverable 2: Compile, Train and Evaluate the Model

In [27]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=10,
        step=2), activation=activation, input_dim=43))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [28]:
# Import the kerastuner library
import kerastuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

INFO:tensorflow:Reloading Tuner from .\untitled_project\tuner0.json


In [29]:
# Run the kerastuner search for best hyperparameters
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))

Trial 10 Complete [00h 00m 07s]
val_accuracy: 0.723498523235321

Best val_accuracy So Far: 0.7261807322502136
Total elapsed time: 00h 00m 31s

Search: Running Trial #11

Value             |Best Value So Far |Hyperparameter
tanh              |tanh              |activation
3                 |5                 |first_units
4                 |3                 |num_layers
7                 |1                 |units_0
3                 |9                 |units_1
9                 |7                 |units_2
3                 |1                 |units_3
7                 |9                 |units_4
3                 |3                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
2                 |2                 |tuner/bracket
0                 |0                 |tuner/round

Epoch 1/3
Epoch 2/3

KeyboardInterrupt: 

In [None]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train[0])
hidden_nodes_layer1 = 80
hidden_nodes_layer2 = 30
nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))


# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

In [None]:
# # Import checkpoint dependencies
# import os
# from tensorflow.keras.callbacks import ModelCheckpoint

# # Define the checkpoint path and filenames
# os.makedirs("checkpoints/",exist_ok=True)
# checkpoint_path = "checkpoints/weights.{epoch:02d}.hdf5"

In [None]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# # Create a callback that saves the model's weights every epoch
# cp_callback = ModelCheckpoint(
#     filepath=checkpoint_path,
#     verbose=1,
#     save_weights_only=True,
#     save_freq=5)

In [None]:
# Train the model
# Train the model
fit_model = nn.fit(X_train_scaled,y_train,epochs=5)

In [None]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# # Export our trained model to HDF5 file
# nn.save("AlphabetSoupCharity.h5")
# # Import the model to a new object
# nn_imported = tf.keras.models.load_model('AlphabetSoupCharity.h5')