# Step 3: Optimize the Model

## Using your knowledge of TensorFlow, optimize your model in order to achieve a target predictive accuracy higher than 75%. If you can't achieve an accuracy higher than 75%, you'll need to make at least three attempts to do so.

In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import pandas as pd
import tensorflow as tf
import pandas as pd 



In [2]:
# Import and read charity_data.csv (path is relative to the notebook's working directory).

n_df = pd.read_csv("Resources/charity_data.csv")
# Preview the first rows of the raw frame.
n_df.head()


Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [3]:
# 'EIN' and 'NAME' are non-beneficial ID columns; remove them before modelling.
n_df = n_df.drop(columns=["EIN", "NAME"])
n_df.head()

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [4]:

# Pool APPLICATION_TYPE values that occur fewer than 500 times into "Other".
n_counts = n_df["APPLICATION_TYPE"].value_counts()
replace_application = n_counts[n_counts < 500].index.tolist()
n_df["APPLICATION_TYPE"] = n_df["APPLICATION_TYPE"].mask(
    n_df["APPLICATION_TYPE"].isin(replace_application), "Other"
)

# Pool CLASSIFICATION values that occur fewer than 1800 times into "Other".
classification_counts = n_df["CLASSIFICATION"].value_counts()
replace_class = classification_counts[classification_counts < 1800].index.tolist()
n_df["CLASSIFICATION"] = n_df["CLASSIFICATION"].mask(
    n_df["CLASSIFICATION"].isin(replace_class), "Other"
)

# Every object-dtype column is treated as categorical.
application_cat = n_df.dtypes[n_df.dtypes == "object"].index.tolist()

# One-hot encode the categorical columns into a dense frame with named columns.
# NOTE(review): `sparse=` and `get_feature_names` are older scikit-learn spellings;
# confirm they are still accepted by the installed version.
enc = OneHotEncoder(sparse=False)
encode_df = pd.DataFrame(
    enc.fit_transform(n_df[application_cat]),
    columns=enc.get_feature_names(application_cat),
)

# Attach the encoded columns by index and discard the original categorical columns.
n_df = (
    n_df
    .merge(encode_df, left_index=True, right_index=True)
    .drop(columns=application_cat)
)

In [5]:

# Target as a column vector; every remaining column is a feature.
y = n_df["IS_SUCCESSFUL"].to_numpy().reshape(-1, 1)
X = n_df.drop(columns="IS_SUCCESSFUL").to_numpy()

# NOTE(review): no random_state is passed, so the split differs on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Fit the scaler on the training split only, then apply it to both splits.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled, X_test_scaled = (X_scaler.transform(part) for part in (X_train, X_test))

In [6]:

# One input per scaled feature column.
number_input_features = X_train_scaled.shape[1]

# NOTE(review): hidden_nodes_layer3 is assigned here but no third hidden layer is built below.
hidden_nodes_layer1, hidden_nodes_layer2, hidden_nodes_layer3 = 90, 50, 30

# Two relu hidden layers followed by a single sigmoid output unit.
nn_inc_epochs = tf.keras.models.Sequential([
    # First hidden layer (also declares the input width)
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"),
    # Second hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"),
    # Output layer
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

# Check the structure of the model
nn_inc_epochs.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 90)                3960      
                                                                 
 dense_1 (Dense)             (None, 50)                4550      
                                                                 
 dense_2 (Dense)             (None, 1)                 51        
                                                                 
Total params: 8,561
Trainable params: 8,561
Non-trainable params: 0
_________________________________________________________________


In [7]:
# Compile the model before training it: binary cross-entropy loss, Adam optimizer,
# and accuracy reported as the metric.
nn_inc_epochs.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Attempt 1: increase the number of epochs to 200, training on the scaled training split.
fit_model_inc_epochs = nn_inc_epochs.fit(X_train_scaled, y_train, epochs=200)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200

In [None]:
# Evaluate the model using the held-out test data; the result is unpacked as (loss, accuracy).
model_loss, model_accuracy = nn_inc_epochs.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

### Changing activation function from `relu` to `tanh`

In [None]:

# Attempt 2: same layer sizes as the previous model, with tanh in the hidden layers.
# Reuses number_input_features and hidden_nodes_layer1/2 from the earlier model cell.
nn_tanh = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="tanh"),
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="tanh"),
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

nn_tanh.summary()

In [None]:
# Compile the model with the same loss, optimizer and metric as the first attempt.
nn_tanh.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Train the tanh model for 100 epochs on the scaled training split.
fit_model_tanh = nn_tanh.fit(X_train_scaled, y_train, epochs=100)

In [None]:
# Evaluate the tanh model using the test data; the result is unpacked as (loss, accuracy).
model_loss, model_accuracy = nn_tanh.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

### Design a neural network model, taking into account any modifications that will optimize the model to achieve higher than 75% accuracy.

In [None]:
# Re-read the raw file so the next attempt starts from unprocessed data
# (n_df was overwritten with the encoded frame above).
n_df = pd.read_csv("Resources/charity_data.csv")

In [None]:
# Drop non-essential identifier columns
n_df = n_df.drop(columns=["EIN", "NAME"])

# Bucket "APPLICATION_TYPE": values seen fewer than 500 times become "Other".
# n_counts was computed in the earlier preprocessing cell.
replace_application = n_counts[n_counts < 500].index.tolist()
n_df["APPLICATION_TYPE"] = n_df["APPLICATION_TYPE"].mask(
    n_df["APPLICATION_TYPE"].isin(replace_application), "Other"
)

# Bucket "CLASSIFICATION": values seen fewer than 1800 times become "Other".
# classification_counts was computed in the earlier preprocessing cell.
replace_class = classification_counts[classification_counts < 1800].index.tolist()
n_df["CLASSIFICATION"] = n_df["CLASSIFICATION"].mask(
    n_df["CLASSIFICATION"].isin(replace_class), "Other"
)

In [None]:
# Look at INCOME_AMT value counts to choose a bucketing cutoff
income_counts = n_df["INCOME_AMT"].value_counts()
income_counts

In [None]:
# INCOME_AMT values with fewer than 3000 rows are pooled into "Other"
replace_income = income_counts[income_counts < 3000].index.tolist()
n_df["INCOME_AMT"] = n_df["INCOME_AMT"].mask(
    n_df["INCOME_AMT"].isin(replace_income), "Other"
)

# Check to make sure bucketing was successful
n_df["INCOME_AMT"].value_counts()

In [None]:
# Look at AFFILIATION value counts to choose a bucketing cutoff
aff_counts = n_df["AFFILIATION"].value_counts()
aff_counts

In [None]:
# AFFILIATION values with fewer than 15000 rows are pooled into "Other"
replace_aff = aff_counts[aff_counts < 15000].index.tolist()
n_df["AFFILIATION"] = n_df["AFFILIATION"].mask(
    n_df["AFFILIATION"].isin(replace_aff), "Other"
)

# Check to make sure bucketing was successful
n_df["AFFILIATION"].value_counts()

In [None]:
# Dense one-hot encoder for the categorical columns listed in application_cat
# (the list was built in the earlier preprocessing cell).
enc = OneHotEncoder(sparse=False)

# Fit, transform, and label the encoded columns in one step.
encode_df = pd.DataFrame(
    enc.fit_transform(n_df[application_cat]),
    columns=enc.get_feature_names(application_cat),
)

In [None]:
# Merge one-hot encoded features and drop the originals.
# application_df is created here from n_df, the frame that was bucketed and encoded in the
# preceding cells. No earlier cell assigns application_df, so merging it with itself would
# raise a NameError on a fresh kernel.
application_df = n_df.merge(encode_df, left_index=True, right_index=True)
application_df = application_df.drop(labels=application_cat, axis=1)

In [None]:
# Remove the "SPECIAL_CONSIDERATIONS_N" indicator column from the encoded frame.
application_df = application_df.drop(columns="SPECIAL_CONSIDERATIONS_N")
application_df.head()

In [None]:
# Features and target arrays: IS_SUCCESSFUL is the target (as a column vector),
# all remaining columns are features.
y = application_df["IS_SUCCESSFUL"].to_numpy().reshape(-1, 1)
X = application_df.drop(columns="IS_SUCCESSFUL").to_numpy()

# Training / testing split.
# NOTE(review): no random_state is passed, so the split differs on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [None]:
# Fit a StandardScaler on the training split only.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both splits.
X_train_scaled, X_test_scaled = (X_scaler.transform(part) for part in (X_train, X_test))

In [None]:
# Deep neural net definition: input width taken from the scaled features,
# two relu hidden layers (80 and 30 nodes), one sigmoid output unit.
number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer1, hidden_nodes_layer2 = 80, 30

nn_reduced_input = tf.keras.models.Sequential([
    # First hidden layer (also declares the input width)
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"),
    # Second hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"),
    # Output layer
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

# Check the structure of the model
nn_reduced_input.summary()

In [None]:
# Compile the model: binary cross-entropy loss, Adam optimizer, accuracy metric.
nn_reduced_input.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model for 100 epochs on the scaled training split.
fit_model_reduced_input = nn_reduced_input.fit(X_train_scaled, y_train, epochs=100)

In [None]:
# Evaluate the model using the test data; the result is unpacked as (loss, accuracy).
model_loss, model_accuracy = nn_reduced_input.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Helper function to bin specified categorical columns with value counts less than min_count
def bin_cat_col(df, column, min_count):
    """Replace rare values of one categorical column with "Other", in place.

    Args:
        df: DataFrame to modify; ``df[column]`` is overwritten.
        column: Name of the categorical column to bin.
        min_count: Values occurring fewer than ``min_count`` times are replaced.

    Returns:
        None. ``df`` is mutated.
    """
    counts = df[column].value_counts()
    to_replace = list(counts[counts < min_count].index)
    # Operate on the ``column`` argument only; the function must not depend on a
    # global name such as ``col`` happening to exist in the notebook namespace.
    df[column] = df[column].mask(df[column].isin(to_replace), "Other")

In [None]:
# Helper function to print results
def print_results(param_choice, input_vals, results):
    """Print the loss and accuracy recorded for each swept parameter value.

    Args:
        param_choice: Heading printed once before the per-value results.
        input_vals: Sequence of parameter values that were tried.
        results: Sequence aligned with ``input_vals``; each entry is indexed
            as ``[0]`` for loss and ``[1]`` for accuracy.
    """
    print(param_choice)
    for position, value in enumerate(input_vals):
        print(value)
        outcome = results[position]
        print("Model Loss:", outcome[0])
        print("Model Accuracy:", outcome[1])
        print()

### Vary Epochs

In [None]:
# Sweep the number of training epochs, collecting one result per setting.
# NOTE(review): build_train_test is not defined in the visible part of this notebook;
# it must be defined before this cell can run.
training_epochs = [50, 100, 200, 300, 500, 700, 1000]
results = []
for te in training_epochs:
    results.append(build_train_test(epochs=te))

In [None]:
# Print loss and accuracy for each epoch setting tried above.
print_results("Epochs", training_epochs, results)

#### Result: 100 Epochs yields highest accuracy (0.732) and second lowest loss (0.568)

### Vary Architecture

In [None]:
import itertools

# Candidate hidden-layer layouts: every ordering of every non-empty subset of the
# node-count options, listed by increasing number of layers.
n_node_options = [10, 30, 50, 80]
architectures = [
    layer_order
    for depth in range(1, len(n_node_options) + 1)
    for node_subset in itertools.combinations(n_node_options, depth)
    for layer_order in itertools.permutations(node_subset)
]

In [None]:
# Train and evaluate one model per candidate architecture.
# NOTE(review): build_train_test is not defined in the visible part of this notebook.
results = []
for arch in architectures:
    results.append(build_train_test(architecture=arch))

In [None]:
# Add results to dataframe for easier sorting: index 0 of each result is the loss,
# index 1 is the accuracy.
arch_loss = [outcome[0] for outcome in results]
arch_acc = [outcome[1] for outcome in results]

arch_results_df = pd.DataFrame(
    {"architecture": architectures, "loss": arch_loss, "accuracy": arch_acc}
)
arch_results_df.head()

In [None]:
# Architectures with highest accuracy (top 5, sorted descending)
arch_results_df.sort_values("accuracy", ascending=False).head()

In [None]:
# Architectures with lowest loss (top 15, sorted ascending)
arch_results_df.sort_values("loss").head(15)

#### Result: Architecture (80, 50, 30) -> 3 hidden layers with 80, 50, 30 nodes yields highest accuracy (0.740) and 12th lowest loss (0.561)

### Vary Activation Function

In [None]:
# Sweep the hidden-layer activation function, printing a counter as progress.
# NOTE(review): build_train_test is not defined in the visible part of this notebook.
activation_funcs = ["relu", "tanh", "selu", "elu", "exponential"]
results = []
for cnt, func in enumerate(activation_funcs):
    print("Count:", cnt)
    results.append(build_train_test(activation=func))

In [None]:
# Print loss and accuracy for each activation function tried above.
print_results("Activation Functions", activation_funcs, results)

#### Result: Activation function `tanh` yields highest accuracy (0.734) and lowest loss (0.556)

### Vary Categorical Column Binning

In [None]:
# Cutoff per categorical column: values with fewer occurrences are candidates for binning.
cat_bin_cutoffs = {"APPLICATION_TYPE": 500, "CLASSIFICATION": 1800, "INCOME_AMT": 3000, "AFFILIATION": 15000}

# One settings dict per combination of columns to bin, starting with the
# empty dict (no binning at all) and growing by subset size.
model_args = [{}]
# Subset sizes come from cat_bin_cutoffs itself, not from the unrelated
# n_node_options list used by the architecture search.
for n_cols in range(1, len(cat_bin_cutoffs) + 1):
    for subset in itertools.combinations(cat_bin_cutoffs, n_cols):
        # combinations() never repeats a subset, so no duplicate check is needed.
        model_args.append({col: cat_bin_cutoffs[col] for col in subset})
model_args

In [None]:
# Train and evaluate one model per binning combination, printing a counter as progress.
# NOTE(review): build_train_test is not defined in the visible part of this notebook.
results = []
for cnt, cat_bin in enumerate(model_args):
    print("Count:", cnt)
    results.append(build_train_test(cat_cutoffs=cat_bin))

In [None]:
# Add results to dataframe for easier sorting: index 0 of each result is the loss,
# index 1 is the accuracy.
cat_loss = [outcome[0] for outcome in results]
cat_acc = [outcome[1] for outcome in results]

cat_results_df = pd.DataFrame(
    {"Categorical Features": model_args, "loss": cat_loss, "accuracy": cat_acc}
)
cat_results_df.head()

In [None]:
# Categorical binning combinations with highest accuracy (top 5, sorted descending)
cat_results_df.sort_values("accuracy", ascending=False).head()

In [None]:
# Categorical binning combinations with lowest loss (top 5, sorted ascending)
cat_results_df.sort_values("loss").head()

#### Result: Binning only "CLASSIFICATION" with less than 1800 occurrences of a value yields third highest accuracy (0.737) and third lowest loss (0.560)

### Vary Learning Rate Analysis

In [None]:
# Coarse learning-rate sweep over five powers of ten, printing a counter as progress.
# NOTE(review): build_train_test is not defined in the visible part of this notebook.
learning_rates_coarse = [0.0001, 0.001, 0.01, 0.1, 1]
results = []
for cnt, rate in enumerate(learning_rates_coarse):
    print("Count:", cnt)
    results.append(build_train_test(learning_rate=rate))

In [None]:
# Print loss and accuracy for each coarse learning rate tried above.
print_results("Learning Rates", learning_rates_coarse, results)

In [None]:
# Plot accuracy vs learning rate
# matplotlib is imported here because no earlier cell imports it; without this the
# cell fails with a NameError on a fresh kernel.
import matplotlib.pyplot as plt

# Index 0 of each result is the loss, index 1 is the accuracy.
lr_loss = [outcome[0] for outcome in results]
lr_acc = [outcome[1] for outcome in results]

fig, ax = plt.subplots()
ax.plot(learning_rates_coarse, lr_acc, marker="o")
# The learning rates span several orders of magnitude, so use a log x-axis.
ax.set_xscale("log")
ax.set_xlabel("Learning rate")
ax.set_ylabel("Model accuracy")
ax.set_title("Accuracy vs learning rate (coarse sweep)")
# plt.show() renders on inline notebook backends, where Figure.show() only warns.
plt.show()

In [None]:
# Learning rate with highest accuracy is between 0.0001 and 0.01.
# Generate 6 random learning rates in this range: three with exponents in (-4, -3]
# and three with exponents in (-3, -2], so both decades are sampled.
import numpy as np
bases = np.repeat(10, 3)
# The generator is consumed in order, so the first three draws use offset 3.
exponents_1, exponents_2 = (-(np.random.rand(3) + offset) for offset in (3, 2))
learning_rates_fine = [
    rate
    for exponents in (exponents_1, exponents_2)
    for rate in np.power(bases, exponents).tolist()
]
learning_rates_fine

In [None]:
# Fine learning-rate sweep over the randomly drawn rates, printing a counter as progress.
# NOTE(review): build_train_test is not defined in the visible part of this notebook.
results = []
for cnt, rate in enumerate(learning_rates_fine):
    print("Count:", cnt)
    results.append(build_train_test(learning_rate=rate))

In [None]:
# Print loss and accuracy for each fine-grained learning rate tried above.
print_results("Learning Rates", learning_rates_fine, results)

#### Result: Learning Rate 0.0005935945876655383 yields second highest accuracy (0.737) and lowest loss (0.546)

### Combine Optimized Parameters

In [None]:
# Train one model with the settings picked in the sections above:
# architecture, activation, learning rate, epochs and categorical binning.
# NOTE(review): build_train_test is not defined in the visible part of this notebook.
result = build_train_test(architecture=(80, 50, 30), activation="tanh",
                          learning_rate=0.0005935945876655383,
                          epochs=100, cat_cutoffs={"CLASSIFICATION": 1800})

In [None]:
# Display the result of the combined-parameter run.
result

#### Result: Roughly the same as the original model; accuracy decreased from 0.729 to 0.728

In [None]:
# Vary Batch Size with Optimized Values
# Powers of two from 1 to 64.
batch_sizes_coarse = [1 << power for power in range(7)]

# All other settings are held at the combined values used above.
# NOTE(review): build_train_test is not defined in the visible part of this notebook.
results = []
for cnt, batch_size in enumerate(batch_sizes_coarse):
    print("Count:", cnt)
    results.append(
        build_train_test(
            architecture=(80, 50, 30),
            activation="tanh",
            learning_rate=0.0005935945876655383,
            epochs=100,
            cat_cutoffs={"CLASSIFICATION": 1800},
            batch_size=batch_size,
        )
    )

In [None]:
# Print loss and accuracy for each batch size tried above.
print_results("Batch Sizes", batch_sizes_coarse, results)

#### Result: Batch size of 16 generates second highest accuracy (0.737) and lowest loss (0.547)

In [None]:
# Read data and drop the unnecessary identification columns
application_df = pd.read_csv("Resources/charity_data.csv").drop(columns=["EIN", "NAME"])

# Encode categorical columns without bucketing: every object-dtype column is one-hot encoded
application_cat = application_df.dtypes[application_df.dtypes == "object"].index.tolist()
enc = OneHotEncoder(sparse=False)
encode_df = pd.DataFrame(
    enc.fit_transform(application_df[application_cat]),
    columns=enc.get_feature_names(application_cat),
)

# Attach the encoded columns by index, drop the original categorical columns,
# then drop the "SPECIAL_CONSIDERATIONS_N" indicator
application_df = (
    application_df
    .merge(encode_df, left_index=True, right_index=True)
    .drop(columns=application_cat)
    .drop(columns="SPECIAL_CONSIDERATIONS_N")
)

# Features and target arrays: IS_SUCCESSFUL is the target (as a column vector)
y = application_df["IS_SUCCESSFUL"].to_numpy().reshape(-1, 1)
X = application_df.drop(columns="IS_SUCCESSFUL").to_numpy()

# Training / testing split
# NOTE(review): no random_state is passed, so the split differs on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Fit the scaler on the training split only, then scale both splits
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled, X_test_scaled = (X_scaler.transform(part) for part in (X_train, X_test))

# Model: 3 relu hidden layers with 80, 50 and 30 nodes, and a single sigmoid output unit
number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer1, hidden_nodes_layer2, hidden_nodes_layer3 = 80, 50, 30

nn_optimized = tf.keras.models.Sequential([
    # Input layer and first hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"),
    # Second hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"),
    # Third hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="relu"),
    # Output layer
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

# Check the structure of the model
nn_optimized.summary()

In [None]:
# Compile the model: binary cross-entropy loss, Adam optimizer, accuracy metric.
nn_optimized.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
import os
from tensorflow.keras.callbacks import ModelCheckpoint
# Add model weight checkpoints during training.
# The checkpoint directory is created if it does not already exist.
os.makedirs("checkpoints_optimized/", exist_ok=True)
# "{epoch}" in the path is a placeholder, so each saved file carries its epoch number.
checkpoint_path="checkpoints_optimized/weights.{epoch}.hdf5"

# Create callback that saves the model's weights every 5 epochs
# NOTE(review): `period` looks like a legacy ModelCheckpoint argument; confirm it is
# still accepted by the installed TensorFlow version.
cp_callback = ModelCheckpoint(filepath=checkpoint_path,
                              verbose=1,
                              save_weights_only=True,
                              save_freq="epoch",
                              period=5)

In [None]:
# Train the model for 100 epochs, with the checkpoint callback attached.
fit_model_optimized = nn_optimized.fit(X_train_scaled, y_train, epochs=100, callbacks=[cp_callback])

In [None]:
# Visualize Loss/Accuracy
import matplotlib.pyplot as plt

# One row per epoch, indexed from 1 so the x-axis reads as the epoch number.
n_epochs = len(fit_model_optimized.history["loss"])
history_df = pd.DataFrame(fit_model_optimized.history, index=range(1, n_epochs + 1))

# Loss on the left axis.
fig, ax = plt.subplots()
loss = ax.plot(history_df["loss"], color="red", label="Loss")
ax.set(xlabel="Epoch", ylabel="Loss")

# Accuracy on a second y-axis that shares the same x-axis.
ax2 = ax.twinx()
acc = ax2.plot(history_df["accuracy"], color="blue", label="Accuracy")
ax2.set(ylabel="Accuracy")

# A single legend covering the lines from both axes.
curves = [*loss, *acc]
labs = [curve.get_label() for curve in curves]
ax.legend(curves, labs, loc="center right")

plt.show()

In [None]:
# Evaluate the final model using the test data; the result is unpacked as (loss, accuracy).
model_loss, model_accuracy = nn_optimized.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Export and save the trained model to an HDF5 file in the notebook's working directory.
nn_optimized.save("AlphabetSoupCharity_Optimization.h5")

# Step 4: Write a Report on the Neural Network Model




The best performing models had a few things in common:

- Loss was below 0.6
- Accuracy was above 0.73
- Could not bucket the variables
- The number of epochs was between 100 and 200
- Hidden layers with 90, 60 and 30 nodes

