# Optimize the Model

In [17]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

#  Import and read the charity_data.csv.
import pandas as pd
CHARITY_DATA_URL = "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"

# Load the charity records from the hosted CSV and preview the first rows.
application_df = pd.read_csv(CHARITY_DATA_URL)
application_df.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


## Trial 1: Adjust Input Data - Drop ID Columns and Bin Rare Categories

In [18]:
# Remove the two identifier columns ('EIN' and 'NAME') from the table.
id_columns = ['EIN', 'NAME']
application_df = application_df.drop(columns=id_columns)

# Preview the remaining columns
application_df.head()

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [19]:
# Replace rare occurrences in APPLICATION_TYPE and CLASSIFICATION with 'Other'
def _bin_rare_values(column, rare_values, other_label="Other"):
    """Return ``column`` with every value listed in ``rare_values`` replaced.

    Parameters
    ----------
    column : pandas.Series
        The categorical column to clean up.
    rare_values : list
        Labels that should be pooled together.
    other_label : str, default "Other"
        Replacement written wherever a rare label is found.

    Returns
    -------
    pandas.Series
        A new Series; values not listed in ``rare_values`` are left untouched.

    A single vectorized ``isin``/``mask`` pass is used so the column is
    rewritten once, instead of once per rare label as a ``replace`` loop would.
    """
    return column.mask(column.isin(rare_values), other_label)


# Application types that occur fewer than 1000 times are pooled together.
app_type_counts = application_df['APPLICATION_TYPE'].value_counts()
application_types_to_replace = list(app_type_counts[app_type_counts < 1000].index)
application_df['APPLICATION_TYPE'] = _bin_rare_values(
    application_df['APPLICATION_TYPE'], application_types_to_replace
)

# Classifications that occur fewer than 1500 times are pooled together.
class_counts = application_df['CLASSIFICATION'].value_counts()
classifications_to_replace = list(class_counts[class_counts < 1500].index)
application_df['CLASSIFICATION'] = _bin_rare_values(
    application_df['CLASSIFICATION'], classifications_to_replace
)

In [20]:
# Encode the categorical columns as indicator columns stored as floats
application_df = pd.get_dummies(data=application_df, dtype=float)


In [None]:
# Separate the label column (IS_SUCCESSFUL) from the predictor columns
features_df = application_df.drop(columns="IS_SUCCESSFUL")
X = features_df.to_numpy()
y = application_df["IS_SUCCESSFUL"].to_numpy()

In [21]:
# Partition features and labels into train/test subsets; the fixed seed keeps the split repeatable
split_arrays = train_test_split(X, y, random_state=78)
X_train, X_test, y_train, y_test = split_arrays

In [22]:
# Standardize the features: the scaler is fitted on the training rows only,
# then the same fitted transform is applied to both subsets
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(subset) for subset in (X_train, X_test)
)

In [23]:
# Define the Trial 1 network: two ReLU hidden layers and a sigmoid output unit
n_input_features = len(X_train[0])

nn_1 = tf.keras.models.Sequential([
    # First hidden layer (also declares the input width)
    tf.keras.layers.Dense(units=128, activation='relu', input_dim=n_input_features),
    # Second hidden layer
    tf.keras.layers.Dense(units=64, activation='relu'),
    # Output layer
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

In [24]:
# Configure the optimizer, loss and tracked metric for Trial 1
nn_1.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the scaled training data, with validation_split=0.2 for validation
fit_model_1 = nn_1.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [25]:
# Score Trial 1 on the held-out test set and report loss/accuracy
trial_1_metrics = nn_1.evaluate(x=X_test_scaled, y=y_test, verbose=2)
model_loss_1, model_accuracy_1 = trial_1_metrics
print(f"Trial 1 - Loss: {model_loss_1}, Accuracy: {model_accuracy_1}")

268/268 - 0s - loss: 0.5606 - accuracy: 0.7257 - 84ms/epoch - 315us/step
Trial 1 - Loss: 0.5605568289756775, Accuracy: 0.7257142663002014


In [33]:
# Persist the Trial 1 model to an HDF5 file
trial_1_model_path = './Outputs/AlphabetSoupCharity_Trial1.h5'
nn_1.save(trial_1_model_path)

## Trial 2: Add More Hidden Layers

In [26]:
# Define the Trial 2 network: four ReLU hidden layers and a sigmoid output unit
n_input_features = len(X_train[0])

nn_2 = tf.keras.models.Sequential([
    # First hidden layer (also declares the input width)
    tf.keras.layers.Dense(units=128, activation='relu', input_dim=n_input_features),
    # Second hidden layer
    tf.keras.layers.Dense(units=64, activation='relu'),
    # Third hidden layer
    tf.keras.layers.Dense(units=32, activation='relu'),
    # Fourth hidden layer
    tf.keras.layers.Dense(units=16, activation='relu'),
    # Output layer
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

In [27]:
# Configure the optimizer, loss and tracked metric for Trial 2
nn_2.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the scaled training data, with validation_split=0.2 for validation
fit_model_2 = nn_2.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [28]:
# Score Trial 2 on the held-out test set and report loss/accuracy
trial_2_metrics = nn_2.evaluate(x=X_test_scaled, y=y_test, verbose=2)
model_loss_2, model_accuracy_2 = trial_2_metrics
print(f"Trial 2 - Loss: {model_loss_2}, Accuracy: {model_accuracy_2}")

268/268 - 0s - loss: 0.5676 - accuracy: 0.7251 - 73ms/epoch - 273us/step
Trial 2 - Loss: 0.5676242709159851, Accuracy: 0.7251312136650085


In [34]:
# Persist the Trial 2 model to an HDF5 file
trial_2_model_path = './Outputs/AlphabetSoupCharity_Trial2.h5'
nn_2.save(trial_2_model_path)

## Trial 3: Different Activation Functions

In [29]:
# Define the Trial 3 network: three tanh hidden layers and a sigmoid output unit
n_input_features = len(X_train[0])

nn_3 = tf.keras.models.Sequential([
    # First hidden layer (also declares the input width)
    tf.keras.layers.Dense(units=128, activation='tanh', input_dim=n_input_features),
    # Second hidden layer
    tf.keras.layers.Dense(units=64, activation='tanh'),
    # Third hidden layer
    tf.keras.layers.Dense(units=32, activation='tanh'),
    # Output layer
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

In [30]:
# Configure the optimizer, loss and tracked metric for Trial 3
nn_3.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the scaled training data, with validation_split=0.2 for validation
fit_model_3 = nn_3.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [31]:
# Score Trial 3 on the held-out test set and report loss/accuracy
trial_3_metrics = nn_3.evaluate(x=X_test_scaled, y=y_test, verbose=2)
model_loss_3, model_accuracy_3 = trial_3_metrics
print(f"Trial 3 - Loss: {model_loss_3}, Accuracy: {model_accuracy_3}")

268/268 - 0s - loss: 0.5617 - accuracy: 0.7245 - 67ms/epoch - 250us/step
Trial 3 - Loss: 0.5616837739944458, Accuracy: 0.7245481014251709


In [35]:
# Persist the Trial 3 model to an HDF5 file
trial_3_model_path = './Outputs/AlphabetSoupCharity_Trial3.h5'
nn_3.save(trial_3_model_path)

## Comparison of Trials

In [32]:
# Side-by-side recap of the test-set loss and accuracy recorded for each trial
comparison_lines = [
    f"Trial 1 - Loss: {model_loss_1}, Accuracy: {model_accuracy_1}",
    f"Trial 2 - Loss: {model_loss_2}, Accuracy: {model_accuracy_2}",
    f"Trial 3 - Loss: {model_loss_3}, Accuracy: {model_accuracy_3}",
]
print("\n".join(comparison_lines))

Trial 1 - Loss: 0.5605568289756775, Accuracy: 0.7257142663002014
Trial 2 - Loss: 0.5676242709159851, Accuracy: 0.7251312136650085
Trial 3 - Loss: 0.5616837739944458, Accuracy: 0.7245481014251709
