## Preprocessing

In [None]:
# Import `time` on its own first so the run timer can start before the remaining imports execute.
import time

# Record the notebook's start time; it is read again at the end to report the total run time.
start_the_timer = time.time()

# Import our dependencies
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from google.colab import files
import pandas as pd
import tensorflow as tf
import keras
import warnings

# Load the charity dataset from its hosted CSV into a DataFrame.
import pandas as pd
charity_data_url = "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"
df = pd.read_csv(charity_data_url)

## Compile, Train and Evaluate the Model

In [None]:
# Run configuration read by the later cells.

# Run identifier that is embedded in the exported model's filename
iterations = 0

# Unit counts for the hidden layers
# NOTE(review): third_hidden_layer is 0 and no cell visible here reads it — confirm it is intentional.
first_hidden_layer, second_hidden_layer, third_hidden_layer = 3, 2, 0

# Activation function names, one per configured layer slot
activation_1, activation_2, activation_3 = 'tanh', 'relu', 'sigmoid'

# Number of training epochs
epochs_count = 2

In [None]:
# Keep only the columns listed below; every other column of `df` is left out
# (the original note names 'EIN', 'STATUS' and 'SPECIAL_CONSIDERATIONS').
# 'NAME' is kept on purpose: it is one of the selected columns.
# .copy() makes optimized_df an independent frame, so assigning to its columns
# later does not act on a slice of `df`.
optimized_df = df[['NAME',
                   'APPLICATION_TYPE',
                   'AFFILIATION',
                   'CLASSIFICATION',
                   'USE_CASE',
                   'ORGANIZATION',
                   'INCOME_AMT',
                   'ASK_AMT',
                   'IS_SUCCESSFUL']].copy()

In [None]:
# Begin timing the binning step
start_time = time.time()

# Silence pandas' SettingWithCopyWarning (the option's default is 'warn')
pd.options.mode.chained_assignment = None

# Frequency cutoff per column: values that occur fewer times than the cutoff get binned
cutoff_by_column = {
    "NAME": 2,
    "APPLICATION_TYPE": 528,
    "CLASSIFICATION": 1883,
    "INCOME_AMT": 24388,
    "ASK_AMT": 25398,
}
column_info = {column: {"cutoff": cutoff} for column, cutoff in cutoff_by_column.items()}

# Label written over every binned value
replacement_value = "Other"

def replace_low_counts(optimized_df, column_info, replacement_value):
    """Bin infrequent values of the configured columns under one label, in place.

    For each column named in ``column_info``, every value whose occurrence
    count is below that column's ``"cutoff"`` is overwritten with
    ``replacement_value``. Missing values are not counted by
    ``value_counts`` and are therefore left as they are.

    Args:
        optimized_df: DataFrame to modify; the listed columns are reassigned
            on this object.
        column_info: Mapping of column name -> ``{"cutoff": <minimum count>}``.
        replacement_value: Label written over every infrequent value.

    Returns:
        None. ``optimized_df`` itself is updated.

    Raises:
        KeyError: If a column named in ``column_info`` is absent from the
            frame, or an entry has no ``"cutoff"`` key.
    """
    for col, info in column_info.items():
        column = optimized_df[col]
        value_counts = column.value_counts()
        rare_values = value_counts[value_counts < info["cutoff"]].index
        if len(rare_values) == 0:
            # Nothing falls below the cutoff; leave the column untouched.
            continue
        # One vectorised membership test instead of Series.replace with a long
        # list of values, which becomes slow when there are many rare values.
        optimized_df[col] = column.mask(column.isin(rare_values), replacement_value)

# Bin the low-frequency values in the configured columns
replace_low_counts(optimized_df, column_info, replacement_value)

# Stop the clock and work out how long the binning took
end_time = time.time()
elapsed_time = end_time - start_time

# Break the elapsed seconds into hours / minutes / seconds
hours, remainder = divmod(elapsed_time, 3600)
minutes, seconds = divmod(remainder, 60)

# Report the binning time
optimization_time_label = f"{int(hours)}h {int(minutes)}m {int(seconds)}s"
print(f"Optimization Run Time: {optimization_time_label}")

Optimization Run Time: 0h 0m 31s


In [None]:
# One-hot encode the categorical columns; pd.get_dummies returns a new frame, which replaces optimized_df.
optimized_df = pd.get_dummies(optimized_df)

In [None]:
# Separate the target column (y) from the remaining feature columns (X)
target_column = 'IS_SUCCESSFUL'
y = optimized_df[target_column]
X = optimized_df.drop(columns=[target_column])

In [None]:
# Split the features and target into training and testing sets.
# random_state=42 fixes the shuffle so the split is the same on every run;
# no test_size is passed, so scikit-learn's default proportion applies.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
# Create a StandardScaler instance
scaler = StandardScaler()

In [None]:
# Fit the StandardScaler on the training features only, so the test set does not influence the scaling.
# fit() returns the scaler itself, so X_scaler and scaler refer to the same fitted object.
X_scaler = scaler.fit(X_train)


In [None]:
# Scale both splits with the scaler that was fitted on the training features
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# Build the network: the input width comes from the scaled training data, while
# the layer sizes and activations come from the configuration variables.
input_features = X_train_scaled.shape[1]

nn = tf.keras.models.Sequential([
    # First hidden layer (also declares the input dimension)
    tf.keras.layers.Dense(units=first_hidden_layer, input_dim=input_features, activation=activation_1),
    # Second hidden layer
    tf.keras.layers.Dense(units=second_hidden_layer, activation=activation_2),
    # Output layer: a single unit
    tf.keras.layers.Dense(units=1, activation=activation_3),
])

In [None]:
# Print a layer-by-layer summary of the model's structure
nn.summary()

In [None]:
# Compile the model: binary cross-entropy loss, the Adam optimizer, and accuracy as the reported metric
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Silence pandas' SettingWithCopyWarning
pd.options.mode.chained_assignment = None

# Name of the model export for this run, keyed by the `iterations` setting
filename = f"AlphabetSoupCharity_Optimization_{iterations}.h5"

# Begin timing the training step
start_time = time.time()

# Fit the network with every Python warning muted for the duration of the call
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    nn.fit(X_train_scaled, y_train, epochs=epochs_count, verbose=0)

# Stop the clock and work out how long training took
end_time = time.time()
elapsed_time = end_time - start_time

# Break the elapsed seconds into hours / minutes / seconds
hours, remainder = divmod(elapsed_time, 3600)
minutes, seconds = divmod(remainder, 60)

# Report the training time
training_time_label = f"{int(hours)}h {int(minutes)}m {int(seconds)}s"
print(f"Training Run Time: {training_time_label}")

In [None]:
# Evaluate the trained model on the held-out test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=0)

# Print loss and accuracy on separate lines ("\n" is the newline escape;
# the previous "/n" was printed literally and kept both values on one line).
print(f"Model Loss: {model_loss}\nModel Accuracy: {model_accuracy}")

In [None]:
# Silence pandas' SettingWithCopyWarning
pd.options.mode.chained_assignment = None

# Name the export after the current `iterations` setting
filename = f"AlphabetSoupCharity_Optimization_{iterations}.h5"

# Write the model to disk, muting any warnings raised while saving
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    nn.save(filename)

# Send the saved file to the user through google.colab's files.download helper
files.download(filename)

# Tell the user where to find the file
print(f"{filename} has been exported to your Downloads folder.")

# Stop the notebook-wide timer and compute the total run time
stop_the_timer = time.time()
entire_run_time = stop_the_timer - start_the_timer

# Break the total seconds into hours / minutes / seconds
hours, remainder = divmod(entire_run_time, 3600)
minutes, seconds = divmod(remainder, 60)

# Report the total run time of the notebook
total_time_label = f"{int(hours)}h {int(minutes)}m {int(seconds)}s"
print(f"Total Run Time: {total_time_label}")