In [334]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

In [335]:
#  Import and read the charity_data.csv.
application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")
application_df.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [336]:
# Drop the non-beneficial ID columns, 'EIN' and 'NAME'.
application_df = application_df.drop(columns=['EIN', 'NAME'])

In [337]:
application_df['ASK_AMT'].value_counts()

ASK_AMT
5000        25398
10478           3
15583           3
63981           3
6725            3
            ...  
5371754         1
30060           1
43091152        1
18683           1
36500179        1
Name: count, Length: 8747, dtype: int64

In [338]:
bins = [0, 1000000, 2000000, 3000000,
        4000000, 5000000, 6000000, 7000000, 8000000, 9000000,
          10000000]
labels = [1,2,3,4,5,6,7,8,9,10]

application_df['ASK_AMT_BINNED'] = pd.cut(application_df['ASK_AMT'],
                                          bins=bins, labels=labels)

application_df['ASK_AMT_BINNED'].value_counts()

ASK_AMT_BINNED
1     32722
2       485
3       222
4       136
5        97
6        83
7        58
8        42
9        23
10       19
Name: count, dtype: int64

In [339]:
application_df = application_df.drop(columns=['ASK_AMT'])

In [340]:
# Choose a cutoff value and create a list of classifications to be replaced
# use the variable name `classifications_to_replace`
classifications_to_replace = application_df['CLASSIFICATION'].value_counts(
                    )[application_df['CLASSIFICATION'].value_counts(
                        dropna=False) < 1000].index.tolist()

# Replace in dataframe
for cls in classifications_to_replace:
    application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(cls,"Other")

# Check to make sure replacement was successful
application_df['CLASSIFICATION'].value_counts()

CLASSIFICATION
C1000    17326
C2000     6074
C1200     4837
Other     2261
C3000     1918
C2100     1883
Name: count, dtype: int64

In [341]:
# Choose a cutoff value and create a list of application types to be replaced
# use the variable name `application_types_to_replace`
application_types_to_replace = application_df['APPLICATION_TYPE'].value_counts(
                    )[application_df['APPLICATION_TYPE'].value_counts(
                        dropna=False) < 500].index.tolist()

# Replace in dataframe
for app in application_types_to_replace:
    application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(app,"Other")

# Check to make sure replacement was successful
application_df['APPLICATION_TYPE'].value_counts()

APPLICATION_TYPE
T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7         725
T10        528
Other      276
Name: count, dtype: int64

In [342]:
# Convert categorical data to numeric with `pd.get_dummies`
application_df_converted = pd.get_dummies(application_df, dtype=int)
application_df_converted

Unnamed: 0,STATUS,IS_SUCCESSFUL,APPLICATION_TYPE_Other,APPLICATION_TYPE_T10,APPLICATION_TYPE_T19,APPLICATION_TYPE_T3,APPLICATION_TYPE_T4,APPLICATION_TYPE_T5,APPLICATION_TYPE_T6,APPLICATION_TYPE_T7,...,ASK_AMT_BINNED_1,ASK_AMT_BINNED_2,ASK_AMT_BINNED_3,ASK_AMT_BINNED_4,ASK_AMT_BINNED_5,ASK_AMT_BINNED_6,ASK_AMT_BINNED_7,ASK_AMT_BINNED_8,ASK_AMT_BINNED_9,ASK_AMT_BINNED_10
0,1,1,0,1,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,1,1,0,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0
3,1,1,0,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,1,1,0,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34294,1,0,0,0,0,0,1,0,0,0,...,1,0,0,0,0,0,0,0,0,0
34295,1,0,0,0,0,0,1,0,0,0,...,1,0,0,0,0,0,0,0,0,0
34296,1,0,0,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
34297,1,1,0,0,0,0,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0


In [343]:
# Split our preprocessed data into our features and target arrays
X = application_df_converted.drop(columns='IS_SUCCESSFUL')
y = application_df_converted['IS_SUCCESSFUL']

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=78)

In [344]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [345]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=100, activation="relu", input_dim=52))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=50, activation="relu"))

# Third hidden layer
nn.add(tf.keras.layers.Dense(units=20, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [346]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [347]:
# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=100)

Epoch 1/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 787us/step - accuracy: 0.7040 - loss: 0.5882
Epoch 2/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 687us/step - accuracy: 0.7331 - loss: 0.5490
Epoch 3/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 673us/step - accuracy: 0.7341 - loss: 0.5505
Epoch 4/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 708us/step - accuracy: 0.7328 - loss: 0.5478
Epoch 5/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 774us/step - accuracy: 0.7344 - loss: 0.5491
Epoch 6/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 718us/step - accuracy: 0.7351 - loss: 0.5449
Epoch 7/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 635us/step - accuracy: 0.7370 - loss: 0.5451
Epoch 8/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 668us/step - accuracy: 0.7414 - loss: 0.5404
Epoch 9/100
[1m

In [348]:
# Model with bins for ask_amt added and an added hidden layer
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 5s - 17ms/step - accuracy: 0.7255 - loss: 0.5835
Loss: 0.5835144519805908, Accuracy: 0.7254810333251953


In [349]:
nn.save('AlphabetSoupCharity_Optimization1.h5')



In [350]:
application_df['USE_CASE'].value_counts()

USE_CASE
Preservation     28095
ProductDev        5671
CommunityServ      384
Heathcare          146
Other                3
Name: count, dtype: int64

In [351]:
# Choose a cutoff value and create a list of use_cases to be replaced
# use the variable name `use_cases_to_replace`
use_cases_to_replace = application_df['USE_CASE'].value_counts(
                    )[application_df['USE_CASE'].value_counts(
                        dropna=False) < 1000].index.tolist()

# Replace in dataframe
for cases in use_cases_to_replace:
    application_df['USE_CASE'] = application_df['USE_CASE'].replace(cases,"Other")

# Check to make sure replacement was successful
application_df['USE_CASE'].value_counts()

USE_CASE
Preservation    28095
ProductDev       5671
Other             533
Name: count, dtype: int64

In [352]:
# Convert categorical data to numeric with `pd.get_dummies`
application_df_converted = pd.get_dummies(application_df, dtype=int)
application_df_converted

Unnamed: 0,STATUS,IS_SUCCESSFUL,APPLICATION_TYPE_Other,APPLICATION_TYPE_T10,APPLICATION_TYPE_T19,APPLICATION_TYPE_T3,APPLICATION_TYPE_T4,APPLICATION_TYPE_T5,APPLICATION_TYPE_T6,APPLICATION_TYPE_T7,...,ASK_AMT_BINNED_1,ASK_AMT_BINNED_2,ASK_AMT_BINNED_3,ASK_AMT_BINNED_4,ASK_AMT_BINNED_5,ASK_AMT_BINNED_6,ASK_AMT_BINNED_7,ASK_AMT_BINNED_8,ASK_AMT_BINNED_9,ASK_AMT_BINNED_10
0,1,1,0,1,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,1,1,0,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0
3,1,1,0,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,1,1,0,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34294,1,0,0,0,0,0,1,0,0,0,...,1,0,0,0,0,0,0,0,0,0
34295,1,0,0,0,0,0,1,0,0,0,...,1,0,0,0,0,0,0,0,0,0
34296,1,0,0,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
34297,1,1,0,0,0,0,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0


In [353]:
# Split our preprocessed data into our features and target arrays
X = application_df_converted.drop(columns='IS_SUCCESSFUL')
y = application_df_converted['IS_SUCCESSFUL']

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=78)

In [354]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [355]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=100, activation="relu", input_dim=50))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=50, activation="relu"))

# Third hidden layer
nn.add(tf.keras.layers.Dense(units=20, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [356]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [357]:
# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=100)

Epoch 1/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 744us/step - accuracy: 0.6993 - loss: 0.5942
Epoch 2/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 638us/step - accuracy: 0.7274 - loss: 0.5570
Epoch 3/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 661us/step - accuracy: 0.7285 - loss: 0.5530
Epoch 4/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 614us/step - accuracy: 0.7281 - loss: 0.5520
Epoch 5/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 620us/step - accuracy: 0.7351 - loss: 0.5470
Epoch 6/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 660us/step - accuracy: 0.7359 - loss: 0.5447
Epoch 7/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 624us/step - accuracy: 0.7405 - loss: 0.5421
Epoch 8/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 598us/step - accuracy: 0.7326 - loss: 0.5471
Epoch 9/100
[1m

In [358]:
# Model with bins for ask_amt added, a cutoff value added to use_case, and an added hidden layer
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - 868us/step - accuracy: 0.7233 - loss: 0.5837
Loss: 0.5837016701698303, Accuracy: 0.7232652902603149


In [359]:
nn.save('AlphabetSoupCharity_Optimization2.h5')



In [360]:
application_df_2 = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")
application_df_2.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [361]:
# Drop the non-beneficial ID columns, 'EIN' and 'NAME'.
application_df_2 = application_df_2.drop(columns=['EIN', 'NAME', 'CLASSIFICATION','APPLICATION_TYPE', 'ASK_AMT'])

In [362]:
application_df_2['CLASSIFICATION']= application_df['CLASSIFICATION']

In [363]:
application_df_2['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE']

In [364]:
# Convert categorical data to numeric with `pd.get_dummies`
application_df_converted_2 = pd.get_dummies(application_df_2, dtype=int)
application_df_converted_2

Unnamed: 0,STATUS,IS_SUCCESSFUL,AFFILIATION_CompanySponsored,AFFILIATION_Family/Parent,AFFILIATION_Independent,AFFILIATION_National,AFFILIATION_Other,AFFILIATION_Regional,USE_CASE_CommunityServ,USE_CASE_Heathcare,...,CLASSIFICATION_Other,APPLICATION_TYPE_Other,APPLICATION_TYPE_T10,APPLICATION_TYPE_T19,APPLICATION_TYPE_T3,APPLICATION_TYPE_T4,APPLICATION_TYPE_T5,APPLICATION_TYPE_T6,APPLICATION_TYPE_T7,APPLICATION_TYPE_T8
0,1,1,0,0,1,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,1,1,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
3,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,1,1,0,0,1,0,0,0,0,1,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34294,1,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
34295,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
34296,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
34297,1,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


In [365]:
# Split our preprocessed data into our features and target arrays
X = application_df_converted_2.drop(columns='IS_SUCCESSFUL')
y = application_df_converted_2['IS_SUCCESSFUL']

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=78)

In [366]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [367]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=100, activation="tanh", input_dim=42))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=50, activation="tanh"))

# Third hidden layer
nn.add(tf.keras.layers.Dense(units=50, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="relu"))

# Check the structure of the model
nn.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [368]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [369]:
# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=100)

Epoch 1/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 622us/step - accuracy: 0.6715 - loss: 1.1254
Epoch 2/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 607us/step - accuracy: 0.7026 - loss: 0.8623
Epoch 3/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 604us/step - accuracy: 0.7213 - loss: 0.6683
Epoch 4/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 658us/step - accuracy: 0.7080 - loss: 0.7586
Epoch 5/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 643us/step - accuracy: 0.6585 - loss: 1.4529
Epoch 6/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 599us/step - accuracy: 0.7202 - loss: 0.5791
Epoch 7/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 607us/step - accuracy: 0.7213 - loss: 0.6070
Epoch 8/100
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 618us/step - accuracy: 0.7182 - loss: 0.6807
Epoch 9/100
[1m

In [None]:
# Evaluate the model using the test data, took away the ASK_AMT Column, and changed some of the activation layers
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - 951us/step - accuracy: 0.7190 - loss: 0.6253
Loss: 0.6252564787864685, Accuracy: 0.7189504504203796


In [371]:
nn.save('AlphabetSoupCharity_Optimization3.h5')

