In [21]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, LearningRateScheduler

# Load the data
url = "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"
application_df = pd.read_csv(url)

# Display the first few rows
application_df.head()


Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [22]:
# Drop the EIN and NAME columns
application_df = application_df.drop(['EIN', 'NAME'], axis=1)

In [23]:
# Combine rare application types into "Other"
application_type_counts = application_df['APPLICATION_TYPE'].value_counts()
cutoff = 500
application_types_to_replace = application_type_counts[application_type_counts < cutoff].index.tolist()
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(application_types_to_replace, 'Other')

In [24]:
# Combine rare classifications into "Other"
classification_counts = application_df['CLASSIFICATION'].value_counts()
cutoff = 1000
classifications_to_replace = classification_counts[classification_counts < cutoff].index.tolist()
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(classifications_to_replace, 'Other')

In [25]:
# Convert categorical data to numeric with pd.get_dummies
application_df = pd.get_dummies(application_df)

In [26]:
# Define features and target
X = application_df.drop('IS_SUCCESSFUL', axis=1).values
y = application_df['IS_SUCCESSFUL'].values

In [27]:
# Split the data into training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [28]:
# Scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [29]:
# Define the learning rate reduction and early stopping callbacks
lr_reduction = ReduceLROnPlateau(monitor='val_loss', patience=3, verbose=1, factor=0.5, min_lr=0.00001)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

In [30]:
# Create the LearningRateScheduler callback
def lr_schedule(epoch, lr):
    if epoch < 10:
        return lr
    else:
        return lr * tf.math.exp(-0.1)

In [31]:
lr_scheduler = LearningRateScheduler(lr_schedule)


In [32]:
# Define the optimized model
nn_optimized = tf.keras.Sequential()

In [33]:

# First hidden layer with Batch Normalization and L2 regularization
nn_optimized.add(Dense(units=128, activation='relu', kernel_regularizer=l2(0.01), input_dim=X_train.shape[1]))
nn_optimized.add(BatchNormalization())
nn_optimized.add(Dropout(0.5))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [34]:

# Second hidden layer with Batch Normalization and L2 regularization
nn_optimized.add(Dense(units=64, activation='relu', kernel_regularizer=l2(0.01)))
nn_optimized.add(BatchNormalization())
nn_optimized.add(Dropout(0.5))

In [35]:

# Third hidden layer with Batch Normalization and L2 regularization
nn_optimized.add(Dense(units=32, activation='relu', kernel_regularizer=l2(0.01)))
nn_optimized.add(BatchNormalization())
nn_optimized.add(Dropout(0.5))

In [36]:

# Fourth hidden layer with Batch Normalization and L2 regularization
nn_optimized.add(Dense(units=16, activation='relu', kernel_regularizer=l2(0.01)))
nn_optimized.add(BatchNormalization())
nn_optimized.add(Dropout(0.5))

In [37]:

# Fifth hidden layer with Batch Normalization and L2 regularization
nn_optimized.add(Dense(units=8, activation='relu', kernel_regularizer=l2(0.01)))
nn_optimized.add(BatchNormalization())
nn_optimized.add(Dropout(0.5))

In [38]:
# Output layer
nn_optimized.add(Dense(units=1, activation='sigmoid'))

In [39]:
# Compile the optimized model
nn_optimized.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [40]:
def schedule(epoch, lr):
    # Define your learning rate schedule logic here
    new_lr = 0.0004524187243077904  # Example value, replace with your logic
    return float(new_lr)  # Ensure the returned value is a float

In [41]:
# Use the LearningRateScheduler callback
from tensorflow.keras.callbacks import LearningRateScheduler
lr_scheduler = LearningRateScheduler(schedule)

In [42]:
# Train the optimized model with callbacks
history = nn_optimized.fit(X_train, y_train,
                           epochs=150,
                           batch_size=32,
                           validation_data=(X_test, y_test),
                           callbacks=[lr_reduction, early_stopping, lr_scheduler])

Epoch 1/150
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 8ms/step - accuracy: 0.5247 - loss: 2.5108 - val_accuracy: 0.7122 - val_loss: 1.5613 - learning_rate: 4.5242e-04
Epoch 2/150
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.6139 - loss: 1.3972 - val_accuracy: 0.7214 - val_loss: 0.9549 - learning_rate: 4.5242e-04
Epoch 3/150
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.6867 - loss: 0.9125 - val_accuracy: 0.7182 - val_loss: 0.7260 - learning_rate: 4.5242e-04
Epoch 4/150
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.7101 - loss: 0.7214 - val_accuracy: 0.7239 - val_loss: 0.6416 - learning_rate: 4.5242e-04
Epoch 5/150
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.7039 - loss: 0.6568 - val_accuracy: 0.7217 - val_loss: 0.6169 - learning_rate: 4.5242e-04
Epoch 6/150
[1m858/858[0m [32m━━━━━━━━━━━

In [43]:
# Evaluate the optimized model
loss, accuracy = nn_optimized.evaluate(X_test, y_test)
print(f"Optimized Model Loss: {loss}, Optimized Model Accuracy: {accuracy}")


[1m215/215[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7257 - loss: 0.5804
Optimized Model Loss: 0.5813832879066467, Optimized Model Accuracy: 0.7249271273612976


In [44]:
# Save the optimized model to an HDF5 file
nn_optimized.save('AlphabetSoupCharity_Optimization2.h5')


