In [7]:
# Step 3: Optimizing the Model

In [4]:
# 1. Import Dependencies
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [5]:
# 2. Load the CSV Data
url = "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"
application_df = pd.read_csv(url)

In [6]:
# 3. Preprocess the Data

In [8]:
# Drop non-beneficial columns
application_df.drop(['EIN', 'NAME'], axis=1, inplace=True)

In [9]:
# Bin rare values for APPLICATION_TYPE
application_type_counts = application_df['APPLICATION_TYPE'].value_counts()
cutoff = 100  # Adjust this cutoff as needed
application_types_to_replace = application_type_counts[application_type_counts < cutoff].index.tolist()
for app in application_types_to_replace:
    application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(app, "Other")

In [10]:
# Bin rare values for CLASSIFICATION
classification_counts = application_df['CLASSIFICATION'].value_counts()
cutoff_class = 50  # Adjust this cutoff as needed
classifications_to_replace = classification_counts[classification_counts < cutoff_class].index.tolist()
for cls in classifications_to_replace:
    application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(cls, "Other")


In [11]:
# Convert categorical columns to numeric using one-hot encoding
categorical_columns = ['APPLICATION_TYPE', 'AFFILIATION', 'CLASSIFICATION',
                       'USE_CASE', 'ORGANIZATION', 'STATUS',
                       'INCOME_AMT', 'SPECIAL_CONSIDERATIONS']
application_df = pd.get_dummies(application_df, columns=categorical_columns, drop_first=True)

In [12]:
# Split data into features and target
X = application_df.drop('IS_SUCCESSFUL', axis=1)
y = application_df['IS_SUCCESSFUL']

In [13]:
# Split into training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [14]:
# Scale the features using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [15]:
# 4. Define an Optimized Neural Network Model

optimized_nn = tf.keras.models.Sequential()

In [16]:
# First hidden layer with 128 neurons and ReLU activation, plus input shape
optimized_nn.add(tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [17]:
# Dropout layer for regularization
optimized_nn.add(tf.keras.layers.Dropout(0.2))


In [18]:
# Second hidden layer with 64 neurons and ReLU activation
optimized_nn.add(tf.keras.layers.Dense(64, activation='relu'))

In [19]:
# Third hidden layer with 32 neurons and ReLU activation
optimized_nn.add(tf.keras.layers.Dense(32, activation='relu'))

In [20]:
# Output layer for binary classification
optimized_nn.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [21]:
# Compile the model using Adam optimizer and binary crossentropy loss
optimized_nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [22]:
# 5. Set Up Callbacks

In [23]:
# EarlyStopping callback: stops training if validation loss doesn't improve for 10 epochs
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)


In [24]:
# Custom callback to save model weights every 5 epochs
# This callback saves weights to a file ending with ".weights.h5" every 5 epochs.
class CustomModelCheckpoint(tf.keras.callbacks.Callback):
    def __init__(self, save_every):
        super().__init__()
        self.save_every = save_every

    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self.save_every == 0:
            filepath = f'optimized_weights_epoch_{epoch + 1:02d}.weights.h5'
            self.model.save_weights(filepath)
            print(f'\nSaved weights at epoch {epoch + 1} to {filepath}')

custom_checkpoint = CustomModelCheckpoint(save_every=5)


In [25]:
# 6. Train the Optimized Model
history_optimized = optimized_nn.fit(
    X_train_scaled,
    y_train,
    epochs=200,
    validation_split=0.2,
    callbacks=[early_stop, custom_checkpoint]
)

Epoch 1/200
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.6888 - loss: 0.6002 - val_accuracy: 0.7341 - val_loss: 0.5489
Epoch 2/200
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7213 - loss: 0.5645 - val_accuracy: 0.7380 - val_loss: 0.5450
Epoch 3/200
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7207 - loss: 0.5613 - val_accuracy: 0.7403 - val_loss: 0.5432
Epoch 4/200
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7305 - loss: 0.5528 - val_accuracy: 0.7314 - val_loss: 0.5469
Epoch 5/200
[1m599/601[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.7261 - loss: 0.5562
Saved weights at epoch 5 to optimized_weights_epoch_05.weights.h5
[1m601/601[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7262 - loss: 0.5562 - val_accuracy: 0.7399 - val_loss: 0.5434
Epoch 6/20

In [26]:
# 7. Evaluate the Optimized Model on Test Data
opt_loss, opt_accuracy = optimized_nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Optimized Model - Loss: {opt_loss}, Accuracy: {opt_accuracy}")

322/322 - 2s - 5ms/step - accuracy: 0.7327 - loss: 0.5528
Optimized Model - Loss: 0.5528254508972168, Accuracy: 0.7326530814170837


In [27]:
# 8. Save the Optimized Model to an HDF5 File
optimized_nn.save("AlphabetSoupCharity_Optimization.h5")

