In [4]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Load the dataset
application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")

# Preprocessing (same as before)
application_df = application_df.drop(['EIN', 'NAME'], axis=1)

# Binning APPLICATION_TYPE
application_type_counts = application_df['APPLICATION_TYPE'].value_counts()
threshold = 500
application_types_to_replace = application_type_counts[application_type_counts < threshold].index
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(application_types_to_replace, 'Other')

# Binning CLASSIFICATION
classification_counts = application_df['CLASSIFICATION'].value_counts()
classifications_to_replace = list(classification_counts[classification_counts < 300].index)
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(classifications_to_replace, "Other")

# Convert categorical data to numeric
numeric_app_df = pd.get_dummies(application_df)

# Split data
y = numeric_app_df["IS_SUCCESSFUL"]
X = numeric_app_df.drop(["IS_SUCCESSFUL"], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

# Scale data
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Define the model (modified for optimization)
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 = 80  # Increased nodes
hidden_nodes_layer2 = 30 # Increased nodes
hidden_nodes_layer3 = 10 # Added another layer

nn = tf.keras.models.Sequential()
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="relu")) #Added layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))


# Compile the model with different optimizer and learning rate
nn.compile(loss="binary_crossentropy", optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=["accuracy"])

# Early stopping to prevent overfitting
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)


# Train the model with increased epochs and validation data
fit_model = nn.fit(X_train_scaled, y_train, epochs=20, validation_split=0.15, callbacks=[early_stopping]) #Added validation split and early stopping

# Evaluate the model
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m684/684[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 848us/step - accuracy: 0.6983 - loss: 0.5949 - val_accuracy: 0.7261 - val_loss: 0.5636
Epoch 2/20
[1m684/684[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 690us/step - accuracy: 0.7326 - loss: 0.5511 - val_accuracy: 0.7253 - val_loss: 0.5590
Epoch 3/20
[1m684/684[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 682us/step - accuracy: 0.7334 - loss: 0.5485 - val_accuracy: 0.7227 - val_loss: 0.5609
Epoch 4/20
[1m684/684[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 691us/step - accuracy: 0.7344 - loss: 0.5499 - val_accuracy: 0.7232 - val_loss: 0.5592
Epoch 5/20
[1m684/684[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 667us/step - accuracy: 0.7353 - loss: 0.5431 - val_accuracy: 0.7248 - val_loss: 0.5595
Epoch 6/20
[1m684/684[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 695us/step - accuracy: 0.7383 - loss: 0.5447 - val_accuracy: 0.7232 - val_loss: 0.5589
Epoch 7/20
[1m684/684[0m 

In [2]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import keras_tuner as kt

# Load the dataset
application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")

# Preprocessing (same as before)
application_df = application_df.drop(['EIN', 'NAME'], axis=1)

# Binning APPLICATION_TYPE
application_type_counts = application_df['APPLICATION_TYPE'].value_counts()
threshold = 500
application_types_to_replace = application_type_counts[application_type_counts < threshold].index
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(application_types_to_replace, 'Other')

# Binning CLASSIFICATION
classification_counts = application_df['CLASSIFICATION'].value_counts()
classifications_to_replace = list(classification_counts[classification_counts < 300].index)
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(classifications_to_replace, "Other")

# Convert categorical data to numeric
numeric_app_df = pd.get_dummies(application_df)

# Split data
y = numeric_app_df["IS_SUCCESSFUL"]
X = numeric_app_df.drop(["IS_SUCCESSFUL"], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

# Scale data
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

def build_model(hp):
    number_input_features = len(X_train_scaled[0])
    nn_model = tf.keras.models.Sequential()

    # Tune the number of units in the first Dense layer
    # Choose an optimal value between 32-512
    hp_units1 = hp.Int('units1', min_value=32, max_value=512, step=32)
    nn_model.add(tf.keras.layers.Dense(units=hp_units1, activation='relu', input_dim=number_input_features))


    # Add another dense layer
    hp_units2 = hp.Int('units2', min_value=32, max_value=512, step=32)
    nn_model.add(tf.keras.layers.Dense(units=hp_units2, activation='relu'))

        # Add another dense layer
    hp_units2 = hp.Int('units3', min_value=32, max_value=512, step=32)
    nn_model.add(tf.keras.layers.Dense(units=hp_units2, activation='relu'))

    # Add another dense layer
    hp_units2 = hp.Int('units4', min_value=32, max_value=512, step=32)
    nn_model.add(tf.keras.layers.Dense(units=hp_units2, activation='relu'))

    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Tune the learning rate for the optimizer
    # Choose an optimal value from 0.01, 0.001, or 0.0001
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    nn_model.compile(loss="binary_crossentropy", optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate), metrics=["accuracy"])
    return nn_model

tuner = kt.Hyperband(
    build_model,
    objective="val_accuracy",
    max_epochs=20,
    factor=3,
    directory='my_dir',
    project_name='intro_to_kt'
)

stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

tuner.search(X_train_scaled,y_train,epochs=20,validation_split=0.15,callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=3)[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_hps.get('units1')}, and the optimal learning rate for the optimizer
is {best_hps.get('learning_rate')}.
""")

# Evaluate the hypermodel
model = tuner.hypermodel.build(best_hps)
history = model.fit(X_train_scaled, y_train, epochs=50, validation_split=0.15)

val_acc_per_epoch = history.history['val_accuracy']
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))

hypermodel = tuner.hypermodel.build(best_hps)

# Retrain the model
hypermodel.fit(X_train_scaled, y_train, epochs=best_epoch, validation_split=0.15)

eval_result = hypermodel.evaluate(X_test_scaled, y_test)
print("[test loss, test accuracy]:", eval_result)

Reloading Tuner from my_dir\intro_to_kt\tuner0.json

The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is 64, and the optimal learning rate for the optimizer
is 0.001.



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m684/684[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 901us/step - accuracy: 0.6993 - loss: 0.5897 - val_accuracy: 0.7196 - val_loss: 0.5621
Epoch 2/50
[1m684/684[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 728us/step - accuracy: 0.7269 - loss: 0.5566 - val_accuracy: 0.7201 - val_loss: 0.5659
Epoch 3/50
[1m684/684[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 739us/step - accuracy: 0.7416 - loss: 0.5414 - val_accuracy: 0.7209 - val_loss: 0.5716
Epoch 4/50
[1m684/684[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 738us/step - accuracy: 0.7362 - loss: 0.5488 - val_accuracy: 0.7207 - val_loss: 0.5618
Epoch 5/50
[1m684/684[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 725us/step - accuracy: 0.7349 - loss: 0.5458 - val_accuracy: 0.7227 - val_loss: 0.5558
Epoch 6/50
[1m684/684[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 765us/step - accuracy: 0.7361 - loss: 0.5429 - val_accuracy: 0.7225 - val_loss: 0.5608
Epoch 7/50
[1m6

In [5]:
# Save the model (optional)
nn.save('Models/AlphabetSoupCharity_optimized_hypertuning.h5')

