In [1]:
# Step 1: Import libraries and read the data
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import keras_tuner as kt

In [3]:
# Load charity data from the provided URL
application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")

# Inspect the data
print(application_df.head())

        EIN                                      NAME APPLICATION_TYPE  \
0  10520599              BLUE KNIGHTS MOTORCYCLE CLUB              T10   
1  10531628    AMERICAN CHESAPEAKE CLUB CHARITABLE TR               T3   
2  10547893        ST CLOUD PROFESSIONAL FIREFIGHTERS               T5   
3  10553066            SOUTHSIDE ATHLETIC ASSOCIATION               T3   
4  10556103  GENETIC RESEARCH INSTITUTE OF THE DESERT               T3   

        AFFILIATION CLASSIFICATION      USE_CASE  ORGANIZATION  STATUS  \
0       Independent          C1000    ProductDev   Association       1   
1       Independent          C2000  Preservation  Co-operative       1   
2  CompanySponsored          C3000    ProductDev   Association       1   
3  CompanySponsored          C2000  Preservation         Trust       1   
4       Independent          C1000     Heathcare         Trust       1   

      INCOME_AMT SPECIAL_CONSIDERATIONS  ASK_AMT  IS_SUCCESSFUL  
0              0                      N     

In [5]:
# Step 1: Drop columns that aren't relevant for prediction
application_df = application_df.drop(columns=["EIN", "NAME"])

In [6]:
# Step 2: Encode categorical variables using One-Hot Encoding for columns with multiple categories
application_df = pd.get_dummies(application_df, drop_first=True)


In [7]:
# Step 3: Handle missing values (if any) - filling or dropping
# Check for missing values
if application_df.isnull().sum().any():
    application_df = application_df.dropna()  # Drop rows with missing values


In [8]:
# Step 4: Split the data into features (X) and target (y)
X = application_df.drop(columns=["IS_SUCCESSFUL"])
y = application_df["IS_SUCCESSFUL"]


In [9]:
# Step 5: Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 6: Scale the features using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [10]:

 #Step 7: Define the model-building function with hyperparameter tuning
def build_model(hp):
    model = tf.keras.Sequential()
    
    # Hyperparameter for the first hidden layer units
    model.add(tf.keras.layers.Dense(
        units=hp.Int('units_1', min_value=32, max_value=256, step=32),
        activation=hp.Choice('activation', values=['relu', 'tanh']),
        input_dim=X_train.shape[1]
    ))

    # Hyperparameter for adding additional layers
    for i in range(hp.Int('num_layers', 1, 4)):
        model.add(tf.keras.layers.Dense(
            units=hp.Int(f'units_{i+2}', min_value=32, max_value=256, step=32),
            activation=hp.Choice('activation', values=['relu', 'tanh'])
        ))

    # Output layer
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    # Compile the model
    model.compile(optimizer=hp.Choice('optimizer', values=['adam', 'sgd']),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model


In [11]:
# Step 8: Initialize Keras Tuner with Hyperband
tuner = kt.Hyperband(
    build_model,
    objective='val_accuracy',
    max_epochs=10,
    factor=3,
    hyperband_iterations=1,
    directory='my_dir',
    project_name='charity_tuning'
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [12]:
# Step 9: Run the tuner to search for the best hyperparameters
tuner.search(X_train_scaled, y_train, epochs=10, validation_data=(X_test_scaled, y_test))

Trial 30 Complete [00h 00m 13s]
val_accuracy: 0.7279883623123169

Best val_accuracy So Far: 0.731195330619812
Total elapsed time: 00h 03m 13s


In [13]:
# Step 10: Get the best model
best_model = tuner.get_best_models(1)[0]

In [14]:
# Step 11: Evaluate the best model on the test dataset
loss, accuracy = best_model.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

215/215 - 0s - 2ms/step - accuracy: 0.7312 - loss: 0.5526
Test Loss: 0.5526480078697205
Test Accuracy: 0.731195330619812


In [15]:
# Step 12: Get the best hyperparameters
best_hyperparameters = tuner.get_best_hyperparameters(1)[0]
print("Best Hyperparameters:")
print(best_hyperparameters.values)


Best Hyperparameters:
{'units_1': 64, 'activation': 'relu', 'num_layers': 2, 'units_2': 256, 'optimizer': 'adam', 'units_3': 128, 'units_4': 192, 'units_5': 64, 'tuner/epochs': 10, 'tuner/initial_epoch': 0, 'tuner/bracket': 0, 'tuner/round': 0}


In [16]:
# Step 13: Save the best model
best_model.save('best_charity_model.h5')
print("Best model saved as 'best_charity_model.h5'")



Best model saved as 'best_charity_model.h5'
