In [1]:
# Import dependencies
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load data
url = "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"
application_df = pd.read_csv(url)

# Display the first few rows
application_df.head()


Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [2]:
application_df.drop(columns=['EIN', 'NAME'], inplace=True)


In [3]:
# APPLICATION_TYPE
application_counts = application_df['APPLICATION_TYPE'].value_counts()
application_types_to_replace = application_counts[application_counts < 500].index
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(application_types_to_replace, "Other")

# CLASSIFICATION
classification_counts = application_df['CLASSIFICATION'].value_counts()
classifications_to_replace = classification_counts[classification_counts < 1000].index
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(classifications_to_replace, "Other")


In [4]:
application_df = pd.get_dummies(application_df)


In [5]:
X = application_df.drop(columns=['IS_SUCCESSFUL'])
y = application_df['IS_SUCCESSFUL']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)


In [7]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [8]:
# Define the model
nn = tf.keras.models.Sequential()

# Input Layer and First Hidden Layer
nn.add(tf.keras.layers.Dense(units=128, input_dim=len(X_train_scaled[0]), activation="relu"))

# Second Hidden Layer
nn.add(tf.keras.layers.Dense(units=64, activation="relu"))

# Third Hidden Layer (Optional)
nn.add(tf.keras.layers.Dense(units=32, activation="tanh"))

# Output Layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the model
nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [21]:
# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=200, batch_size=32)

Epoch 1/200
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7356 - loss: 0.5508
Epoch 2/200
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7354 - loss: 0.5499
Epoch 3/200
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7329 - loss: 0.5495
Epoch 4/200
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7394 - loss: 0.5432
Epoch 5/200
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7322 - loss: 0.5527
Epoch 6/200
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7362 - loss: 0.5525
Epoch 7/200
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7395 - loss: 0.5457
Epoch 8/200
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7418 - loss: 0.5440
Epoch 9/200
[1m804/804[0m [32

In [10]:
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - 675us/step - accuracy: 0.7292 - loss: 0.5630
Loss: 0.563023030757904, Accuracy: 0.7292128205299377


Attemp 2

In [11]:
# Log transform ASK_AMT to reduce skewness
application_df['ASK_AMT'] = np.log1p(application_df['ASK_AMT'])


In [12]:
# Define the neural network model
nn = tf.keras.models.Sequential()

# Input Layer and Hidden Layers
nn.add(tf.keras.layers.Dense(units=256, input_dim=len(X_train_scaled[0]), activation="relu"))
nn.add(tf.keras.layers.Dense(units=128, activation="relu"))
nn.add(tf.keras.layers.Dense(units=64, activation="relu"))
nn.add(tf.keras.layers.Dropout(0.2))  # Dropout layer to prevent overfitting

# Output Layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the model
nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [13]:
# Train the model with early stopping
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
fit_model = nn.fit(X_train_scaled, y_train,
                   epochs=300,
                   batch_size=32,
                   validation_split=0.2,
                   callbacks=[callback])


Epoch 1/300
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7143 - loss: 0.5881 - val_accuracy: 0.7326 - val_loss: 0.5543
Epoch 2/300
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 916us/step - accuracy: 0.7262 - loss: 0.5602 - val_accuracy: 0.7372 - val_loss: 0.5561
Epoch 3/300
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 895us/step - accuracy: 0.7301 - loss: 0.5569 - val_accuracy: 0.7380 - val_loss: 0.5475
Epoch 4/300
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 896us/step - accuracy: 0.7298 - loss: 0.5515 - val_accuracy: 0.7370 - val_loss: 0.5503
Epoch 5/300
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 907us/step - accuracy: 0.7316 - loss: 0.5573 - val_accuracy: 0.7333 - val_loss: 0.5525
Epoch 6/300
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 944us/step - accuracy: 0.7306 - loss: 0.5533 - val_accuracy: 0.7359 - val_loss: 0.5496
Epoch 7/300


In [14]:
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - 477us/step - accuracy: 0.7280 - loss: 0.5565
Loss: 0.5565275549888611, Accuracy: 0.7280466556549072


Attempt 3

In [15]:
# Updated Neural Network Model
nn = tf.keras.models.Sequential()

# Input Layer and Hidden Layers
nn.add(tf.keras.layers.Input(shape=(len(X_train_scaled[0]),)))
nn.add(tf.keras.layers.Dense(units=512, activation="relu"))  # Increased neurons
nn.add(tf.keras.layers.Dense(units=256, activation="relu"))
nn.add(tf.keras.layers.Dense(units=128, activation="relu"))
nn.add(tf.keras.layers.Dropout(0.3))  # Increased dropout rate
nn.add(tf.keras.layers.Dense(units=64, activation="relu"))

# Output Layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the model
nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [16]:
# Train the model with increased patience
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
fit_model = nn.fit(X_train_scaled, y_train,
                   epochs=500,  # Increased epochs
                   batch_size=32,
                   validation_split=0.2,
                   callbacks=[callback])


Epoch 1/500
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7135 - loss: 0.5874 - val_accuracy: 0.7320 - val_loss: 0.5534
Epoch 2/500
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7290 - loss: 0.5575 - val_accuracy: 0.7368 - val_loss: 0.5553
Epoch 3/500
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7211 - loss: 0.5646 - val_accuracy: 0.7376 - val_loss: 0.5549
Epoch 4/500
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7233 - loss: 0.5635 - val_accuracy: 0.7362 - val_loss: 0.5474
Epoch 5/500
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7320 - loss: 0.5562 - val_accuracy: 0.7322 - val_loss: 0.5544
Epoch 6/500
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7335 - loss: 0.5496 - val_accuracy: 0.7394 - val_loss: 0.5536
Epoch 7/500
[1m644/64

In [17]:
X = application_df.drop(columns=['IS_SUCCESSFUL', 'ASK_AMT'])  # Dropping 'ASK_AMT' as a test

In [18]:
# Compile the model with RMSprop
nn.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
           loss='binary_crossentropy',
           metrics=['accuracy'])

In [19]:
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - 1ms/step - accuracy: 0.7283 - loss: 0.5548
Loss: 0.554751992225647, Accuracy: 0.7282798886299133


In [20]:
# Save the model
nn.save("AlphabetSoupCharity_Optimization.h5")

