In [1]:
# Import necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

# Load the data
application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")
application_df.head()


Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [2]:
# Drop non-beneficial ID columns
application_df = application_df.drop(columns=['EIN', 'NAME'])


In [3]:
# Bin and replace rare application types
application_types_to_replace = application_df['APPLICATION_TYPE'].value_counts()[application_df['APPLICATION_TYPE'].value_counts() < 1000].index
for app in application_types_to_replace:
    application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(app, "Other")

In [4]:
# Convert categorical data to numeric with `pd.get_dummies`
application_df = pd.get_dummies(application_df)

# Split the data into features and target arrays
y = application_df["IS_SUCCESSFUL"].values
X = application_df.drop(columns=["IS_SUCCESSFUL"]).values


In [5]:
# Split the data into training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [6]:
# Define the model
nn = tf.keras.models.Sequential()


In [7]:
# First hidden layer with more neurons
nn.add(tf.keras.layers.Dense(units=100, activation="relu", input_dim=len(X_train_scaled[0])))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
# Second hidden layer with more neurons
nn.add(tf.keras.layers.Dense(units=50, activation="relu"))


In [9]:
# Third hidden layer
nn.add(tf.keras.layers.Dense(units=25, activation="relu"))

In [10]:
# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

In [11]:
# Compile the model
nn.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])


In [12]:
# Train the model with more epochs
fit_model = nn.fit(X_train_scaled, y_train, epochs=150)


Epoch 1/150
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 460us/step - accuracy: 0.7104 - loss: 0.5888
Epoch 2/150
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 453us/step - accuracy: 0.7312 - loss: 0.5494
Epoch 3/150
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 466us/step - accuracy: 0.7311 - loss: 0.5534
Epoch 4/150
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 455us/step - accuracy: 0.7344 - loss: 0.5483
Epoch 5/150
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 489us/step - accuracy: 0.7321 - loss: 0.5472
Epoch 6/150
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 559us/step - accuracy: 0.7331 - loss: 0.5461
Epoch 7/150
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 508us/step - accuracy: 0.7396 - loss: 0.5408
Epoch 8/150
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 497us/step - accuracy: 0.7314 - loss: 0.5485
Epoch 9/150
[1m

In [13]:
# Evaluate the model
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

215/215 - 0s - 666us/step - accuracy: 0.7270 - loss: 0.5835
Loss: 0.5834640860557556, Accuracy: 0.7269679307937622


In [14]:
#SECOND ATTEMPT


In [15]:
# Define the model
nn = tf.keras.models.Sequential()

# First hidden layer with more neurons and 'tanh' activation function
nn.add(tf.keras.layers.Dense(units=128, activation="tanh", input_dim=len(X_train_scaled[0])))

# Second hidden layer with more neurons
nn.add(tf.keras.layers.Dense(units=64, activation="tanh"))

# Third hidden layer
nn.add(tf.keras.layers.Dense(units=32, activation="tanh"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))


In [16]:
# Compile the model with a different optimizer
nn.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Check the structure of the model
nn.summary()

In [17]:
# Train the model with more epochs
fit_model = nn.fit(X_train_scaled, y_train, epochs=200, verbose=1)


Epoch 1/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 533us/step - accuracy: 0.7132 - loss: 0.5808
Epoch 2/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 530us/step - accuracy: 0.7301 - loss: 0.5514
Epoch 3/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 510us/step - accuracy: 0.7337 - loss: 0.5500
Epoch 4/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 531us/step - accuracy: 0.7355 - loss: 0.5464
Epoch 5/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 584us/step - accuracy: 0.7282 - loss: 0.5505
Epoch 6/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 576us/step - accuracy: 0.7338 - loss: 0.5460
Epoch 7/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 515us/step - accuracy: 0.7312 - loss: 0.5469
Epoch 8/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 511us/step - accuracy: 0.7353 - loss: 0.5447
Epoch 9/200
[1m

In [18]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")


215/215 - 0s - 2ms/step - accuracy: 0.7292 - loss: 0.5702
Loss: 0.5702114105224609, Accuracy: 0.7291545271873474


In [19]:
#THIRD ATTEMPT

In [20]:
# Define the model
nn = tf.keras.models.Sequential()

# First hidden layer with increased neurons and dropout
nn.add(tf.keras.layers.Dense(units=256, activation="relu", input_dim=len(X_train_scaled[0])))
nn.add(tf.keras.layers.Dropout(0.5))  # 50% dropout rate

# Second hidden layer with increased neurons and dropout
nn.add(tf.keras.layers.Dense(units=128, activation="relu"))
nn.add(tf.keras.layers.Dropout(0.5))  # 50% dropout rate

# Third hidden layer with increased neurons and dropout
nn.add(tf.keras.layers.Dense(units=64, activation="relu"))
nn.add(tf.keras.layers.Dropout(0.5))  # 50% dropout rate

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the model with RMSprop optimizer
nn.compile(optimizer="RMSprop", loss="binary_crossentropy", metrics=["accuracy"])

# Check the structure of the model
nn.summary()


In [21]:
# Train the model with the same number of epochs
fit_model = nn.fit(X_train_scaled, y_train, epochs=200, verbose=1)


Epoch 1/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 878us/step - accuracy: 0.6610 - loss: 0.6726
Epoch 2/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7183 - loss: 0.6002
Epoch 3/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7279 - loss: 0.5863
Epoch 4/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 973us/step - accuracy: 0.7306 - loss: 0.5756
Epoch 5/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 854us/step - accuracy: 0.7327 - loss: 0.5759
Epoch 6/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 862us/step - accuracy: 0.7341 - loss: 0.5696
Epoch 7/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 937us/step - accuracy: 0.7295 - loss: 0.5755
Epoch 8/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 912us/step - accuracy: 0.7288 - loss: 0.5721
Epoch 9/200
[1m858/

In [22]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")


215/215 - 0s - 745us/step - accuracy: 0.7217 - loss: 0.5771
Loss: 0.5771196484565735, Accuracy: 0.7217200994491577


In [23]:
# Save the optimized model to an HDF5 file
nn.save("AlphabetSoupCharity_Optimization_Attempt3.h5")



In [24]:
# Train the model and store the training history
history = nn.fit(X_train_scaled, y_train, epochs=200, verbose=1)


Epoch 1/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 852us/step - accuracy: 0.7200 - loss: 0.5928
Epoch 2/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 923us/step - accuracy: 0.7205 - loss: 0.5952
Epoch 3/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 842us/step - accuracy: 0.7173 - loss: 0.5998
Epoch 4/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 843us/step - accuracy: 0.7200 - loss: 0.6030
Epoch 5/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 838us/step - accuracy: 0.7143 - loss: 0.6025
Epoch 6/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 890us/step - accuracy: 0.7198 - loss: 0.6028
Epoch 7/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 857us/step - accuracy: 0.7148 - loss: 0.5980
Epoch 8/200
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 856us/step - accuracy: 0.7163 - loss: 0.5980
Epoch 9/200
[1m