## Preprocessing

In [None]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

# Load the charity dataset straight from its hosted CSV and preview the first rows.
import pandas as pd

CHARITY_DATA_URL = "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"
application_df = pd.read_csv(CHARITY_DATA_URL)
application_df.head()

In [None]:
# Remove the identifier columns ('EIN' and 'NAME'); they are not useful as model inputs.
# `columns=` already names the axis, so no separate `axis` argument is needed.
columns_to_drop = ['EIN', 'NAME']
new_df = application_df.drop(columns=columns_to_drop)
new_df.head()

In [None]:
# Summary statistics (pandas `describe` defaults) for the trimmed dataframe
new_df.describe()

In [None]:
# Determine the number of unique values in each column.
# `nunique()` reports the distinct-value count per column, which shows which
# categorical columns have enough categories to need binning.
new_df.nunique()

In [None]:
# Look at APPLICATION_TYPE value counts for binning:
# infrequent types are grouped into a single "Other" category further down.
new_df['APPLICATION_TYPE'].value_counts()

In [None]:
# Choose a cutoff value and create a list of application types to be replaced
# use the variable name `application_types_to_replace`
cutoff_value = 500

# Derive the list from the data instead of hard-coding it, so the cutoff
# actually controls which application types get binned.
application_type_counts = new_df['APPLICATION_TYPE'].value_counts()
application_types_to_replace = list(
    application_type_counts[application_type_counts < cutoff_value].index
)

print(application_types_to_replace)

In [None]:
# Replace in dataframe: map every listed application type to "Other" in one
# vectorized call rather than rescanning the column once per label.
new_df['APPLICATION_TYPE'] = new_df['APPLICATION_TYPE'].replace(
    application_types_to_replace, "Other"
)

# Check to make sure binning was successful
new_df['APPLICATION_TYPE'].value_counts()

In [None]:
# Preview the dataframe after binning APPLICATION_TYPE
new_df.head()

In [None]:
# Look at CLASSIFICATION value counts for binning:
# infrequent classifications are grouped into "Other" further down.
new_df['CLASSIFICATION'].value_counts()

In [None]:
# Keep the CLASSIFICATION value counts in a variable so they can be filtered below
classification_counts = new_df['CLASSIFICATION'].value_counts()
print(classification_counts)

In [None]:
# Narrow the counts to classifications that occur more than once,
# which hides the long tail of one-off values.
occurs_more_than_once = classification_counts > 1
greater_than_one = classification_counts.loc[occurs_more_than_once]
print(greater_than_one)

In [None]:
# Classifications seen at most `cutoff_value` times are treated as rare
cutoff_value = 300

# Tally how often each classification appears
classification_counts = new_df['CLASSIFICATION'].value_counts()

# Collect the labels of the rare classifications
is_rare = classification_counts <= cutoff_value
classifications_to_replace = classification_counts.index[is_rare].tolist()

print(classifications_to_replace)

In [None]:
# Replace in dataframe: map every rare classification (the labels collected in
# `classifications_to_replace`) to "Other" in one vectorized call rather than
# rescanning the column once per label.
new_df['CLASSIFICATION'] = new_df['CLASSIFICATION'].replace(
    classifications_to_replace, "Other"
)

# Check to make sure binning was successful
new_df['CLASSIFICATION'].value_counts()

In [None]:
# Inspect column dtypes of the dataframe that is about to be encoded.
# `new_df` is the trimmed, binned frame; `application_df` is still the raw
# data including the dropped EIN/NAME columns.
new_df.info()

In [None]:
# Convert categorical data to numeric with `pd.get_dummies`.
# `dtype=int` makes the indicator columns 0/1 integers on every pandas version
# (recent releases default to bool), so the frame converts to a purely numeric
# array instead of an object array when `.values` is taken.
application_dummies = pd.get_dummies(new_df, dtype=int)

print(application_dummies)

In [None]:
# Split the preprocessed data into target and feature arrays
target_column = 'IS_SUCCESSFUL'
y = application_dummies[target_column].values  # Target (output)
X = application_dummies.drop(columns=target_column).values  # Features (input)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardize the features. The scaler is fitted on the training split only,
# so no statistics from the test split influence the transformation.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the same fitted transformation to both splits
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [None]:
# Get the shape of the scaled training data: (number of samples, number of input features)
X_train_scaled.shape

## Compile, Train and Evaluate the Model

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

In [None]:
# # Create a method that creates a new Sequential model with hyperparameter options
# def create_model(hp):
#     nn = tf.keras.models.Sequential()

#     # Allow kerastuner to decide which activation function to use in hidden layers
#     activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
#     # Allow kerastuner to decide number of neurons in first layer
#     nn.add(tf.keras.layers.Dense(units=hp.Int('first_units',
#         min_value=1,
#         max_value= 90,
#         step=5), activation=activation, input_dim=input_features))

#     # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
#     for i in range(hp.Int('num_layers', 1, 5)):
#         nn.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
#             min_value=1,
#             max_value=30,
#             step=5),
#             activation=activation))
    
#     nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

#     # Compile the model
#     nn.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
#     return nn

In [None]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
# One input node per column of the scaled training matrix.
input_features = X_train_scaled.shape[1]

nn = tf.keras.models.Sequential([
    # Declare the input explicitly; newer Keras releases discourage passing
    # `input_shape=` to the first Dense layer of a Sequential model.
    tf.keras.Input(shape=(input_features,)),

    # First hidden layer
    tf.keras.layers.Dense(32, activation='relu'),

    # Second hidden layer with Leaky ReLU. LeakyReLU is a layer, so it is
    # stacked after a linear Dense layer rather than passed as `activation=`;
    # the computation is the same, and it avoids relying on Dense accepting a
    # layer object as its activation (which can complicate saving/loading in
    # some Keras versions).
    tf.keras.layers.Dense(16),
    tf.keras.layers.LeakyReLU(alpha=0.01),

    # Third and fourth hidden layers (adjust the number of layers and nodes as needed)
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(16, activation='tanh'),

    # Output layer: a single sigmoid unit for the binary IS_SUCCESSFUL target
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

# Compile the model
nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Check the structure of the model
nn.summary()


In [None]:
# Train the model (both settings can be tuned)
epochs = 30  # full passes over the training data
batch_size = 10  # samples per gradient update

# Keras holds back 20% of the training data for per-epoch validation
fit_options = dict(epochs=epochs, batch_size=batch_size, validation_split=0.2)
history = nn.fit(X_train_scaled, y_train, **fit_options)

In [None]:
# Score the trained model on the held-out test split
evaluation = nn.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = evaluation
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Training accuracy per epoch (epochs numbered from 1)
epoch_numbers = range(1, len(history.history['loss']) + 1)
history_df = pd.DataFrame(history.history, index=epoch_numbers)
history_df.plot(y='accuracy')

In [None]:
# Training loss per epoch (epochs numbered from 1)
epoch_numbers = range(1, len(history.history['loss']) + 1)
history_df = pd.DataFrame(history.history, index=epoch_numbers)
history_df.plot(y='loss')

In [None]:
# Validation loss per epoch (epochs numbered from 1)
epoch_numbers = range(1, len(history.history['loss']) + 1)
history_df = pd.DataFrame(history.history, index=epoch_numbers)
history_df.plot(y='val_loss')

In [None]:
# Validation accuracy per epoch (epochs numbered from 1)
epoch_numbers = range(1, len(history.history['loss']) + 1)
history_df = pd.DataFrame(history.history, index=epoch_numbers)
history_df.plot(y='val_accuracy')

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Build a per-epoch table of the training metrics (epochs numbered from 1)
epoch_numbers = range(1, len(history.history['loss']) + 1)
history_df = pd.DataFrame(history.history, index=epoch_numbers)

# Draw the accuracy curve and label it through the returned Axes
accuracy_ax = history_df.plot(y='accuracy')
accuracy_ax.set_xlabel('Epoch')
accuracy_ax.set_ylabel('Accuracy')
accuracy_ax.set_title('Accuracy per Epoch')

# Write the figure to disk as a PNG before displaying it
plt.savefig('accuracy_plot-optimization.png')

# Show the plot (optional)
plt.show()

In [None]:
# Export the trained model to a `.keras` file (not HDF5, despite the original wording)
model_filename = 'AlphabetSoupCharity-Optimization.keras'
nn.save(model_filename)