In [3]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

# Import and read the charity_data.csv
import pandas as pd 
application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")

# Drop the non-beneficial ID columns, 'EIN' and 'NAME'
application_df = application_df.drop(columns=['EIN','NAME'])

# Determine the number of unique values in each column
unique_values = application_df.nunique()

# Determine the number of data points for each unique value in columns with more than 10 unique values
for column in application_df.columns:
    if unique_values[column] > 10:
        print(f"{column} unique value counts:")
        print(application_df[column].value_counts())
        print("\n")

# Create a new value called "Other" for rare categorical variables in the "APPLICATION_TYPE" column
application_type_counts = application_df['APPLICATION_TYPE'].value_counts()
application_types_to_replace = application_type_counts[application_type_counts < 200].index.tolist()
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(application_types_to_replace, 'Other')

# Create a new value called "Other" for rare categorical variables in the "CLASSIFICATION" column
classification_counts = application_df['CLASSIFICATION'].value_counts()
classifications_to_replace = classification_counts[classification_counts < 1000].index.tolist()
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(classifications_to_replace, 'Other')

# Convert categorical data to numeric with pd.get_dummies
application_df = pd.get_dummies(application_df)

# Define the features (X) and target (y) variables
X = application_df.drop(columns=['IS_SUCCESSFUL'])
y = application_df['IS_SUCCESSFUL']

# Split the preprocessed data into training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Define the model - deep neural net
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 =  80
hidden_nodes_layer2 = 30

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='AlphabetSoupCharity_Optimization.keras', # Change filepath to end with .keras
    verbose=1,
    save_weights_only=False, # Change to False to save the entire model
    save_freq=5)

fit_model = nn.fit(X_train_scaled, y_train, epochs=100, callbacks=[callback])

# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

# Export the model to HDF5 file
nn.save("AlphabetSoupCharity.h5")


APPLICATION_TYPE unique value counts:
APPLICATION_TYPE
T3     27037
T4      1542
T6      1216
T5      1173
T19     1065
T8       737
T7       725
T10      528
T9       156
T13       66
T12       27
T2        16
T25        3
T14        3
T29        2
T15        2
T17        1
Name: count, dtype: int64


CLASSIFICATION unique value counts:
CLASSIFICATION
C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
         ...  
C4120        1
C8210        1
C2561        1
C4500        1
C2150        1
Name: count, Length: 71, dtype: int64


ASK_AMT unique value counts:
ASK_AMT
5000        25398
10478           3
15583           3
63981           3
6725            3
            ...  
5371754         1
30060           1
43091152        1
18683           1
36500179        1
Name: count, Length: 8747, dtype: int64




  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m  1/804[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m15:15[0m 1s/step - accuracy: 0.5312 - loss: 0.7697
Epoch 1: saving model to AlphabetSoupCharity_Optimization.keras
[1m  5/804[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m13s[0m 16ms/step - accuracy: 0.4961 - loss: 0.7535
Epoch 1: saving model to AlphabetSoupCharity_Optimization.keras

Epoch 1: saving model to AlphabetSoupCharity_Optimization.keras
[1m 15/804[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m7s[0m 9ms/step - accuracy: 0.5282 - loss: 0.7339  
Epoch 1: saving model to AlphabetSoupCharity_Optimization.keras

Epoch 1: saving model to AlphabetSoupCharity_Optimization.keras
[1m 25/804[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m6s[0m 8ms/step - accuracy: 0.5391 - loss: 0.7216
Epoch 1: saving model to AlphabetSoupCharity_Optimization.keras

Epoch 1: saving model to AlphabetSoupCharity_Optimization.keras
[1m 35/804[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m5s[0m 7ms/step - accuracy: 0.5530 - loss: 0.7094
Epoch 1: saving model to Alphab



Loss: 0.5608265399932861, Accuracy: 0.726064145565033
