In [492]:
import pandas as pd
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder

In [493]:
applicant_data_df = pd.read_csv(Path("applicants_data.csv"))

# Review the DataFrame
applicant_data_df

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1
...,...,...,...,...,...,...,...,...,...,...,...,...
34294,996009318,THE LIONS CLUB OF HONOLULU KAMEHAMEHA,T4,Independent,C1000,ProductDev,Association,1,0,N,5000,0
34295,996010315,INTERNATIONAL ASSOCIATION OF LIONS CLUBS,T4,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
34296,996012607,PTA HAWAII CONGRESS,T3,CompanySponsored,C2000,Preservation,Association,1,0,N,5000,0
34297,996015768,AMERICAN FEDERATION OF GOVERNMENT EMPLOYEES LO...,T5,Independent,C3000,ProductDev,Association,1,0,N,5000,1


In [494]:
applicant_data_df.dtypes

EIN                        int64
NAME                      object
APPLICATION_TYPE          object
AFFILIATION               object
CLASSIFICATION            object
USE_CASE                  object
ORGANIZATION              object
STATUS                     int64
INCOME_AMT                object
SPECIAL_CONSIDERATIONS    object
ASK_AMT                    int64
IS_SUCCESSFUL              int64
dtype: object

In [495]:
""" dropping columns I don't want to caluculate so that I can create a better model """
applicant_data_df = applicant_data_df.drop(columns=["EIN", "NAME", "APPLICATION_TYPE", "USE_CASE", 
"AFFILIATION", "ORGANIZATION", "CLASSIFICATION"])

# Review the DataFrame
applicant_data_df

Unnamed: 0,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,1,0,N,5000,1
1,1,1-9999,N,108590,1
2,1,0,N,5000,0
3,1,10000-24999,N,6692,1
4,1,100000-499999,N,142590,1
...,...,...,...,...,...
34294,1,0,N,5000,0
34295,1,0,N,5000,0
34296,1,0,N,5000,0
34297,1,0,N,5000,1


In [496]:
""" creating a list of categorical variables to use in encoding"""
categorical_variables = list(applicant_data_df.dtypes[applicant_data_df.dtypes == "object"].index)
# Display the categorical variables list
categorical_variables

['INCOME_AMT', 'SPECIAL_CONSIDERATIONS']

In [497]:
""" creating a OneHotEncoder instance """
enc = OneHotEncoder(sparse=False)

In [498]:
""" encoding the data using fit_transform function """
encoded_data = enc.fit_transform(applicant_data_df[categorical_variables])
encoded_data

array([[1., 0., 0., ..., 0., 1., 0.],
       [0., 1., 0., ..., 0., 1., 0.],
       [1., 0., 0., ..., 0., 1., 0.],
       ...,
       [1., 0., 0., ..., 0., 1., 0.],
       [1., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.]])

In [499]:
""" adding the encoded data to a new dataframe """
encoded_df = pd.DataFrame(
    encoded_data,
    columns = enc.get_feature_names(categorical_variables)
)

# Review the DataFrame
encoded_df

Unnamed: 0,INCOME_AMT_0,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M,SPECIAL_CONSIDERATIONS_N,SPECIAL_CONSIDERATIONS_Y
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
34294,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
34295,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
34296,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
34297,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [500]:
""" concatinating the encoded data with the orginal data """
encoded_df = pd.concat(
    [
        applicant_data_df[["STATUS", "ASK_AMT", "IS_SUCCESSFUL"]],
        encoded_df
    ],
    axis=1
)

# Review the Dataframe
encoded_df

Unnamed: 0,STATUS,ASK_AMT,IS_SUCCESSFUL,INCOME_AMT_0,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M,SPECIAL_CONSIDERATIONS_N,SPECIAL_CONSIDERATIONS_Y
0,1,5000,1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,1,108590,1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,1,5000,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,1,6692,1,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,1,142590,1,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34294,1,5000,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
34295,1,5000,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
34296,1,5000,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
34297,1,5000,1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [501]:
""" creating a y variable to use in predicitons """
y = encoded_df["IS_SUCCESSFUL"]

# Display a sample of y
y[0:5]

0    1
1    1
2    0
3    1
4    1
Name: IS_SUCCESSFUL, dtype: int64

In [502]:
""" creating an X variable to hold the encoded data """
X = encoded_df.drop(columns=["IS_SUCCESSFUL"])

# Review the features DataFrame
X

Unnamed: 0,STATUS,ASK_AMT,INCOME_AMT_0,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M,SPECIAL_CONSIDERATIONS_N,SPECIAL_CONSIDERATIONS_Y
0,1,5000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,1,108590,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,1,5000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,1,6692,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,1,142590,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
34294,1,5000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
34295,1,5000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
34296,1,5000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
34297,1,5000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [503]:
""" splitting the data into train and test data """
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)


In [504]:
""" using StandardScaler to scale the X data """
scaler = StandardScaler()

# Fit the scaler to the features training dataset
X_scaler = scaler.fit(X_train)

# Fit the scaler to the features training dataset
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [505]:
""" using the len function to calculate how many input features(columns)"""
number_input_features = len(X_train.iloc[0])

# Review the number of features
number_input_features


13

In [506]:
""" defining the number for output neuron """
number_output_neurons = 1

In [507]:
""" creating first hidden node """
hidden_nodes_layer1 =  8

# Review the number hidden nodes in the first layer
hidden_nodes_layer1

8

In [508]:
""" creating second hidden node """
hidden_nodes_layer2 =  7

# Review the number hidden nodes in the second layer
hidden_nodes_layer2

7

In [509]:
""" creating third hidden node """
hidden_nodes_layer3 = 3

In [510]:
""" calling the Sequential instance """
nn_A3 = Sequential()

In [511]:
""" adding the 3 hidden nodes to the model """
nn_A3.add(Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))
nn_A3.add(Dense(units=hidden_nodes_layer2, activation="relu"))
nn_A3.add(Dense(units=hidden_nodes_layer3, activation="relu"))

In [512]:
""" adding the output layer"""
nn_A3.add(Dense(units=1, activation="sigmoid"))

In [513]:
""" viewing the summary of the model """
nn_A3.summary()

Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_61 (Dense)             (None, 8)                 112       
_________________________________________________________________
dense_62 (Dense)             (None, 7)                 63        
_________________________________________________________________
dense_63 (Dense)             (None, 3)                 24        
_________________________________________________________________
dense_64 (Dense)             (None, 1)                 4         
Total params: 203
Trainable params: 203
Non-trainable params: 0
_________________________________________________________________


In [514]:
""" compiling the model"""
nn_A3.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])


In [515]:
""" fitting the model and running epochs to calculate the predicitons """
nn_A3.fit(X_train_scaled, y_train, epochs=40)


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40

KeyboardInterrupt: 

In [None]:
""" calculating the model accuracy and loss"""
model_loss, model_accuracy = nn_A3.evaluate(X_train_scaled, y_train, verbose=2)


804/804 - 1s - loss: 0.6862 - accuracy: 0.5360


In [None]:
""" printing the accuracy and loss reports """
print("Alternative Model 3 Results")

# Evaluate the model loss and accuracy metrics using the evaluate method and the test data
model_loss, model_accuracy = nn_A3.evaluate(X_train_scaled, y_train, verbose=2)

# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Alternative Model 3 Results
804/804 - 0s - loss: 0.6862 - accuracy: 0.5360
Loss: 0.6862140893936157, Accuracy: 0.5359975099563599


In [None]:
""" saving the model weights to a .h5 file"""
file_path = "AlphabetSoup_A3.h5"

# Export your model to a HDF5 file
nn_A3.save_weights(file_path)