In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

#data preparation modules
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

#neural network
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [2]:
# Prepare the data for use on a neural network model.
# Read applicants_data.csv into a Pandas DataFrame and drop the "EIN"
# (Employer Identification Number) and "NAME" columns, which are not relevant
# to the binary classification model.
applicant_df = pd.read_csv(Path("./Resources/applicants_data.csv")).drop(columns=['EIN', 'NAME'])

# Review the first rows, looking for categorical variables that will need to be
# encoded and for columns that could define the features and the target.
display(applicant_df.head())

# The categorical variables are the columns whose dtype is 'object'.
categorical_variables = [
    column_name
    for column_name, column_dtype in applicant_df.dtypes.items()
    if column_dtype == 'object'
]

categorical_variables

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


['APPLICATION_TYPE',
 'AFFILIATION',
 'CLASSIFICATION',
 'USE_CASE',
 'ORGANIZATION',
 'INCOME_AMT',
 'SPECIAL_CONSIDERATIONS']

In [3]:
# Encode the dataset's categorical variables using OneHotEncoder, and then place
# the encoded variables into a new DataFrame.

# Create a OneHotEncoder instance.
# The `sparse=False` keyword is deliberately not used: it was renamed to
# `sparse_output` in scikit-learn 1.2 and removed in 1.4. Leaving the encoder at
# its default (sparse output) and densifying the result works on every version.
enc = OneHotEncoder()

# fit_transform returns a sparse matrix here; convert it to a dense array.
encoded_data = enc.fit_transform(applicant_df[categorical_variables]).toarray()

# `get_feature_names` was deprecated in scikit-learn 1.0 and removed in 1.2.
# Prefer `get_feature_names_out`, falling back only on releases that predate it.
if hasattr(enc, "get_feature_names_out"):
    encoded_columns = enc.get_feature_names_out(categorical_variables)
else:
    encoded_columns = enc.get_feature_names(categorical_variables)

# Reuse the source frame's index so that the encoded rows stay aligned with
# applicant_df when the two frames are concatenated column-wise.
encoded_df = pd.DataFrame(
    encoded_data,
    columns=encoded_columns,
    index=applicant_df.index,
)

encoded_df.head()



Unnamed: 0,APPLICATION_TYPE_T10,APPLICATION_TYPE_T12,APPLICATION_TYPE_T13,APPLICATION_TYPE_T14,APPLICATION_TYPE_T15,APPLICATION_TYPE_T17,APPLICATION_TYPE_T19,APPLICATION_TYPE_T2,APPLICATION_TYPE_T25,APPLICATION_TYPE_T29,...,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M,SPECIAL_CONSIDERATIONS_N,SPECIAL_CONSIDERATIONS_Y
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [4]:
# Add the original DataFrame's numerical variables to the DataFrame containing
# the encoded variables: drop the categorical columns from the original frame,
# then join what remains with the encoded columns side by side.
applicant_df2 = pd.concat(
    [applicant_df.drop(columns=categorical_variables), encoded_df],
    axis="columns",
)

applicant_df2

Unnamed: 0,STATUS,ASK_AMT,IS_SUCCESSFUL,APPLICATION_TYPE_T10,APPLICATION_TYPE_T12,APPLICATION_TYPE_T13,APPLICATION_TYPE_T14,APPLICATION_TYPE_T15,APPLICATION_TYPE_T17,APPLICATION_TYPE_T19,...,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M,SPECIAL_CONSIDERATIONS_N,SPECIAL_CONSIDERATIONS_Y
0,1,5000,1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,1,108590,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,1,5000,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,1,6692,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,1,142590,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34294,1,5000,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
34295,1,5000,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
34296,1,5000,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
34297,1,5000,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [5]:
# Using the preprocessed data, create the features (X) and target (y) datasets.
# The target is the preprocessed DataFrame column "IS_SUCCESSFUL"; all of the
# remaining columns make up the features.
X = applicant_df2.drop(columns=['IS_SUCCESSFUL'])
y = applicant_df2['IS_SUCCESSFUL']

# Show the feature matrix dimensions and how many rows fall into each target class.
display(X.shape)
print(y.value_counts())

(34299, 116)

1    18261
0    16038
Name: IS_SUCCESSFUL, dtype: int64


In [6]:
# Split the features and target sets into training and testing datasets.
# random_state is pinned so that re-running the cell yields the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

# Print the class counts of the training target, then of the testing target.
for split_target in (y_train, y_test):
    print(split_target.value_counts())

1    13723
0    12001
Name: IS_SUCCESSFUL, dtype: int64
1    4538
0    4037
Name: IS_SUCCESSFUL, dtype: int64


In [7]:
# Use scikit-learn's StandardScaler to scale the features data.
# The scaler is fitted on the training features only; that same fitted scaler
# is then applied to both the training and the testing features.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

## Compile and Evaluate a Binary Classification Model Using a Neural Network
### Deep Neural Network with 2 layers and 20 epochs

In [8]:
# Compile and Evaluate a Binary Classification Model Using a Neural Network

# Number of inputs (features) to the model: one per column of the training features
number_input_features = X_train.shape[1]

# Number of neurons in the output layer
number_output_neurons = 1

# Nodes in the first hidden layer: the mean of the number of input features and
# the number of output neurons, using floor division (//) to keep an integer
hidden_nodes_layer1 = (number_input_features + number_output_neurons) // 2

# Nodes in the second hidden layer: the mean of the first hidden layer's node
# count and the number of output neurons, again using floor division (//)
hidden_nodes_layer2 = (hidden_nodes_layer1 + number_output_neurons) // 2

# Create the Sequential model instance
nn = Sequential()

# First hidden layer: declares the number of inputs, the number of hidden nodes,
# and the activation function
nn.add(
    Dense(
        units=hidden_nodes_layer1,
        input_dim=number_input_features,
        activation='relu',
    )
)

# Second hidden layer: number of hidden nodes and the activation function
nn.add(Dense(units=hidden_nodes_layer2, activation='relu'))

# Output layer: number of output neurons and the activation function
nn.add(Dense(units=number_output_neurons, activation='sigmoid'))


In [9]:
# Display the Sequential model summary: a table listing each layer with its
# output shape and parameter count, followed by the parameter totals.
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 58)                6786      
                                                                 
 dense_1 (Dense)             (None, 29)                1711      
                                                                 
 dense_2 (Dense)             (None, 1)                 30        
                                                                 
Total params: 8,527
Trainable params: 8,527
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Compile the Sequential model with binary cross-entropy as the loss, the Adam
# optimizer, and accuracy as the reported metric.
nn.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [11]:
# Fit the model on the scaled training data for 20 epochs
fit_model = nn.fit(
    X_train_scaled,
    y_train,
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [12]:
# Evaluate the model on the scaled test data; the result unpacks into the loss
# followed by the accuracy metric.
model_loss, model_accuracy = nn.evaluate(x=X_test_scaled, y=y_test)

# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Loss: 0.5523096323013306, Accuracy: 0.7265306115150452


### Deep Neural Network with 3 layers and 20 epochs

In [13]:
# Compile and Evaluate a Binary Classification Model Using a Neural Network
# (three hidden layers, 20 epochs)

# Number of inputs (features) to the model: one per column of the training features
number_input_features = X_train.shape[1]

# Number of neurons in the output layer
number_output_neurons = 1

# Nodes in the first hidden layer: the mean (floor division) of the number of
# input features and the number of output neurons
hidden_nodes_layer1 = (number_input_features + number_output_neurons) // 2

# Nodes in the second hidden layer: the mean (floor division) of the first
# hidden layer's node count and the number of output neurons
hidden_nodes_layer2 = (hidden_nodes_layer1 + number_output_neurons) // 2

# Nodes in the third hidden layer: the mean (floor division) of the second
# hidden layer's node count and the number of output neurons
hidden_nodes_layer3 = (hidden_nodes_layer2 + number_output_neurons) // 2

# Create the Sequential model instance
nn1 = Sequential()

# Add the first hidden layer specifying the number of inputs, the number of hidden nodes, and the activation function
nn1.add(Dense(input_dim=number_input_features, units=hidden_nodes_layer1, activation='relu'))

# Add the second hidden layer specifying the number of hidden nodes and the activation function
nn1.add(Dense(units=hidden_nodes_layer2, activation='relu'))

# Add the third hidden layer specifying the number of hidden nodes and the activation function
nn1.add(Dense(units=hidden_nodes_layer3, activation='relu'))

# Add the output layer to the model specifying the number of output neurons and activation function
nn1.add(Dense(units=number_output_neurons, activation='sigmoid'))

# Compile the Sequential model
nn1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Model summary. summary() prints the table itself and returns None, so it is
# called directly; wrapping it in display() only rendered an extra "None".
nn1.summary()

# Fit the model using 20 epochs and the scaled training data
fit_model = nn1.fit(X_train_scaled, y_train, epochs=20, verbose=2)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 58)                6786      
                                                                 
 dense_4 (Dense)             (None, 29)                1711      
                                                                 
 dense_5 (Dense)             (None, 15)                450       
                                                                 
 dense_6 (Dense)             (None, 1)                 16        
                                                                 
Total params: 8,963
Trainable params: 8,963
Non-trainable params: 0
_________________________________________________________________


None

Epoch 1/20
804/804 - 5s - loss: 0.5730 - accuracy: 0.7187 - 5s/epoch - 7ms/step
Epoch 2/20
804/804 - 3s - loss: 0.5512 - accuracy: 0.7301 - 3s/epoch - 4ms/step
Epoch 3/20
804/804 - 3s - loss: 0.5480 - accuracy: 0.7320 - 3s/epoch - 4ms/step
Epoch 4/20
804/804 - 3s - loss: 0.5463 - accuracy: 0.7338 - 3s/epoch - 4ms/step
Epoch 5/20
804/804 - 3s - loss: 0.5444 - accuracy: 0.7335 - 3s/epoch - 4ms/step
Epoch 6/20
804/804 - 3s - loss: 0.5441 - accuracy: 0.7327 - 3s/epoch - 4ms/step
Epoch 7/20
804/804 - 3s - loss: 0.5433 - accuracy: 0.7350 - 3s/epoch - 4ms/step
Epoch 8/20
804/804 - 3s - loss: 0.5424 - accuracy: 0.7352 - 3s/epoch - 4ms/step
Epoch 9/20
804/804 - 3s - loss: 0.5420 - accuracy: 0.7354 - 3s/epoch - 4ms/step
Epoch 10/20
804/804 - 3s - loss: 0.5414 - accuracy: 0.7357 - 3s/epoch - 4ms/step
Epoch 11/20
804/804 - 3s - loss: 0.5410 - accuracy: 0.7359 - 3s/epoch - 4ms/step
Epoch 12/20
804/804 - 3s - loss: 0.5409 - accuracy: 0.7374 - 3s/epoch - 4ms/step
Epoch 13/20
804/804 - 3s - loss: 0.54

In [14]:
# Evaluate the three-hidden-layer model on the scaled test data; the result
# unpacks into the loss followed by the accuracy metric.
model_loss, model_accuracy = nn1.evaluate(x=X_test_scaled, y=y_test)

# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Loss: 0.5518964529037476, Accuracy: 0.7292128205299377


In [21]:
# import matplotlib.pyplot as plt
# %matplotlib inline

# plt.plot(nn.history.history["loss"])
# plt.plot(nn1.history.history["loss"])
# plt.title("loss_function - Training - 2 hidden layer Vs. 3 hidden layer")
# plt.legend(["2 hidden layer", "3 hidden layers"])
# plt.show()

### Deep Neural Network with 2 layers and 100 epochs

In [20]:
# Compile and Evaluate a Binary Classification Model Using a Neural Network
# (two hidden layers, wider first layer, 100 epochs)

# Number of inputs (features) to the model: one per column of the training features
number_input_features = X_train.shape[1]

# Number of neurons in the output layer
number_output_neurons = 1

# Nodes in the first hidden layer: half (floor division) of the number of input
# features plus the number of output neurons plus an extra 50, which makes this
# layer wider than the plain mean of inputs and outputs would
hidden_nodes_layer1 = (number_input_features + number_output_neurons + 50) // 2

# Nodes in the second hidden layer: the mean (floor division) of the first
# hidden layer's node count and the number of output neurons
hidden_nodes_layer2 = (hidden_nodes_layer1 + number_output_neurons) // 2

# Create the Sequential model instance
nn2 = Sequential()

# Add the first hidden layer specifying the number of inputs, the number of hidden nodes, and the activation function
nn2.add(Dense(input_dim=number_input_features, units=hidden_nodes_layer1, activation='relu'))

# Add the second hidden layer specifying the number of hidden nodes and the activation function
nn2.add(Dense(units=hidden_nodes_layer2, activation='relu'))

# Add the output layer to the model specifying the number of output neurons and activation function
nn2.add(Dense(units=number_output_neurons, activation='sigmoid'))

# Compile the Sequential model
nn2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Model summary. summary() prints the table itself and returns None, so it is
# called directly; wrapping it in display() only rendered an extra "None".
nn2.summary()

# Fit the model using 100 epochs and the scaled training data
fit_model = nn2.fit(X_train_scaled, y_train, epochs=100, verbose=2)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_7 (Dense)             (None, 83)                9711      
                                                                 
 dense_8 (Dense)             (None, 42)                3528      
                                                                 
 dense_9 (Dense)             (None, 1)                 43        
                                                                 
Total params: 13,282
Trainable params: 13,282
Non-trainable params: 0
_________________________________________________________________


None

Epoch 1/100
804/804 - 5s - loss: 0.5728 - accuracy: 0.7211 - 5s/epoch - 6ms/step
Epoch 2/100
804/804 - 3s - loss: 0.5533 - accuracy: 0.7280 - 3s/epoch - 4ms/step
Epoch 3/100
804/804 - 3s - loss: 0.5496 - accuracy: 0.7325 - 3s/epoch - 4ms/step
Epoch 4/100
804/804 - 3s - loss: 0.5487 - accuracy: 0.7313 - 3s/epoch - 4ms/step
Epoch 5/100
804/804 - 3s - loss: 0.5462 - accuracy: 0.7320 - 3s/epoch - 4ms/step
Epoch 6/100
804/804 - 3s - loss: 0.5446 - accuracy: 0.7338 - 3s/epoch - 4ms/step
Epoch 7/100
804/804 - 3s - loss: 0.5439 - accuracy: 0.7346 - 3s/epoch - 4ms/step
Epoch 8/100
804/804 - 3s - loss: 0.5431 - accuracy: 0.7349 - 3s/epoch - 4ms/step
Epoch 9/100
804/804 - 3s - loss: 0.5425 - accuracy: 0.7342 - 3s/epoch - 4ms/step
Epoch 10/100
804/804 - 3s - loss: 0.5424 - accuracy: 0.7339 - 3s/epoch - 4ms/step
Epoch 11/100
804/804 - 3s - loss: 0.5406 - accuracy: 0.7365 - 3s/epoch - 4ms/step
Epoch 12/100
804/804 - 3s - loss: 0.5408 - accuracy: 0.7360 - 3s/epoch - 4ms/step
Epoch 13/100
804/804 - 3s

In [22]:
# Evaluate the 100-epoch model on the scaled test data; the result unpacks into
# the loss followed by the accuracy metric.
model_loss, model_accuracy = nn2.evaluate(x=X_test_scaled, y=y_test)

# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Loss: 0.5687777996063232, Accuracy: 0.7300291657447815


In [34]:
# Saving the 3 models (2 layer dnn with 20 epochs)

file_path = Path("./Resources/DNN2_Epoch20.h5")
# Save the full model
nn.save(file_path)
# Save the weights to their own file. Writing them to the same path as the full
# model (as was done before) replaced the model file that had just been saved
# with a weights-only file.
nn.save_weights("./Resources/DNN2_Epoch20.weights.h5")

In [32]:
# Saving the 3 models (3 layer dnn with 20 epochs)

file_path = "./Resources/DNN3_Epoch20.h5"
# Save the full model
nn1.save(file_path)
# Save the weights to their own file. Writing them to the same path as the full
# model (as was done before) replaced the model file that had just been saved
# with a weights-only file.
nn1.save_weights("./Resources/DNN3_Epoch20.weights.h5")

In [33]:
# Saving the 3 models (2 layer dnn with 100 epochs)

file_path = "./Resources/DNN2_Epoch100.h5"
# Save the full model
nn2.save(file_path)
# Save the weights to their own file. Writing them to the same path as the full
# model (as was done before) replaced the model file that had just been saved
# with a weights-only file.
nn2.save_weights("./Resources/DNN2_Epoch100.weights.h5")