In [1]:
# Imports
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
 # Read the credit data file into a Pandas DataFrame
credit_data=Path("/Resources/credit_dataset_ML5b.csv")
    
credit_df=pd.read_csv(credit_data, low_memory=False)

In [3]:
credit_df.head()

Unnamed: 0,GENDER,CAR,INCOME_TYPE,EDUCATION_TYPE,FAMILY_TYPE,HOUSE_TYPE,FAMILY SIZE,AGE,INCOME,SOCIAL_STABILITY,SOCIAL_EXPOSURE,SOCIAL_QUALITY,LOAN_SOCIAL_VAR,LOAN_APPROVAL
0,M,Y,Working,Secondary / secondary special,Married,House / apartment,2,59,57583,0.32,0.12,0.87,0.033408,1
1,F,N,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,1,52,26317,0.32,0.56,0.1,0.01792,0
2,F,N,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,1,52,20458,0.31,0.21,0.03,0.001953,0
3,F,N,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,1,52,29275,0.31,0.11,0.87,0.029667,0
4,F,N,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,1,52,41992,0.32,0.12,0.1,0.00384,1


In [4]:
credit_df = credit_df.dropna().copy()

In [5]:
credit_df

Unnamed: 0,GENDER,CAR,INCOME_TYPE,EDUCATION_TYPE,FAMILY_TYPE,HOUSE_TYPE,FAMILY SIZE,AGE,INCOME,SOCIAL_STABILITY,SOCIAL_EXPOSURE,SOCIAL_QUALITY,LOAN_SOCIAL_VAR,LOAN_APPROVAL
0,M,Y,Working,Secondary / secondary special,Married,House / apartment,2,59,57583,0.32,0.12,0.87,0.033408,1
1,F,N,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,1,52,26317,0.32,0.56,0.10,0.017920,0
2,F,N,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,1,52,20458,0.31,0.21,0.03,0.001953,0
3,F,N,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,1,52,29275,0.31,0.11,0.87,0.029667,0
4,F,N,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,1,52,41992,0.32,0.12,0.10,0.003840,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11966,F,N,State servant,Secondary / secondary special,Married,House / apartment,2,52,25200,0.31,0.21,0.03,0.032364,0
11967,F,N,State servant,Secondary / secondary special,Married,House / apartment,2,52,45946,0.31,0.11,0.87,0.017360,1
11968,F,N,State servant,Secondary / secondary special,Married,House / apartment,2,52,21751,0.32,0.12,0.10,0.001953,0
11969,F,N,State servant,Secondary / secondary special,Married,House / apartment,2,52,58791,0.37,0.56,0.03,0.029667,1


In [6]:
# Review the data types associated with the columns
credit_df.dtypes

GENDER               object
CAR                  object
INCOME_TYPE          object
EDUCATION_TYPE       object
FAMILY_TYPE          object
HOUSE_TYPE           object
FAMILY SIZE           int64
AGE                   int64
INCOME                int64
SOCIAL_STABILITY    float64
SOCIAL_EXPOSURE     float64
SOCIAL_QUALITY      float64
LOAN_SOCIAL_VAR     float64
LOAN_APPROVAL         int64
dtype: object

In [7]:
credit_df["LOAN_APPROVAL"] = credit_df["LOAN_APPROVAL"].astype('object')

In [8]:
credit_df.dtypes


GENDER               object
CAR                  object
INCOME_TYPE          object
EDUCATION_TYPE       object
FAMILY_TYPE          object
HOUSE_TYPE           object
FAMILY SIZE           int64
AGE                   int64
INCOME                int64
SOCIAL_STABILITY    float64
SOCIAL_EXPOSURE     float64
SOCIAL_QUALITY      float64
LOAN_SOCIAL_VAR     float64
LOAN_APPROVAL        object
dtype: object

In [9]:
# Create a list of categorical variables 
categorical_variables = list(credit_df.dtypes[credit_df.dtypes == "object"].index)

# Display the categorical variables list
categorical_variables

['GENDER',
 'CAR',
 'INCOME_TYPE',
 'EDUCATION_TYPE',
 'FAMILY_TYPE',
 'HOUSE_TYPE',
 'LOAN_APPROVAL']

In [10]:
# Imports
from sklearn.preprocessing import OneHotEncoder

In [11]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Encode categorcal variables using OneHotEncoder
encoded_data = enc.fit_transform(credit_df[categorical_variables])

In [12]:
# Create a DataFrame with the encoded variables
# The column names should match those of the encoded variables
encoded_df = pd.DataFrame(
    encoded_data,
    columns = enc.get_feature_names(categorical_variables)
)

# Display the DataFrame
encoded_df.head()



Unnamed: 0,GENDER_F,GENDER_M,CAR_N,CAR_Y,INCOME_TYPE_Commercial associate,INCOME_TYPE_State servant,INCOME_TYPE_Student,INCOME_TYPE_Working,EDUCATION_TYPE_Academic degree,EDUCATION_TYPE_Higher education,...,FAMILY_TYPE_Unmarried,FAMILY_TYPE_Widow,HOUSE_TYPE_Co-op apartment,HOUSE_TYPE_House / apartment,HOUSE_TYPE_Municipal apartment,HOUSE_TYPE_Office apartment,HOUSE_TYPE_Rented apartment,HOUSE_TYPE_With parents,LOAN_APPROVAL_0,LOAN_APPROVAL_1
0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
1,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
2,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
3,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
4,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0


In [13]:
 # Create a DataFrame with the columnns containing numerical variables from the original dataset
numerical_variables_df = credit_df.drop(columns = categorical_variables)

# Review the DataFrame
numerical_variables_df.head()

Unnamed: 0,FAMILY SIZE,AGE,INCOME,SOCIAL_STABILITY,SOCIAL_EXPOSURE,SOCIAL_QUALITY,LOAN_SOCIAL_VAR
0,2,59,57583,0.32,0.12,0.87,0.033408
1,1,52,26317,0.32,0.56,0.1,0.01792
2,1,52,20458,0.31,0.21,0.03,0.001953
3,1,52,29275,0.31,0.11,0.87,0.029667
4,1,52,41992,0.32,0.12,0.1,0.00384


In [14]:
# Using the Pandas concat function, combine the DataFrames the contain the encoded categorical data and the numerical data
cc_df = pd.concat(
    [
        numerical_variables_df,
        encoded_df
    ],
    axis=1
)

# Reveiw the DataFrame
cc_df.head()

Unnamed: 0,FAMILY SIZE,AGE,INCOME,SOCIAL_STABILITY,SOCIAL_EXPOSURE,SOCIAL_QUALITY,LOAN_SOCIAL_VAR,GENDER_F,GENDER_M,CAR_N,...,FAMILY_TYPE_Unmarried,FAMILY_TYPE_Widow,HOUSE_TYPE_Co-op apartment,HOUSE_TYPE_House / apartment,HOUSE_TYPE_Municipal apartment,HOUSE_TYPE_Office apartment,HOUSE_TYPE_Rented apartment,HOUSE_TYPE_With parents,LOAN_APPROVAL_0,LOAN_APPROVAL_1
0,2,59,57583,0.32,0.12,0.87,0.033408,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
1,1,52,26317,0.32,0.56,0.1,0.01792,1.0,0.0,1.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
2,1,52,20458,0.31,0.21,0.03,0.001953,1.0,0.0,1.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
3,1,52,29275,0.31,0.11,0.87,0.029667,1.0,0.0,1.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
4,1,52,41992,0.32,0.12,0.1,0.00384,1.0,0.0,1.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0


In [15]:
 # Define the target set y using the LOAN_APPROVAL_True column
y = cc_df["LOAN_APPROVAL_1"]

# Display a sample of y
y[:5]

0    1.0
1    0.0
2    0.0
3    0.0
4    1.0
Name: LOAN_APPROVAL_1, dtype: float64

In [16]:
# Define features set X by selecting all columns BUT LOAN_APPROVAL_True and LOAN_APPROVAL_False
X = cc_df.drop(columns=["LOAN_APPROVAL_1", "LOAN_APPROVAL_0"])

# Review the features DataFrame
X.head()

Unnamed: 0,FAMILY SIZE,AGE,INCOME,SOCIAL_STABILITY,SOCIAL_EXPOSURE,SOCIAL_QUALITY,LOAN_SOCIAL_VAR,GENDER_F,GENDER_M,CAR_N,...,FAMILY_TYPE_Married,FAMILY_TYPE_Single / not married,FAMILY_TYPE_Unmarried,FAMILY_TYPE_Widow,HOUSE_TYPE_Co-op apartment,HOUSE_TYPE_House / apartment,HOUSE_TYPE_Municipal apartment,HOUSE_TYPE_Office apartment,HOUSE_TYPE_Rented apartment,HOUSE_TYPE_With parents
0,2,59,57583,0.32,0.12,0.87,0.033408,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,1,52,26317,0.32,0.56,0.1,0.01792,1.0,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,1,52,20458,0.31,0.21,0.03,0.001953,1.0,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,1,52,29275,0.31,0.11,0.87,0.029667,1.0,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,1,52,41992,0.32,0.12,0.1,0.00384,1.0,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [17]:
# Split the data into training and testing datasets
# Assign the function a random_state equal to 1
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

#X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, random_state=78)

In [18]:
 # Create a StandardScaler instance
scaler = StandardScaler()

# Fit the scaler to the features training dataset
X_scaler = scaler.fit(X_train)

# Fit the scaler to the features training dataset
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [19]:
X_train_scaled


array([[-0.2959806 ,  0.85606245,  0.42890285, ..., -0.08800555,
        -0.14344057, -0.23556367],
       [-0.2959806 ,  1.80177843,  0.0397344 , ..., -0.08800555,
        -0.14344057, -0.23556367],
       [-0.2959806 , -0.19473308, -0.08399318, ..., -0.08800555,
        -0.14344057, -0.23556367],
       ...,
       [ 0.79174507, -1.14044906,  0.27250562, ..., -0.08800555,
        -0.14344057, -0.23556367],
       [ 0.79174507, -0.50997174,  0.44341304, ..., -0.08800555,
        -0.14344057, -0.23556367],
       [-0.2959806 ,  1.38146022,  1.10836194, ..., -0.08800555,
        -0.14344057, -0.23556367]])

In [20]:
# Define the the number of inputs (features) to the model
number_input_features = len(X_train.iloc[0])

# Define the number of hidden nodes for the first hidden layer
# Use the mean of the number of input features plus the number of output nurons
# Use the Python floor division (//) to return the quotent
hidden_nodes_layer1 =  (number_input_features + 1) // 2 

# Define the number of hidden nodes for the second hidden layer
# Use the mean of the number of hidden nodes in the first hidden layer plus the number of output nurons
# Use the Python floor division (//) to return the quotent
hidden_nodes_layer2 = (hidden_nodes_layer1 + 1) // 2



# Create the Sequential model instance
nn = Sequential()

# Add the first hidden layer specifying the number of inputs, the number of hidden nodes, and the activation function
nn.add(Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="tanh"))

# Add the second hidden layer specifying the number of hidden nodes and the activation function
nn.add(Dense(units=hidden_nodes_layer2, activation="tanh"))




# Add the output layer to the model specifying the number of output neurons and activation function
nn.add(Dense(units=1, activation="relu"))

In [21]:
number_input_features


31

In [22]:
hidden_nodes_layer1

16

In [23]:
hidden_nodes_layer2

8

In [24]:
 # Display the Sequential model summary
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 16)                512       
                                                                 
 dense_1 (Dense)             (None, 8)                 136       
                                                                 
 dense_2 (Dense)             (None, 1)                 9         
                                                                 
Total params: 657
Trainable params: 657
Non-trainable params: 0
_________________________________________________________________


In [25]:
# Compile the Sequential model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [26]:
 # Fit the model using 50 epochs and the training data
fit_model = nn.fit(X_train_scaled, y_train, epochs=70)

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70


In [27]:
# Evaluate the model loss and accuracy metrics using the evaluate method and the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)

# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

94/94 - 0s - loss: 0.0536 - accuracy: 0.9880 - 370ms/epoch - 4ms/step
Loss: 0.053557753562927246, Accuracy: 0.9879719614982605
