# Credit Card approval prediction

## Import all dependencies

In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf
from sqlalchemy import create_engine

## Connect to data base

In [2]:
# AWS Server password in config.py file
from config import db_password

In [3]:
# Direction to AWS RDS Server
url_AWS = "dbproject.cprghtxfxmsc.us-east-1.rds.amazonaws.com"
# Database name in Server
db_name = "postgres"

# Connection string
db_string = f"postgresql://postgres:{db_password}@{url_AWS}:5432/{db_name}"

db = create_engine(db_string)

In [4]:
# Read and safe dataframe
Credit_result = db.execute("SELECT * FROM credit_join")
print(Credit_result)

<sqlalchemy.engine.cursor.LegacyCursorResult object at 0x0000029B04FFB808>


## Start data preprocessing

In [5]:
# Set columns names on the dataframe
application_df = pd.DataFrame(Credit_result,columns=('ID','CODE_GENDER','FLAG_OWN_CAR','FLAG_OWN_REALTY','CNT_CHILDREN','AMT_INCOME_TOTAL','NAME_EDUCATION_TYPE','NAME_FAMILY_STATUS','NAME_HOUSING_TYPE','DAYS_BIRTH','DAYS_EMPLOYED','FLAG_MOBIL','FLAG_WORK_PHONE','FLAG_PHONE','FLAG_EMAIL','JOB','BEGIN_MONTHS','STATUS','TARGET')).sort_values(by=['ID'],ascending = True)
application_df

Unnamed: 0,ID,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,FLAG_MOBIL,FLAG_WORK_PHONE,FLAG_PHONE,FLAG_EMAIL,JOB,BEGIN_MONTHS,STATUS,TARGET
522011,5008806,M,Y,Y,No children,"$112,500.00",Secondary / secondary special,Married,House / apartment,-21474,-1134,True,False,False,False,Security staff,-13,0,0
411853,5008806,M,Y,Y,No children,"$112,500.00",Secondary / secondary special,Married,House / apartment,-21474,-1134,True,False,False,False,Security staff,-29,X,0
412434,5008806,M,Y,Y,No children,"$112,500.00",Secondary / secondary special,Married,House / apartment,-21474,-1134,True,False,False,False,Security staff,-8,0,0
459002,5008806,M,Y,Y,No children,"$112,500.00",Secondary / secondary special,Married,House / apartment,-21474,-1134,True,False,False,False,Security staff,0,C,0
187203,5008806,M,Y,Y,No children,"$112,500.00",Secondary / secondary special,Married,House / apartment,-21474,-1134,True,False,False,False,Security staff,-10,X,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44504,5150487,M,Y,N,No children,"$202,500.00",Secondary / secondary special,Married,House / apartment,-19398,-2166,True,False,False,False,Drivers,-14,C,0
123112,5150487,M,Y,N,No children,"$202,500.00",Secondary / secondary special,Married,House / apartment,-19398,-2166,True,False,False,False,Drivers,-28,C,0
70995,5150487,M,Y,N,No children,"$202,500.00",Secondary / secondary special,Married,House / apartment,-19398,-2166,True,False,False,False,Drivers,-15,C,0
428080,5150487,M,Y,N,No children,"$202,500.00",Secondary / secondary special,Married,House / apartment,-19398,-2166,True,False,False,False,Drivers,-2,C,0


### Check data characteristics

In [6]:
# Check the total number of applications
application_df['ID'].count()

537667

In [7]:
# Identify data types per columns on the dataframe
application_df.dtypes

ID                      int64
CODE_GENDER            object
FLAG_OWN_CAR           object
FLAG_OWN_REALTY        object
CNT_CHILDREN           object
AMT_INCOME_TOTAL       object
NAME_EDUCATION_TYPE    object
NAME_FAMILY_STATUS     object
NAME_HOUSING_TYPE      object
DAYS_BIRTH              int64
DAYS_EMPLOYED           int64
FLAG_MOBIL               bool
FLAG_WORK_PHONE          bool
FLAG_PHONE               bool
FLAG_EMAIL               bool
JOB                    object
BEGIN_MONTHS            int64
STATUS                 object
TARGET                  int64
dtype: object

In [8]:
# Find null values
for column in application_df.columns:
    print(f'Columns {column} has {application_df[column].isnull().sum()} nullvalues')

Columns ID has 0 nullvalues
Columns CODE_GENDER has 0 nullvalues
Columns FLAG_OWN_CAR has 0 nullvalues
Columns FLAG_OWN_REALTY has 0 nullvalues
Columns CNT_CHILDREN has 0 nullvalues
Columns AMT_INCOME_TOTAL has 0 nullvalues
Columns NAME_EDUCATION_TYPE has 0 nullvalues
Columns NAME_FAMILY_STATUS has 0 nullvalues
Columns NAME_HOUSING_TYPE has 0 nullvalues
Columns DAYS_BIRTH has 0 nullvalues
Columns DAYS_EMPLOYED has 0 nullvalues
Columns FLAG_MOBIL has 0 nullvalues
Columns FLAG_WORK_PHONE has 0 nullvalues
Columns FLAG_PHONE has 0 nullvalues
Columns FLAG_EMAIL has 0 nullvalues
Columns JOB has 0 nullvalues
Columns BEGIN_MONTHS has 0 nullvalues
Columns STATUS has 0 nullvalues
Columns TARGET has 0 nullvalues


In [9]:
# Determine the number of unique values in each column.
application_df.nunique()

ID                     25134
CODE_GENDER                2
FLAG_OWN_CAR               2
FLAG_OWN_REALTY            2
CNT_CHILDREN               3
AMT_INCOME_TOTAL         195
NAME_EDUCATION_TYPE        5
NAME_FAMILY_STATUS         5
NAME_HOUSING_TYPE          6
DAYS_BIRTH              5206
DAYS_EMPLOYED           3299
FLAG_MOBIL                 1
FLAG_WORK_PHONE            2
FLAG_PHONE                 2
FLAG_EMAIL                 2
JOB                       18
BEGIN_MONTHS              61
STATUS                     8
TARGET                     2
dtype: int64

### Start manipulating the date to create the dataframe for the model

In [10]:
# Get unique IDs of the applications
unique_id = pd.DataFrame(application_df['ID'].drop_duplicates())
unique_id.reset_index(drop= True)

Unnamed: 0,ID
0,5008806
1,5008808
2,5008809
3,5008810
4,5008811
...,...
25129,5150482
25130,5150483
25131,5150484
25132,5150485


In [11]:
# Create dataframe with high risk applicants
df = pd.DataFrame()
for x in unique_id['ID'].tolist():
    for y in application_df.index[application_df['ID'] == x].tolist():
        if (application_df['TARGET'][y] == 1):
            df = df.append(application_df.loc[[y]])
            break

In [12]:
# Organize the columns of the dataframe and sort values by ID
df = df[['ID','CODE_GENDER','FLAG_OWN_CAR','FLAG_OWN_REALTY','CNT_CHILDREN','AMT_INCOME_TOTAL','NAME_EDUCATION_TYPE','NAME_FAMILY_STATUS','NAME_HOUSING_TYPE','DAYS_BIRTH','DAYS_EMPLOYED','FLAG_MOBIL','FLAG_WORK_PHONE','FLAG_PHONE','FLAG_EMAIL','JOB','BEGIN_MONTHS','STATUS','TARGET']]
df.sort_values(by=['ID'],ascending=True)
df

Unnamed: 0,ID,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,FLAG_MOBIL,FLAG_WORK_PHONE,FLAG_PHONE,FLAG_EMAIL,JOB,BEGIN_MONTHS,STATUS,TARGET
33275,5009524,M,Y,N,1 children,"$112,500.00",Secondary / secondary special,Separated,Co-op apartment,-11998,-1135,True,True,False,False,Laborers,-10,2,1
313162,5009628,F,N,N,No children,"$238,500.00",Secondary / secondary special,Married,House / apartment,-19305,-3296,True,True,False,False,Laborers,-32,5,1
159525,5009938,F,N,Y,2+ children,"$157,500.00",Secondary / secondary special,Married,House / apartment,-10710,-2351,True,False,False,False,Sales staff,-21,2,1
122064,5010396,F,Y,Y,1 children,"$225,000.00",Higher education,Married,House / apartment,-14148,-1591,True,True,True,False,High skill tech staff,-4,2,1
126496,5010535,F,Y,N,2+ children,"$135,000.00",Secondary / secondary special,Married,House / apartment,-13928,-3052,True,False,False,False,Laborers,-50,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19808,5149834,F,N,Y,No children,"$157,500.00",Higher education,Married,House / apartment,-12387,-1325,True,False,True,True,Medicine staff,-5,5,1
390514,5149838,F,N,Y,No children,"$157,500.00",Higher education,Married,House / apartment,-12387,-1325,True,False,True,True,Medicine staff,-14,5,1
383028,5150049,F,N,Y,No children,"$283,500.00",Secondary / secondary special,Married,House / apartment,-17958,-655,True,False,False,False,Sales staff,0,2,1
16736,5150238,F,Y,Y,No children,"$90,000.00",Secondary / secondary special,Married,House / apartment,-19084,-128,True,True,False,False,Laborers,-5,2,1


In [13]:
# Confirm the number of low risk applications. Eliminating the high risk applicants
for x in df['ID']:
    unique_id = unique_id.drop(unique_id.loc[unique_id['ID']==x].index)
unique_id['ID'].count()

24712

In [14]:
# Remove all high risk applications from the original dataframe (there are duplicated applications)
for x in df['ID']:
    application_df = application_df.drop(application_df.loc[application_df['ID']==x].index)
application_df

Unnamed: 0,ID,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,FLAG_MOBIL,FLAG_WORK_PHONE,FLAG_PHONE,FLAG_EMAIL,JOB,BEGIN_MONTHS,STATUS,TARGET
522011,5008806,M,Y,Y,No children,"$112,500.00",Secondary / secondary special,Married,House / apartment,-21474,-1134,True,False,False,False,Security staff,-13,0,0
411853,5008806,M,Y,Y,No children,"$112,500.00",Secondary / secondary special,Married,House / apartment,-21474,-1134,True,False,False,False,Security staff,-29,X,0
412434,5008806,M,Y,Y,No children,"$112,500.00",Secondary / secondary special,Married,House / apartment,-21474,-1134,True,False,False,False,Security staff,-8,0,0
459002,5008806,M,Y,Y,No children,"$112,500.00",Secondary / secondary special,Married,House / apartment,-21474,-1134,True,False,False,False,Security staff,0,C,0
187203,5008806,M,Y,Y,No children,"$112,500.00",Secondary / secondary special,Married,House / apartment,-21474,-1134,True,False,False,False,Security staff,-10,X,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44504,5150487,M,Y,N,No children,"$202,500.00",Secondary / secondary special,Married,House / apartment,-19398,-2166,True,False,False,False,Drivers,-14,C,0
123112,5150487,M,Y,N,No children,"$202,500.00",Secondary / secondary special,Married,House / apartment,-19398,-2166,True,False,False,False,Drivers,-28,C,0
70995,5150487,M,Y,N,No children,"$202,500.00",Secondary / secondary special,Married,House / apartment,-19398,-2166,True,False,False,False,Drivers,-15,C,0
428080,5150487,M,Y,N,No children,"$202,500.00",Secondary / secondary special,Married,House / apartment,-19398,-2166,True,False,False,False,Drivers,-2,C,0


In [15]:
# Remove duplicated low risk applications 
application_df.drop_duplicates(subset="ID", inplace=True)
application_df

Unnamed: 0,ID,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,FLAG_MOBIL,FLAG_WORK_PHONE,FLAG_PHONE,FLAG_EMAIL,JOB,BEGIN_MONTHS,STATUS,TARGET
522011,5008806,M,Y,Y,No children,"$112,500.00",Secondary / secondary special,Married,House / apartment,-21474,-1134,True,False,False,False,Security staff,-13,0,0
282991,5008808,F,N,Y,No children,"$270,000.00",Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,True,False,True,True,Sales staff,-2,0,0
81599,5008809,F,N,Y,No children,"$270,000.00",Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,True,False,True,True,Sales staff,-23,X,0
49983,5008810,F,N,Y,No children,"$270,000.00",Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,True,False,True,True,Sales staff,-8,C,0
62109,5008811,F,N,Y,No children,"$270,000.00",Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,True,False,True,True,Sales staff,-15,C,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159544,5150482,F,Y,Y,1 children,"$157,500.00",Secondary / secondary special,Married,House / apartment,-10808,-1739,True,False,False,False,Core staff,-22,0,0
418720,5150483,F,Y,Y,1 children,"$157,500.00",Secondary / secondary special,Married,House / apartment,-10808,-1739,True,False,False,False,Core staff,0,X,0
453767,5150484,F,Y,Y,1 children,"$157,500.00",Secondary / secondary special,Married,House / apartment,-10808,-1739,True,False,False,False,Core staff,-10,0,0
136580,5150485,F,Y,Y,1 children,"$157,500.00",Secondary / secondary special,Married,House / apartment,-10808,-1739,True,False,False,False,Core staff,-1,0,0


In [16]:
# Append single low risk application dataframe and single high risk applications
application_df = application_df.append(df)
application_df

Unnamed: 0,ID,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,FLAG_MOBIL,FLAG_WORK_PHONE,FLAG_PHONE,FLAG_EMAIL,JOB,BEGIN_MONTHS,STATUS,TARGET
522011,5008806,M,Y,Y,No children,"$112,500.00",Secondary / secondary special,Married,House / apartment,-21474,-1134,True,False,False,False,Security staff,-13,0,0
282991,5008808,F,N,Y,No children,"$270,000.00",Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,True,False,True,True,Sales staff,-2,0,0
81599,5008809,F,N,Y,No children,"$270,000.00",Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,True,False,True,True,Sales staff,-23,X,0
49983,5008810,F,N,Y,No children,"$270,000.00",Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,True,False,True,True,Sales staff,-8,C,0
62109,5008811,F,N,Y,No children,"$270,000.00",Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,True,False,True,True,Sales staff,-15,C,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19808,5149834,F,N,Y,No children,"$157,500.00",Higher education,Married,House / apartment,-12387,-1325,True,False,True,True,Medicine staff,-5,5,1
390514,5149838,F,N,Y,No children,"$157,500.00",Higher education,Married,House / apartment,-12387,-1325,True,False,True,True,Medicine staff,-14,5,1
383028,5150049,F,N,Y,No children,"$283,500.00",Secondary / secondary special,Married,House / apartment,-17958,-655,True,False,False,False,Sales staff,0,2,1
16736,5150238,F,Y,Y,No children,"$90,000.00",Secondary / secondary special,Married,House / apartment,-19084,-128,True,True,False,False,Laborers,-5,2,1


### Create the final dataframe selecting the features to use

In [17]:
# Drop the non-beneficial columns, 'DAYS_BIRTH', 'BEGIN_MONTHS'.
application_df.drop(['ID','DAYS_BIRTH','BEGIN_MONTHS'],1, inplace =True)
application_df

Unnamed: 0,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_EMPLOYED,FLAG_MOBIL,FLAG_WORK_PHONE,FLAG_PHONE,FLAG_EMAIL,JOB,STATUS,TARGET
522011,M,Y,Y,No children,"$112,500.00",Secondary / secondary special,Married,House / apartment,-1134,True,False,False,False,Security staff,0,0
282991,F,N,Y,No children,"$270,000.00",Secondary / secondary special,Single / not married,House / apartment,-3051,True,False,True,True,Sales staff,0,0
81599,F,N,Y,No children,"$270,000.00",Secondary / secondary special,Single / not married,House / apartment,-3051,True,False,True,True,Sales staff,X,0
49983,F,N,Y,No children,"$270,000.00",Secondary / secondary special,Single / not married,House / apartment,-3051,True,False,True,True,Sales staff,C,0
62109,F,N,Y,No children,"$270,000.00",Secondary / secondary special,Single / not married,House / apartment,-3051,True,False,True,True,Sales staff,C,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19808,F,N,Y,No children,"$157,500.00",Higher education,Married,House / apartment,-1325,True,False,True,True,Medicine staff,5,1
390514,F,N,Y,No children,"$157,500.00",Higher education,Married,House / apartment,-1325,True,False,True,True,Medicine staff,5,1
383028,F,N,Y,No children,"$283,500.00",Secondary / secondary special,Married,House / apartment,-655,True,False,False,False,Sales staff,2,1
16736,F,Y,Y,No children,"$90,000.00",Secondary / secondary special,Married,House / apartment,-128,True,True,False,False,Laborers,2,1


## Start Machine Learning Model (Neuronal Networks)

In [18]:
# Generate our categorical variable lists
application_cat = application_df.dtypes[application_df.dtypes == "object"].index.tolist()

In [19]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(application_df[application_cat]))

# Add the encoded variable names to the dataframe
encode_df.columns = enc.get_feature_names(application_cat)
encode_df.head()

Unnamed: 0,CODE_GENDER_F,CODE_GENDER_M,FLAG_OWN_CAR_N,FLAG_OWN_CAR_Y,FLAG_OWN_REALTY_N,FLAG_OWN_REALTY_Y,CNT_CHILDREN_1 children,CNT_CHILDREN_2+ children,CNT_CHILDREN_No children,"AMT_INCOME_TOTAL_$1,125,000.00",...,JOB_Security staff,JOB_Waiters/barmen staff,STATUS_0,STATUS_1,STATUS_2,STATUS_3,STATUS_4,STATUS_5,STATUS_C,STATUS_X
0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [20]:
# Merge one-hot encoded features and drop the originals
application_df = application_df.merge(encode_df,left_index=True, right_index=True)
application_df = application_df.drop(application_cat,1)
application_df.head()

Unnamed: 0,DAYS_EMPLOYED,FLAG_MOBIL,FLAG_WORK_PHONE,FLAG_PHONE,FLAG_EMAIL,TARGET,CODE_GENDER_F,CODE_GENDER_M,FLAG_OWN_CAR_N,FLAG_OWN_CAR_Y,...,JOB_Security staff,JOB_Waiters/barmen staff,STATUS_0,STATUS_1,STATUS_2,STATUS_3,STATUS_4,STATUS_5,STATUS_C,STATUS_X
2047,-1163,True,False,False,False,0,1.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17623,-4450,True,False,True,False,0,0.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
9965,-1347,True,False,False,False,0,1.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15313,-1194,True,True,True,False,0,0.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
6073,-889,True,False,False,False,0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [21]:
# Split our preprocessed data into our features and target arrays
y = application_df["TARGET"].values
X = application_df.drop(["TARGET"],1).values
T = application_df.drop(["TARGET"],1)

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [22]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [23]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train[0])
hidden_nodes_layer1 = 50
hidden_nodes_layer2 = 20

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="sigmoid")
)


# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="sigmoid"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 50)                12600     
                                                                 
 dense_1 (Dense)             (None, 20)                1020      
                                                                 
 dense_2 (Dense)             (None, 1)                 21        
                                                                 
Total params: 13,641
Trainable params: 13,641
Non-trainable params: 0
_________________________________________________________________


In [24]:
# Import checkpoint dependencies
import os
from tensorflow.keras.callbacks import ModelCheckpoint

# Define the checkpoint path and filenames
os.makedirs("checkpoints/",exist_ok=True)
checkpoint_path = "checkpoints/weights.{epoch:02d}.hdf5"

In [25]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Create a callback that saves the model's weights every epoch
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    period=5)



In [26]:
# Train the model
fit_model = nn.fit(X_train_scaled,y_train,epochs=250,callbacks=[cp_callback])

Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
 1/30 [>.............................] - ETA: 0s - loss: 0.1224 - accuracy: 1.0000
Epoch 00005: saving model to checkpoints\weights.05.hdf5
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
 1/30 [>.............................] - ETA: 0s - loss: 0.0591 - accuracy: 1.0000
Epoch 00010: saving model to checkpoints\weights.10.hdf5
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
 1/30 [>.............................] - ETA: 0s - loss: 0.0399 - accuracy: 1.0000
Epoch 00015: saving model to checkpoints\weights.15.hdf5
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
 1/30 [>.............................] - ETA: 0s - loss: 0.0307 - accuracy: 1.0000
Epoch 00020: saving model to checkpoints\weights.20.hdf5
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
 1/30 [>.............................] - ETA: 0s - loss: 0.2221 - accuracy: 0.9375
Epoch 00025: saving model to checkpoints\

 1/30 [>.............................] - ETA: 0s - loss: 0.0150 - accuracy: 1.0000
Epoch 00065: saving model to checkpoints\weights.65.hdf5
Epoch 66/250
Epoch 67/250
Epoch 68/250
Epoch 69/250
Epoch 70/250
 1/30 [>.............................] - ETA: 0s - loss: 0.0257 - accuracy: 1.0000
Epoch 00070: saving model to checkpoints\weights.70.hdf5
Epoch 71/250
Epoch 72/250
Epoch 73/250
Epoch 74/250
Epoch 75/250
 1/30 [>.............................] - ETA: 0s - loss: 0.0700 - accuracy: 0.9688
Epoch 00075: saving model to checkpoints\weights.75.hdf5
Epoch 76/250
Epoch 77/250
Epoch 78/250
Epoch 79/250
Epoch 80/250
 1/30 [>.............................] - ETA: 0s - loss: 0.0270 - accuracy: 1.0000
Epoch 00080: saving model to checkpoints\weights.80.hdf5
Epoch 81/250
Epoch 82/250
Epoch 83/250
Epoch 84/250
Epoch 85/250
 1/30 [>.............................] - ETA: 0s - loss: 0.0113 - accuracy: 1.0000
Epoch 00085: saving model to checkpoints\weights.85.hdf5
Epoch 86/250
Epoch 87/250
Epoch 88/250
E

Epoch 128/250
Epoch 129/250
Epoch 130/250
 1/30 [>.............................] - ETA: 0s - loss: 0.0197 - accuracy: 1.0000
Epoch 00130: saving model to checkpoints\weights.130.hdf5
Epoch 131/250
Epoch 132/250
Epoch 133/250
Epoch 134/250
Epoch 135/250
 1/30 [>.............................] - ETA: 0s - loss: 0.0024 - accuracy: 1.0000
Epoch 00135: saving model to checkpoints\weights.135.hdf5
Epoch 136/250
Epoch 137/250
Epoch 138/250
Epoch 139/250
Epoch 140/250
 1/30 [>.............................] - ETA: 0s - loss: 0.0123 - accuracy: 1.0000
Epoch 00140: saving model to checkpoints\weights.140.hdf5
Epoch 141/250
Epoch 142/250
Epoch 143/250
Epoch 144/250
Epoch 145/250
 1/30 [>.............................] - ETA: 0s - loss: 0.0049 - accuracy: 1.0000
Epoch 00145: saving model to checkpoints\weights.145.hdf5
Epoch 146/250
Epoch 147/250
Epoch 148/250
Epoch 149/250
Epoch 150/250
 1/30 [>.............................] - ETA: 0s - loss: 0.0022 - accuracy: 1.0000
Epoch 00150: saving model to ch

 1/30 [>.............................] - ETA: 0s - loss: 5.2667e-04 - accuracy: 1.0000
Epoch 00190: saving model to checkpoints\weights.190.hdf5
Epoch 191/250
Epoch 192/250
Epoch 193/250
Epoch 194/250
Epoch 195/250
 1/30 [>.............................] - ETA: 0s - loss: 9.7769e-04 - accuracy: 1.0000
Epoch 00195: saving model to checkpoints\weights.195.hdf5
Epoch 196/250
Epoch 197/250
Epoch 198/250
Epoch 199/250
Epoch 200/250
 1/30 [>.............................] - ETA: 0s - loss: 1.7758e-04 - accuracy: 1.0000
Epoch 00200: saving model to checkpoints\weights.200.hdf5
Epoch 201/250
Epoch 202/250
Epoch 203/250
Epoch 204/250
Epoch 205/250
 1/30 [>.............................] - ETA: 0s - loss: 1.6551e-04 - accuracy: 1.0000
Epoch 00205: saving model to checkpoints\weights.205.hdf5
Epoch 206/250
Epoch 207/250
Epoch 208/250
Epoch 209/250
Epoch 210/250
 1/30 [>.............................] - ETA: 0s - loss: 6.3250e-04 - accuracy: 1.0000
Epoch 00210: saving model to checkpoints\weights.210.

 1/30 [>.............................] - ETA: 0s - loss: 1.9973e-04 - accuracy: 1.0000
Epoch 00250: saving model to checkpoints\weights.250.hdf5


In [27]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

10/10 - 0s - loss: 0.2650 - accuracy: 0.9587 - 113ms/epoch - 11ms/step
Loss: 0.26496371626853943, Accuracy: 0.958730161190033


In [28]:
Results = []
for i in nn.predict(X_test_scaled):
    Results.append(round(i[0]))

Results
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, Results)

array([[302,   6],
       [  7,   0]], dtype=int64)

In [29]:
y_pred = pd.DataFrame(Results,columns=['Results'])
y_pred

Unnamed: 0,Results
0,1
1,0
2,0
3,0
4,0
...,...
310,0
311,0
312,0
313,0


In [30]:
y_pred.to_csv('Applications Results.csv',index=False)