In [None]:
# Import our dependencies
import pandas as pd
import matplotlib as plt
from sklearn.datasets import make_blobs
import sklearn as skl
import tensorflow as tf

In [None]:
# Generate dummy dataset
X, y = make_blobs(n_samples=1000, centers=2, n_features=2, random_state=78)

# Creating a DataFrame with the dummy data
df = pd.DataFrame(X, columns=["Feature 1", "Feature 2"])
df["Target"] = y

# Plotting the dummy data
df.plot.scatter(x="Feature 1", y="Feature 2", c="Target", colormap="winter")

In [None]:
# Use sklearn to split dataset
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [None]:
# Create scaler instance
X_scaler = skl.preprocessing.StandardScaler()

# Fit the scaler
X_scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# Create the Keras Sequential model
nn_model = tf.keras.models.Sequential()

In [None]:
# Add our first Dense layer, including the input layer
nn_model.add(tf.keras.layers.Dense(units=1, activation="relu", input_dim=2))

In [None]:
# Add the output layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

In [None]:
# Check the structure of the Sequential model
nn_model.summary()

# Note that the number of parameters in each layer does not equal the number of neurons we defined in the notebook

In [None]:
# Now that we have our layers defined, we have to inform the model how it should train using the input data. The process of informing the model how it should learn and train is called compiling the model.
# Neural network using a specific optimization function and loss metric. The optimization function shapes and molds a neural network model while it is being trained to ensure that it performs to the best of its ability. The loss metric is used by machine learning algorithms to score the performance of the model through each iteration and epoch by evaluating the inaccuracy of a single input.

In [None]:
# loss metric Measures how poorly a model characterizes the data after each iteration.	
# evaluation metric Measures the quality of a machine learning model.
# optimization function Improves the performance of a machine learning algorithm.
# activation function Adds an additional step at each layer.

# https://www.tensorflow.org/api_docs/python/tf/keras/optimizers

In [None]:
# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Fit the model to the training data
# # Fit the model to the training data
# Looking at the model training output, we know that the loss metric was 0.07 and the predictive accuracy is 1.0
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=100)

In [None]:
# When training completes, the model object stores the loss and accuracy metrics across all epochs
# Create a DataFrame containing training history
history_df = pd.DataFrame(fit_model.history, index=range(1,len(fit_model.history["loss"])+1))

# Plot the loss
history_df.plot(y="loss")

In [None]:
# Plot the accuracy
history_df.plot(y="accuracy")

In [None]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Looking at the performance metrics from the model, the neural network was able to correctly classify each of the points in the test data. In other words, the model was able to correctly classify data it was not trained on 100% of the time. 

# Therefore, it is important to establish model performance thresholds before designing any machine learning model.

# Depending on the type of data and the use case, we may have to recreate and retrain a model using different parameters, different training/test data, or even look to use a different model entirely.

In [None]:
# Now that we have a trained neural network model and we have verified its performance using a test dataset, we can apply this model to novel datasets and predict the classification of a data poin
# Predict the classification of a new set of blob data
new_X, new_Y = make_blobs(n_samples=10, centers=2, n_features=2, random_state=78)
new_X_scaled = X_scaler.transform(new_X)
(nn_model.predict(new_X_scaled) > 0.5).astype("int32")

In [None]:
from sklearn.datasets import make_moons

# Creating dummy nonlinear data
X_moons, y_moons = make_moons(n_samples=1000, noise=0.08, random_state=78)

# Transforming y_moons to a vertical vector
y_moons = y_moons.reshape(-1, 1)

# Creating a DataFrame to plot the nonlinear dummy data
df_moons = pd.DataFrame(X_moons, columns=["Feature 1", "Feature 2"])
df_moons["Target"] = y_moons

# Plot the nonlinear dummy data
df_moons.plot.scatter(x="Feature 1",y="Feature 2", c="Target",colormap="winter")

In [None]:
# Create training and testing sets
X_moon_train, X_moon_test, y_moon_train, y_moon_test = train_test_split(
    X_moons, y_moons, random_state=78
)

# Create the scaler instance
X_moon_scaler = skl.preprocessing.StandardScaler()

# Fit the scaler
X_moon_scaler.fit(X_moon_train)

# Scale the data
X_moon_train_scaled = X_moon_scaler.transform(X_moon_train)
X_moon_test_scaled = X_moon_scaler.transform(X_moon_test)

In [None]:
# Just as we did with the linear data, we'll train our neural network model using the fit method on the nonlinear training data.

# Training the model with the nonlinear data
model_moon = nn_model.fit(X_moon_train_scaled, y_moon_train, epochs=100, shuffle=True)

In [None]:
# Create a DataFrame containing training history
history_df = pd.DataFrame(model_moon.history, index=range(1,len(model_moon.history["loss"])+1))

# Plot the loss
history_df.plot(y="loss")

In [None]:
# Plot the loss
history_df.plot(y="accuracy")

In [None]:
# So, if adding more neurons to our neural network model increases the performance, why wouldn't we always use the maximum number of neurons? There are two main reasons to limit the number of neurons in a neural network model: overfitting and computation resources.

# A good rule of thumb for a basic neural network is to have two to three times the amount of neurons in the hidden layer as the number of inputs.

In [None]:
# Generate our new Sequential model
new_model = tf.keras.models.Sequential()

In [None]:
# Add the input and hidden layer
number_inputs = 2
number_hidden_nodes = 6

new_model.add(tf.keras.layers.Dense(units=number_hidden_nodes, activation="relu", input_dim=number_inputs))

# Add the output layer that uses a probability activation function
new_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

In [None]:
# Compile the Sequential model together and customize metrics
new_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Fit the model to the training data
new_fit_model = new_model.fit(X_moon_train_scaled, y_moon_train, epochs=100, shuffle=True)

In [None]:
#  When a neural network model does not meet performance expectations, it is usually due to one of two causes: inadequate or inappropriate model design for a given dataset, or insufficient or ineffective training data.

In [None]:
# Optimization neural network
# Check out your input dataset.
# Add more neurons to a hidden layer.
# Add additional hidden layers.
# Use a different activation function for the hidden layers.
# Add additional epochs to the training regimen.

In [None]:
# The sigmoid function values are normalized to a probability between 0 and 1, which is ideal for binary classification.
# The tanh function can be used for classification or regression, and it expands the range between -1 and 1.
# The ReLU function is ideal for looking at positive nonlinear input data for classification or regression.
# The Leaky ReLU function is a good alternative for nonlinear input data with many negative inputs.

In [None]:
# Dataset: https://2u-data-curriculum-team.s3.amazonaws.com/dataviz-online/module_19/ramen-ratings.csv
# Dataset: https://2u-data-curriculum-team.s3.amazonaws.com/dataviz-online/module_19/ramen-ratings.csv# 

In [None]:
# Import our dependencies
import pandas as pd
import sklearn as skl

# Read in our ramen data
ramen_df = pd.read_csv("ramen-ratings.csv")

# Print out the Country value counts
country_counts = ramen_df.Country.value_counts()
country_counts

In [None]:
# Visualize the value counts
# The most straightforward method is to use a density plot to identify where the value counts "fall off" and set the threshold within this region. 
country_counts.plot.density()

In [None]:
# Determine which values to replace
replace_countries = list(country_counts[country_counts < 100].index)

# Replace in DataFrame
for country in replace_countries:
    ramen_df.Country = ramen_df.Country.replace(country,"Other")


# Check to make sure binning was successful
ramen_df.Country.value_counts()

In [None]:
# Create the OneHotEncoder instance
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse=False)

# Fit the encoder and produce encoded DataFrame
encode_df = pd.DataFrame(enc.fit_transform(ramen_df.Country.values.reshape(-1,1)))

# Rename encoded columns
encode_df.columns = enc.get_feature_names(['Country'])
encode_df.head()

In [None]:
# Merge the two DataFrames together and drop the Country column
ramen_df.merge(encode_df,left_index=True,right_index=True).drop("Country",1)

In [None]:
# https://2u-data-curriculum-team.s3.amazonaws.com/dataviz-online/module_19/hr_dataset.csv

In [None]:
# Import our dependencies
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Read in our dataset
hr_df = pd.read_csv("hr_dataset.csv")
hr_df.head()

In [None]:
# Create the StandardScaler instance
scaler = StandardScaler()

In [None]:
# Fit the StandardScaler
scaler.fit(hr_df)

# Scale the data
scaled_data = scaler.transform(hr_df)

In [None]:
# Create a DataFrame with the scaled data
transformed_scaled_data = pd.DataFrame(scaled_data, columns=hr_df.columns)
transformed_scaled_data.head()

In [None]:
# To address the limitations of the basic neural network, we can implement a more robust neural network model by adding additional hidden layers. A neural network with more than one hidden layer is known as a deep neural network:

In [None]:
# Looking at the results of our simulated deep learning model, it does not appear that adding more layers increased the overall performance of the model. This is because the additional layer was redundant—the complexity of the dataset was encapsulated within the two hidden layer.
# The only way to determine how "deep" the deep learning model should be is through trial and error. 

In [None]:
# DS: https://www.kaggle.com/pavansubhasht/ibm-hr-analytics-attrition-dataset
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf

# Import our input dataset
attrition_df = pd.read_csv('HR-Employee-Attrition.csv')
attrition_df.head()

In [None]:
# Generate our categorical variable list
attrition_cat = attrition_df.dtypes[attrition_df.dtypes == "object"].index.tolist()

In [None]:
# Check the number of unique values in each column
attrition_df[attrition_cat].nunique()

In [None]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(attrition_df[attrition_cat]))

# Add the encoded variable names to the DataFrame
encode_df.columns = enc.get_feature_names(attrition_cat)
encode_df.head()

In [None]:
# Merge one-hot encoded features and drop the originals
attrition_df = attrition_df.merge(encode_df,left_index=True, right_index=True)
attrition_df = attrition_df.drop(attrition_cat,1)
attrition_df.head()

In [None]:
# We need to split our training and testing data before fitting our StandardScaler instance. This prevents testing data from influencing the standardization function.

In [None]:
# Split our preprocessed data into our features and target arrays
y = attrition_df["Attrition_Yes"].values
X = attrition_df.drop(["Attrition_Yes","Attrition_No"],1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [None]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# Now that our data is preprocessed via one-hot encoding and standardization, we should probably perform a gut check to ensure that no data has been lost from our original DataFrame.

In [None]:
# Define the model - deep neural net
number_input_features = len(X_train[0])
hidden_nodes_layer1 =  8
hidden_nodes_layer2 = 5

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

In [None]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Training and evaluating the deep learning model is no different from a basic neural network.

In [None]:
# Train the model
fit_model = nn.fit(X_train,y_train,epochs=100)

In [None]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# https://2u-data-curriculum-team.s3.amazonaws.com/dataviz-online/module_19/diabetes.csv

In [None]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pandas as pd
import tensorflow as tf

# Import our input dataset
diabetes_df = pd.read_csv('diabetes.csv')
diabetes_df.head()

In [None]:
# Remove diabetes outcome target from features data
y = diabetes_df.Outcome
X = diabetes_df.drop(columns="Outcome")

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

In [None]:
# Preprocess numerical data for neural network

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# Define the logistic regression model
log_classifier = LogisticRegression(solver="lbfgs",max_iter=200)

# Train the model
log_classifier.fit(X_train,y_train)

# Evaluate the model
y_pred = log_classifier.predict(X_test)
print(f" Logistic regression model accuracy: {accuracy_score(y_test,y_pred):.3f}")


In [None]:
# Define the basic neural network model
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=16, activation="relu", input_dim=8))
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50)

# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# https://2u-data-curriculum-team.s3.amazonaws.com/dataviz-online/module_19/bank_telemarketing.csv

In [None]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import pandas as pd
import tensorflow as tf

# Import our input dataset
tele_df = pd.read_csv('bank_telemarketing.csv')
tele_df.head()

In [None]:
# Generate our categorical variable list
tele_cat = tele_df.dtypes[tele_df.dtypes == "object"].index.tolist()


# Check the number of unique values in each column
tele_df[tele_cat].nunique()

In [None]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(tele_df[tele_cat]))

# Add the encoded variable names to the dataframe
encode_df.columns = enc.get_feature_names(tele_cat)
encode_df.head()

In [None]:
# Merge one-hot encoded features and drop the originals
tele_df = tele_df.merge(encode_df,left_index=True, right_index=True)
tele_df = tele_df.drop(tele_cat,1)
tele_df.head()

In [None]:
# Remove loan status target from features data
y = tele_df.Subscribed_yes.values
X = tele_df.drop(columns=["Subscribed_no","Subscribed_yes"]).values

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# Create the SVM model
svm = SVC(kernel='linear')

In [None]:
# Train the model
svm.fit(X_train, y_train)

In [None]:
# Evaluate the model
y_pred = svm.predict(X_test_scaled)
print(f" SVM model accuracy: {accuracy_score(y_test,y_pred):.3f}")

In [None]:
# Define the model - deep neural net
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 =  10
hidden_nodes_layer2 = 5

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))


# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Train the model 
fit_model = nn.fit(X_train_scaled, y_train, epochs=50) 
# Evaluate the model using the test data 
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Random forest classifiers are one of Bek's favorites. (Not only is it a powerful model type, she can't help thinking about actual forests when she sees the name!) She's curious how it stacks up against a deep learning model.
# Random forest classifiers are a type of ensemble learning model that combines multiple smaller models into a more robust and accurate model.
# Random forest models use a number of weak learner algorithms (decision trees) and combine their output to make a final classification (or regression) decision

In [None]:
# If random forest models are fairly robust and clear, why would you want to replace them with a neural network?

# The answer depends on the type and complexity of the entire dataset. First and foremost, random forest models will only handle tabular data, so data such as images or natural language data cannot be used in a random forest without heavy modifications to the data. Neural networks can handle all sorts of data types and structures in raw format or with general transformations (such as converting categorical data).

# In addition, each model handles input data differently. Random forest models are dependent on each weak learner being trained on a subset of the input data. Once each weak learner is trained, the random forest model predicts the classification based on a consensus of the weak learners. In contrast, deep learning models evaluate input data within a single neuron, as well as across multiple neurons and layers.

# As a result, the deep learning model might be able to identify variability in a dataset that a random forest model could miss. However, a random forest model with a sufficient number of estimators and tree depth should be able to perform at a similar capacity to most deep learning models.

In [None]:
# https://2u-data-curriculum-team.s3.amazonaws.com/dataviz-online/module_19/loan_status.csv

In [5]:
# Import our dependencies
### RESTART KERNEL ###
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import tensorflow as tf

# Import our input dataset
loans_df = pd.read_csv('loan_status.csv')
loans_df.head()

Unnamed: 0,Loan_Status,Current_Loan_Amount,Term,Credit_Score,Annual_Income,Years_in_current_job,Home_Ownership,Purpose,Monthly_Debt,Years_of_Credit_History,Months_since_last_delinquent,Number_of_Open_Accounts,Number_of_Credit_Problems,Current_Credit_Balance,Maximum_Open_Credit,Bankruptcies,Tax_Liens
0,Fully_Paid,99999999,Short_Term,741.0,2231892.0,8_years,Own_Home,Debt_Consolidation,29200.53,14.9,29.0,18,1,297996,750090.0,0.0,0.0
1,Fully_Paid,217646,Short_Term,730.0,1184194.0,<_1_year,Home_Mortgage,Debt_Consolidation,10855.08,19.6,10.0,13,1,122170,272052.0,1.0,0.0
2,Fully_Paid,548746,Short_Term,678.0,2559110.0,2_years,Rent,Debt_Consolidation,18660.28,22.6,33.0,4,0,437171,555038.0,0.0,0.0
3,Fully_Paid,99999999,Short_Term,728.0,714628.0,3_years,Rent,Debt_Consolidation,11851.06,16.0,76.0,16,0,203965,289784.0,0.0,0.0
4,Fully_Paid,99999999,Short_Term,740.0,776188.0,<_1_year,Own_Home,Debt_Consolidation,11578.22,8.5,25.0,6,0,134083,220220.0,0.0,0.0


In [6]:
# Generate our categorical variable list
loans_cat = loans_df.dtypes[loans_df.dtypes == "object"].index.tolist()

# Check the number of unique values in each column
loans_df[loans_cat].nunique()

Loan_Status              2
Term                     2
Years_in_current_job    11
Home_Ownership           4
Purpose                  7
dtype: int64

In [7]:
# Check the unique value counts to see if binning is required
loans_df.Years_in_current_job.value_counts()

10+_years    13149
2_years       3225
3_years       2997
<_1_year      2699
5_years       2487
4_years       2286
1_year        2247
6_years       2109
7_years       2082
8_years       1675
9_years       1467
Name: Years_in_current_job, dtype: int64

In [8]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(loans_df[loans_cat]))

# Add the encoded variable names to the DataFrame
encode_df.columns = enc.get_feature_names(loans_cat)
encode_df.head()

Unnamed: 0,Loan_Status_Fully_Paid,Loan_Status_Not_Paid,Term_Long_Term,Term_Short_Term,Years_in_current_job_10+_years,Years_in_current_job_1_year,Years_in_current_job_2_years,Years_in_current_job_3_years,Years_in_current_job_4_years,Years_in_current_job_5_years,...,Home_Ownership_Home_Mortgage,Home_Ownership_Own_Home,Home_Ownership_Rent,Purpose_Business_Loan,Purpose_Buy_House,Purpose_Buy_a_Car,Purpose_Debt_Consolidation,Purpose_Home_Improvements,Purpose_Medical_Bills,Purpose_Other
0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [9]:
# Merge one-hot encoded features and drop the originals
loans_df = loans_df.merge(encode_df,left_index=True, right_index=True)
loans_df = loans_df.drop(loans_cat,1)
loans_df.head()

Unnamed: 0,Current_Loan_Amount,Credit_Score,Annual_Income,Monthly_Debt,Years_of_Credit_History,Months_since_last_delinquent,Number_of_Open_Accounts,Number_of_Credit_Problems,Current_Credit_Balance,Maximum_Open_Credit,...,Home_Ownership_Home_Mortgage,Home_Ownership_Own_Home,Home_Ownership_Rent,Purpose_Business_Loan,Purpose_Buy_House,Purpose_Buy_a_Car,Purpose_Debt_Consolidation,Purpose_Home_Improvements,Purpose_Medical_Bills,Purpose_Other
0,99999999,741.0,2231892.0,29200.53,14.9,29.0,18,1,297996,750090.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,217646,730.0,1184194.0,10855.08,19.6,10.0,13,1,122170,272052.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,548746,678.0,2559110.0,18660.28,22.6,33.0,4,0,437171,555038.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,99999999,728.0,714628.0,11851.06,16.0,76.0,16,0,203965,289784.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,99999999,740.0,776188.0,11578.22,8.5,25.0,6,0,134083,220220.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [10]:
# Remove loan status target from features data
y = loans_df.Loan_Status_Fully_Paid
X = loans_df.drop(columns=["Loan_Status_Fully_Paid","Loan_Status_Not_Paid"])

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [11]:
# Define the model - deep neural net
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 =  10
hidden_nodes_layer2 = 5

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))


# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [12]:
# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)

# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Random forest predictive accuracy: 0.849


In [13]:
# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)

# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Random forest predictive accuracy: 0.849


In [14]:
# 19.6.1

In [15]:
# Import checkpoint dependencies
import os
from tensorflow.keras.callbacks import ModelCheckpoint

# Define the checkpoint path and filenames
os.makedirs("checkpoints/",exist_ok=True)
checkpoint_path = "checkpoints/weights.{epoch:02d}.hdf5"

In [16]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Create a callback that saves the model's weights every 5 epochs
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq=100)

# Train the model
fit_model = nn.fit(X_train_scaled,y_train,epochs=100,callbacks=[cp_callback])

# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/100
 75/854 [=>............................] - ETA: 3s - loss: 0.6578 - accuracy: 0.6183
Epoch 00001: saving model to checkpoints/weights.01.hdf5
182/854 [=====>........................] - ETA: 2s - loss: 0.6184 - accuracy: 0.6877 ETA: 2s - loss: 0.6316 - accuracy: 0.
Epoch 00001: saving model to checkpoints/weights.01.hdf5
Epoch 00001: saving model to checkpoints/weights.01.hdf5
Epoch 00001: saving model to checkpoints/weights.01.hdf5
Epoch 00001: saving model to checkpoints/weights.01.hdf5
Epoch 00001: saving model to checkpoints/weights.01.hdf5
Epoch 00001: saving model to checkpoints/weights.01.hdf5
Epoch 00001: saving model to checkpoints/weights.01.hdf5
Epoch 2/100
 29/854 [>.............................] - ETA: 2s - loss: 0.3533 - accuracy: 0.87 - ETA: 1s - loss: 0.4114 - accuracy: 0.8293
Epoch 00002: saving model to checkpoints/weights.02.hdf5
124/854 [===>..........................] - ETA: 1s - loss: 0.4050 - accuracy: 0.8375
Epoch 00002: saving model to checkpoints/we

Epoch 00007: saving model to checkpoints/weights.07.hdf5
Epoch 00007: saving model to checkpoints/weights.07.hdf5
Epoch 00007: saving model to checkpoints/weights.07.hdf5
Epoch 00007: saving model to checkpoints/weights.07.hdf5
Epoch 00007: saving model to checkpoints/weights.07.hdf5
Epoch 00007: saving model to checkpoints/weights.07.hdf5
Epoch 8/100
  1/854 [..............................] - ETA: 3s - loss: 0.3852 - accuracy: 0.8750
Epoch 00008: saving model to checkpoints/weights.08.hdf5
107/854 [==>...........................] - ETA: 1s - loss: 0.3872 - accuracy: 0.8430
Epoch 00008: saving model to checkpoints/weights.08.hdf5
Epoch 00008: saving model to checkpoints/weights.08.hdf5
Epoch 00008: saving model to checkpoints/weights.08.hdf5
Epoch 00008: saving model to checkpoints/weights.08.hdf5
Epoch 00008: saving model to checkpoints/weights.08.hdf5
Epoch 00008: saving model to checkpoints/weights.08.hdf5
Epoch 00008: saving model to checkpoints/weights.08.hdf5
Epoch 00008: saving 

Epoch 00013: saving model to checkpoints/weights.13.hdf5
Epoch 00013: saving model to checkpoints/weights.13.hdf5
Epoch 00013: saving model to checkpoints/weights.13.hdf5
Epoch 00013: saving model to checkpoints/weights.13.hdf5
Epoch 00013: saving model to checkpoints/weights.13.hdf5
Epoch 14/100
 80/854 [=>............................] - ETA: 2s - loss: 0.3843 - accuracy: 0.8389
Epoch 00014: saving model to checkpoints/weights.14.hdf5
168/854 [====>.........................] - ETA: 1s - loss: 0.3776 - accuracy: 0.8446
Epoch 00014: saving model to checkpoints/weights.14.hdf5
Epoch 00014: saving model to checkpoints/weights.14.hdf5
Epoch 00014: saving model to checkpoints/weights.14.hdf5
Epoch 00014: saving model to checkpoints/weights.14.hdf5
Epoch 00014: saving model to checkpoints/weights.14.hdf5
Epoch 00014: saving model to checkpoints/weights.14.hdf5
Epoch 00014: saving model to checkpoints/weights.14.hdf5
Epoch 15/100
 44/854 [>.............................] - ETA: 1s - loss: 0.38

Epoch 00019: saving model to checkpoints/weights.19.hdf5
Epoch 00019: saving model to checkpoints/weights.19.hdf5
Epoch 00019: saving model to checkpoints/weights.19.hdf5
Epoch 20/100
 72/854 [=>............................] - ETA: 1s - loss: 0.3799 - accuracy: 0.8436
Epoch 00020: saving model to checkpoints/weights.20.hdf5
168/854 [====>.........................] - ETA: 1s - loss: 0.3819 - accuracy: 0.8423
Epoch 00020: saving model to checkpoints/weights.20.hdf5
Epoch 00020: saving model to checkpoints/weights.20.hdf5
Epoch 00020: saving model to checkpoints/weights.20.hdf5
Epoch 00020: saving model to checkpoints/weights.20.hdf5
Epoch 00020: saving model to checkpoints/weights.20.hdf5
Epoch 00020: saving model to checkpoints/weights.20.hdf5
Epoch 00020: saving model to checkpoints/weights.20.hdf5
Epoch 21/100
 16/854 [..............................] - ETA: 2s - loss: 0.3967 - accuracy: 0.8352
Epoch 00021: saving model to checkpoints/weights.21.hdf5
112/854 [==>.......................

Epoch 00025: saving model to checkpoints/weights.25.hdf5
Epoch 26/100
 39/854 [>.............................] - ETA: 2s - loss: 0.3829 - accuracy: 0.8429
Epoch 00026: saving model to checkpoints/weights.26.hdf5
133/854 [===>..........................] - ETA: 1s - loss: 0.3824 - accuracy: 0.8448
Epoch 00026: saving model to checkpoints/weights.26.hdf5
Epoch 00026: saving model to checkpoints/weights.26.hdf5
Epoch 00026: saving model to checkpoints/weights.26.hdf5
Epoch 00026: saving model to checkpoints/weights.26.hdf5
Epoch 00026: saving model to checkpoints/weights.26.hdf5
Epoch 00026: saving model to checkpoints/weights.26.hdf5
Epoch 00026: saving model to checkpoints/weights.26.hdf5
Epoch 00026: saving model to checkpoints/weights.26.hdf5
Epoch 27/100
 79/854 [=>............................] - ETA: 2s - loss: 0.3692 - accuracy: 0.8516
Epoch 00027: saving model to checkpoints/weights.27.hdf5
186/854 [=====>........................] - ETA: 1s - loss: 0.3717 - accuracy: 0.8506
Epoch 0

Epoch 32/100
 21/854 [..............................] - ETA: 2s - loss: 0.3325 - accuracy: 0.8768
Epoch 00032: saving model to checkpoints/weights.32.hdf5
119/854 [===>..........................] - ETA: 1s - loss: 0.3787 - accuracy: 0.8467
Epoch 00032: saving model to checkpoints/weights.32.hdf5
Epoch 00032: saving model to checkpoints/weights.32.hdf5
Epoch 00032: saving model to checkpoints/weights.32.hdf5
Epoch 00032: saving model to checkpoints/weights.32.hdf5
Epoch 00032: saving model to checkpoints/weights.32.hdf5
Epoch 00032: saving model to checkpoints/weights.32.hdf5
Epoch 00032: saving model to checkpoints/weights.32.hdf5
Epoch 00032: saving model to checkpoints/weights.32.hdf5
Epoch 33/100
 59/854 [=>............................] - ETA: 2s - loss: 0.3891 - accuracy: 0.8408
Epoch 00033: saving model to checkpoints/weights.33.hdf5
151/854 [====>.........................] - ETA: 1s - loss: 0.3753 - accuracy: 0.8498
Epoch 00033: saving model to checkpoints/weights.33.hdf5
Epoch 0

175/854 [=====>........................] - ETA: 1s - loss: 0.3735 - accuracy: 0.8489
Epoch 00038: saving model to checkpoints/weights.38.hdf5
Epoch 00038: saving model to checkpoints/weights.38.hdf5
Epoch 00038: saving model to checkpoints/weights.38.hdf5
Epoch 00038: saving model to checkpoints/weights.38.hdf5
Epoch 00038: saving model to checkpoints/weights.38.hdf5
Epoch 00038: saving model to checkpoints/weights.38.hdf5
Epoch 00038: saving model to checkpoints/weights.38.hdf5
Epoch 39/100
 21/854 [..............................] - ETA: 2s - loss: 0.3536 - accuracy: 0.8677
Epoch 00039: saving model to checkpoints/weights.39.hdf5
148/854 [====>.........................] - ETA: 1s - loss: 0.3621 - accuracy: 0.8588
Epoch 00039: saving model to checkpoints/weights.39.hdf5
Epoch 00039: saving model to checkpoints/weights.39.hdf5
Epoch 00039: saving model to checkpoints/weights.39.hdf5
Epoch 00039: saving model to checkpoints/weights.39.hdf5
Epoch 00039: saving model to checkpoints/weights

Epoch 00044: saving model to checkpoints/weights.44.hdf5
Epoch 00044: saving model to checkpoints/weights.44.hdf5
Epoch 00044: saving model to checkpoints/weights.44.hdf5
Epoch 00044: saving model to checkpoints/weights.44.hdf5
Epoch 00044: saving model to checkpoints/weights.44.hdf5
Epoch 45/100
 23/854 [..............................] - ETA: 1s - loss: 0.3605 - accuracy: 0.8536
Epoch 00045: saving model to checkpoints/weights.45.hdf5
 99/854 [==>...........................] - ETA: 1s - loss: 0.3684 - accuracy: 0.8507
Epoch 00045: saving model to checkpoints/weights.45.hdf5
Epoch 00045: saving model to checkpoints/weights.45.hdf5
Epoch 00045: saving model to checkpoints/weights.45.hdf5
Epoch 00045: saving model to checkpoints/weights.45.hdf5
Epoch 00045: saving model to checkpoints/weights.45.hdf5
Epoch 00045: saving model to checkpoints/weights.45.hdf5
Epoch 00045: saving model to checkpoints/weights.45.hdf5
Epoch 00045: saving model to checkpoints/weights.45.hdf5
Epoch 46/100
 50/85

Epoch 00050: saving model to checkpoints/weights.50.hdf5
Epoch 00050: saving model to checkpoints/weights.50.hdf5
Epoch 00050: saving model to checkpoints/weights.50.hdf5
Epoch 00050: saving model to checkpoints/weights.50.hdf5
Epoch 51/100
100/854 [==>...........................] - ETA: 1s - loss: 0.3292 - accuracy: 0.8718
Epoch 00051: saving model to checkpoints/weights.51.hdf5
176/854 [=====>........................] - ETA: 2s - loss: 0.3419 - accuracy: 0.8650
Epoch 00051: saving model to checkpoints/weights.51.hdf5
Epoch 00051: saving model to checkpoints/weights.51.hdf5
Epoch 00051: saving model to checkpoints/weights.51.hdf5
Epoch 00051: saving model to checkpoints/weights.51.hdf5
Epoch 00051: saving model to checkpoints/weights.51.hdf5
Epoch 00051: saving model to checkpoints/weights.51.hdf5
Epoch 00051: saving model to checkpoints/weights.51.hdf5
Epoch 52/100
 27/854 [..............................] - ETA: 1s - loss: 0.3540 - accuracy: 0.8636
Epoch 00052: saving model to checkp

Epoch 00056: saving model to checkpoints/weights.56.hdf5
Epoch 00056: saving model to checkpoints/weights.56.hdf5
Epoch 57/100
 69/854 [=>............................] - ETA: 1s - loss: 0.3546 - accuracy: 0.8597
Epoch 00057: saving model to checkpoints/weights.57.hdf5
144/854 [====>.........................] - ETA: 1s - loss: 0.3617 - accuracy: 0.8558
Epoch 00057: saving model to checkpoints/weights.57.hdf5
Epoch 00057: saving model to checkpoints/weights.57.hdf5
Epoch 00057: saving model to checkpoints/weights.57.hdf5
Epoch 00057: saving model to checkpoints/weights.57.hdf5
Epoch 00057: saving model to checkpoints/weights.57.hdf5
Epoch 00057: saving model to checkpoints/weights.57.hdf5
Epoch 00057: saving model to checkpoints/weights.57.hdf5
Epoch 58/100
 15/854 [..............................] - ETA: 3s - loss: 0.3465 - accuracy: 0.8662
Epoch 00058: saving model to checkpoints/weights.58.hdf5
 99/854 [==>...........................] - ETA: 2s - loss: 0.3640 - accuracy: 0.8583
Epoch 0

Epoch 63/100
 44/854 [>.............................] - ETA: 1s - loss: 0.3540 - accuracy: 0.8646
Epoch 00063: saving model to checkpoints/weights.63.hdf5
151/854 [====>.........................] - ETA: 1s - loss: 0.3599 - accuracy: 0.8592
Epoch 00063: saving model to checkpoints/weights.63.hdf5
Epoch 00063: saving model to checkpoints/weights.63.hdf5
Epoch 00063: saving model to checkpoints/weights.63.hdf5
Epoch 00063: saving model to checkpoints/weights.63.hdf5
Epoch 00063: saving model to checkpoints/weights.63.hdf5
Epoch 00063: saving model to checkpoints/weights.63.hdf5
Epoch 00063: saving model to checkpoints/weights.63.hdf5
Epoch 00063: saving model to checkpoints/weights.63.hdf5
Epoch 64/100
 80/854 [=>............................] - ETA: 2s - loss: 0.3604 - accuracy: 0.8568
Epoch 00064: saving model to checkpoints/weights.64.hdf5
188/854 [=====>........................] - ETA: 1s - loss: 0.3621 - accuracy: 0.8566
Epoch 00064: saving model to checkpoints/weights.64.hdf5
Epoch 0

122/854 [===>..........................] - ETA: 3s - loss: 0.3855 - accuracy: 0.8397
Epoch 00069: saving model to checkpoints/weights.69.hdf5
Epoch 00069: saving model to checkpoints/weights.69.hdf5
Epoch 00069: saving model to checkpoints/weights.69.hdf5
Epoch 00069: saving model to checkpoints/weights.69.hdf5
Epoch 00069: saving model to checkpoints/weights.69.hdf5
Epoch 00069: saving model to checkpoints/weights.69.hdf5
Epoch 00069: saving model to checkpoints/weights.69.hdf5
Epoch 00069: saving model to checkpoints/weights.69.hdf5
Epoch 70/100
 72/854 [=>............................] - ETA: 2s - loss: 0.3941 - accuracy: 0.8375
Epoch 00070: saving model to checkpoints/weights.70.hdf5
160/854 [====>.........................] - ETA: 2s - loss: 0.3829 - accuracy: 0.8440
Epoch 00070: saving model to checkpoints/weights.70.hdf5
Epoch 00070: saving model to checkpoints/weights.70.hdf5
Epoch 00070: saving model to checkpoints/weights.70.hdf5
Epoch 00070: saving model to checkpoints/weights

Epoch 00075: saving model to checkpoints/weights.75.hdf5
Epoch 00075: saving model to checkpoints/weights.75.hdf5
Epoch 00075: saving model to checkpoints/weights.75.hdf5
Epoch 00075: saving model to checkpoints/weights.75.hdf5
Epoch 00075: saving model to checkpoints/weights.75.hdf5
Epoch 00075: saving model to checkpoints/weights.75.hdf5
Epoch 76/100
 26/854 [..............................] - ETA: 1s - loss: 0.3986 - accuracy: 0.8138
Epoch 00076: saving model to checkpoints/weights.76.hdf5
136/854 [===>..........................] - ETA: 1s - loss: 0.3870 - accuracy: 0.8362
Epoch 00076: saving model to checkpoints/weights.76.hdf5
Epoch 00076: saving model to checkpoints/weights.76.hdf5
Epoch 00076: saving model to checkpoints/weights.76.hdf5
Epoch 00076: saving model to checkpoints/weights.76.hdf5
Epoch 00076: saving model to checkpoints/weights.76.hdf5
Epoch 00076: saving model to checkpoints/weights.76.hdf5
Epoch 00076: saving model to checkpoints/weights.76.hdf5
Epoch 00076: saving

Epoch 00081: saving model to checkpoints/weights.81.hdf5
Epoch 00081: saving model to checkpoints/weights.81.hdf5
Epoch 00081: saving model to checkpoints/weights.81.hdf5
Epoch 00081: saving model to checkpoints/weights.81.hdf5
Epoch 82/100
  1/854 [..............................] - ETA: 3s - loss: 0.2876 - accuracy: 0.9062
Epoch 00082: saving model to checkpoints/weights.82.hdf5
106/854 [==>...........................] - ETA: 1s - loss: 0.3657 - accuracy: 0.8564
Epoch 00082: saving model to checkpoints/weights.82.hdf5
Epoch 00082: saving model to checkpoints/weights.82.hdf5
Epoch 00082: saving model to checkpoints/weights.82.hdf5
Epoch 00082: saving model to checkpoints/weights.82.hdf5
Epoch 00082: saving model to checkpoints/weights.82.hdf5
Epoch 00082: saving model to checkpoints/weights.82.hdf5
Epoch 00082: saving model to checkpoints/weights.82.hdf5
Epoch 00082: saving model to checkpoints/weights.82.hdf5
Epoch 83/100
 67/854 [=>............................] - ETA: 1s - loss: 0.35

Epoch 00087: saving model to checkpoints/weights.87.hdf5
Epoch 00087: saving model to checkpoints/weights.87.hdf5
Epoch 88/100
  1/854 [..............................] - ETA: 2s - loss: 0.4542 - accuracy: 0.8438
Epoch 00088: saving model to checkpoints/weights.88.hdf5
 99/854 [==>...........................] - ETA: 1s - loss: 0.3867 - accuracy: 0.8417
Epoch 00088: saving model to checkpoints/weights.88.hdf5
192/854 [=====>........................] - ETA: 1s - loss: 0.3809 - accuracy: 0.8447
Epoch 00088: saving model to checkpoints/weights.88.hdf5
Epoch 00088: saving model to checkpoints/weights.88.hdf5
Epoch 00088: saving model to checkpoints/weights.88.hdf5
Epoch 00088: saving model to checkpoints/weights.88.hdf5
Epoch 00088: saving model to checkpoints/weights.88.hdf5
Epoch 00088: saving model to checkpoints/weights.88.hdf5
Epoch 00088: saving model to checkpoints/weights.88.hdf5
Epoch 89/100
 25/854 [..............................] - ETA: 1s - loss: 0.3424 - accuracy: 0.8679
Epoch 0

Epoch 00093: saving model to checkpoints/weights.93.hdf5
Epoch 94/100
 56/854 [>.............................] - ETA: 2s - loss: 0.3674 - accuracy: 0.8546
Epoch 00094: saving model to checkpoints/weights.94.hdf5
160/854 [====>.........................] - ETA: 1s - loss: 0.3692 - accuracy: 0.8528
Epoch 00094: saving model to checkpoints/weights.94.hdf5
Epoch 00094: saving model to checkpoints/weights.94.hdf5
Epoch 00094: saving model to checkpoints/weights.94.hdf5
Epoch 00094: saving model to checkpoints/weights.94.hdf5
Epoch 00094: saving model to checkpoints/weights.94.hdf5
Epoch 00094: saving model to checkpoints/weights.94.hdf5
Epoch 00094: saving model to checkpoints/weights.94.hdf5
Epoch 95/100
  1/854 [..............................] - ETA: 1s - loss: 0.4293 - accuracy: 0.8438
Epoch 00095: saving model to checkpoints/weights.95.hdf5
122/854 [===>..........................] - ETA: 1s - loss: 0.3793 - accuracy: 0.8422
Epoch 00095: saving model to checkpoints/weights.95.hdf5
Epoch 0

 35/854 [>.............................] - ETA: 2s - loss: 0.4079 - accuracy: 0.8274
Epoch 00100: saving model to checkpoints/weights.100.hdf5
142/854 [===>..........................] - ETA: 1s - loss: 0.3914 - accuracy: 0.8387
Epoch 00100: saving model to checkpoints/weights.100.hdf5
Epoch 00100: saving model to checkpoints/weights.100.hdf5
Epoch 00100: saving model to checkpoints/weights.100.hdf5
Epoch 00100: saving model to checkpoints/weights.100.hdf5
Epoch 00100: saving model to checkpoints/weights.100.hdf5
Epoch 00100: saving model to checkpoints/weights.100.hdf5
Epoch 00100: saving model to checkpoints/weights.100.hdf5
Epoch 00100: saving model to checkpoints/weights.100.hdf5
285/285 - 0s - loss: 0.3869 - accuracy: 0.8460
Loss: 0.386850506067276, Accuracy: 0.8460355997085571


In [18]:
# Define the model - deep neural net
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 =  10
hidden_nodes_layer2 = 5

nn_new = tf.keras.models.Sequential()

# First hidden layer
nn_new.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn_new.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn_new.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the model
nn_new.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Restore the model weights
nn_new.load_weights("checkpoints/weights.100.hdf5")

# Evaluate the model using the test data
model_loss, model_accuracy = nn_new.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

285/285 - 0s - loss: 0.3869 - accuracy: 0.8460
Loss: 0.386850506067276, Accuracy: 0.8460355997085571


In [19]:
# Export our model to HDF5 file
nn_new.save("trained_attrition.h5")

In [20]:
# Import the model to a new object
nn_imported = tf.keras.models.load_model('trained_attrition.h5')

In [21]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn_new.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

285/285 - 0s - loss: 0.3869 - accuracy: 0.8460
Loss: 0.386850506067276, Accuracy: 0.8460355997085571
