# Test how to make predictions using saved tfmodel


In [1]:
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import joblib
from joblib import dump, load

In [2]:
encoder = joblib.load('../Resources/encoder.joblib')
scaler = joblib.load('../Resources/scaler.joblib')

In [3]:
file = ("../Resources/tensorflowmodel.keras")
model = tf.keras.models.load_model(file)

In [4]:
import pandas as pd

In [5]:
# Define the new data
new_data = {'person_age': 21, 'person_income': 9600, 'person_home_ownership': 'MORTGAGE', 'person_emp_length': 1, 'loan_intent': 'MEDICAL', 'loan_grade': 'C', 'loan_amnt': 9600, 'loan_int_rate': 12.87, 'loan_percent_income': 1, 'cb_person_default_on_file': 'N', 'cb_person_cred_hist_length': 1}

# Convert to DataFrame
new_df = pd.DataFrame([new_data])

# Convert categorical variables to one-hot encoding
categorical_columns = ['person_home_ownership', 'loan_intent', 'loan_grade', 'cb_person_default_on_file']
new_df = pd.get_dummies(new_df, columns=categorical_columns)

# Ensure all columns present in the training data are in the new data
training_categorical_columns = ['person_home_ownership_MORTGAGE', 'person_home_ownership_OTHER', 
                                 'person_home_ownership_OWN', 'person_home_ownership_RENT', 
                                 'loan_intent_DEBTCONSOLIDATION', 'loan_intent_EDUCATION', 
                                 'loan_intent_HOMEIMPROVEMENT', 'loan_intent_MEDICAL', 
                                 'loan_intent_PERSONAL', 'loan_intent_VENTURE', 'loan_grade_A', 
                                 'loan_grade_B', 'loan_grade_C', 'loan_grade_D', 'loan_grade_E', 
                                 'loan_grade_F', 'loan_grade_G', 'cb_person_default_on_file_N', 
                                 'cb_person_default_on_file_Y']

missing_cols = set(training_categorical_columns).difference(new_df.columns)
for col in missing_cols:
    new_df[col] = 0

# Reorder the columns to match the order in the training data
new_df = new_df[['person_age', 'person_income', 'person_emp_length', 
                                                'loan_amnt', 'loan_int_rate','loan_percent_income', 'cb_person_cred_hist_length'] + training_categorical_columns  ]

pd.set_option('display.max_columns', None)

# Now new_df contains the preprocessed data ready for prediction
new_df

Unnamed: 0,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,person_home_ownership_MORTGAGE,person_home_ownership_OTHER,person_home_ownership_OWN,person_home_ownership_RENT,loan_intent_DEBTCONSOLIDATION,loan_intent_EDUCATION,loan_intent_HOMEIMPROVEMENT,loan_intent_MEDICAL,loan_intent_PERSONAL,loan_intent_VENTURE,loan_grade_A,loan_grade_B,loan_grade_C,loan_grade_D,loan_grade_E,loan_grade_F,loan_grade_G,cb_person_default_on_file_N,cb_person_default_on_file_Y
0,21,9600,1,9600,12.87,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0


In [6]:
scaled_data = scaler.fit_transform(new_df)

In [7]:
predictions = model.predict(scaled_data)



2024-03-21 16:15:33.559651: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [8]:
# Assuming it's a binary classification, you might want to threshold probabilities to get binary predictions
binary_predictions = (predictions > 0.5).astype(int)

# Print the predictions
print("Predictions:", binary_predictions[0][0])

Predictions: 0


## The Above Method Only predicts 0

# Try New Method from Flask app below

In [9]:
import numpy as np

In [10]:
def preprocess(data):
    # Updated lists without 'loan_status'
    numerical_feature_names = ['person_age', 'person_income', 'person_emp_length', 'loan_amnt', 'loan_int_rate', 
                               'loan_percent_income', 'cb_person_cred_hist_length']
    categorical_feature_names = ['person_home_ownership_MORTGAGE', 'person_home_ownership_OTHER', 
                                 'person_home_ownership_OWN', 'person_home_ownership_RENT', 
                                 'loan_intent_DEBTCONSOLIDATION', 'loan_intent_EDUCATION', 
                                 'loan_intent_HOMEIMPROVEMENT', 'loan_intent_MEDICAL', 
                                 'loan_intent_PERSONAL', 'loan_intent_VENTURE', 'loan_grade_A', 
                                 'loan_grade_B', 'loan_grade_C', 'loan_grade_D', 'loan_grade_E', 
                                 'loan_grade_F', 'loan_grade_G', 'cb_person_default_on_file_N', 
                                 'cb_person_default_on_file_Y']

    df = pd.DataFrame([data])
    categorical_columns = ['person_home_ownership', 'loan_intent', 'loan_grade', 'cb_person_default_on_file']
    
    # Handling missing features by setting a default value or skipping
    numerical_features = np.array([data.get(feature, 0.0) for feature in numerical_feature_names]).reshape(1, -1)
    # Ensure categorical features are correctly extracted
    categorical_features = np.array([data.get(feature, 0) for feature in categorical_columns]).reshape(1, -1)
    
    # Apply one-hot encoding to categorical features
    encoded_features = encoder.transform(categorical_features).toarray()
    
    # Scale numerical features
    scaled_numerical_features = scaler.transform(numerical_features)
    
    # Concatenate scaled numerical features and encoded categorical features
    processed_data = np.concatenate([scaled_numerical_features, encoded_features], axis=1)
    
    return processed_data

In [11]:
#preprocess(new_data)

## preprocess function returns error

In [12]:
def preprocess_(data):
    # Updated lists without 'loan_status'
    numerical_feature_names = ['person_age', 'person_income', 'person_emp_length', 'loan_amnt', 'loan_int_rate', 
                               'loan_percent_income', 'cb_person_cred_hist_length']
    categorical_feature_names = ['person_home_ownership_MORTGAGE', 'person_home_ownership_OTHER', 
                                 'person_home_ownership_OWN', 'person_home_ownership_RENT', 
                                 'loan_intent_DEBTCONSOLIDATION', 'loan_intent_EDUCATION', 
                                 'loan_intent_HOMEIMPROVEMENT', 'loan_intent_MEDICAL', 
                                 'loan_intent_PERSONAL', 'loan_intent_VENTURE', 'loan_grade_A', 
                                 'loan_grade_B', 'loan_grade_C', 'loan_grade_D', 'loan_grade_E', 
                                 'loan_grade_F', 'loan_grade_G', 'cb_person_default_on_file_N', 
                                 'cb_person_default_on_file_Y']

    df = pd.DataFrame([data])
    categorical_columns = ['person_home_ownership', 'loan_intent', 'loan_grade', 'cb_person_default_on_file']
    
    # Handling missing features by setting a default value or skipping
    numerical_features = np.array([data.get(feature, 0.0) for feature in numerical_feature_names]).reshape(1, -1)
    # Ensure categorical features are correctly extracted
    categorical_features = np.array([data.get(feature, 0) for feature in categorical_columns]).reshape(1, -1)
    
    # Apply one-hot encoding to categorical features
    encoded_features = encoder.transform(categorical_features).toarray()
    
    # Scale numerical features
    scaled_numerical_features = scaler.transform(numerical_features)
    
    # Concatenate scaled numerical features and encoded categorical features
    processed_data = np.concatenate([scaled_numerical_features, encoded_features], axis=1)
    
    return processed_data

# Testing 3rd method - manual


In [13]:
new_data = {'person_age': 21, 'person_income': 9600, 'person_home_ownership': 'OWN', 'person_emp_length': 5, 'loan_intent': 'EDUCATION', 'loan_grade': 'B', 'loan_amnt': 1000, 'loan_int_rate': 11.14, 'loan_percent_income': 0.1, 'cb_person_default_on_file': 'N', 'cb_person_cred_hist_length': 2}

In [14]:
test_df = pd.DataFrame([new_data])

test_df

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
0,21,9600,OWN,5,EDUCATION,B,1000,11.14,0.1,N,2


In [15]:
# Generate our categorical variable lists
df_cat = test_df.dtypes[test_df.dtypes == "object"].index.tolist()
df_cat

['person_home_ownership',
 'loan_intent',
 'loan_grade',
 'cb_person_default_on_file']

In [16]:
# Create a OneHotEncoder instance
encoder = OneHotEncoder()

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(encoder.fit_transform(test_df[df_cat]).toarray())

# Add the encoded variable names to the dataframe
encode_df.columns = encoder.get_feature_names_out(df_cat)
encode_df.head()

Unnamed: 0,person_home_ownership_OWN,loan_intent_EDUCATION,loan_grade_B,cb_person_default_on_file_N
0,1.0,1.0,1.0,1.0


In [17]:
test_df = test_df.merge(encode_df, left_index=True, right_index=True)
test_df = test_df.drop(df_cat, 1)
test_df.head()

  test_df = test_df.drop(df_cat, 1)


Unnamed: 0,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,person_home_ownership_OWN,loan_intent_EDUCATION,loan_grade_B,cb_person_default_on_file_N
0,21,9600,5,1000,11.14,0.1,2,1.0,1.0,1.0,1.0


In [18]:
len(test_df.columns)

11

In [19]:
# Ensure all columns present in the training data are in the new data
training_categorical_columns = ['person_home_ownership_MORTGAGE', 'person_home_ownership_OTHER', 
                                 'person_home_ownership_OWN', 'person_home_ownership_RENT', 
                                 'loan_intent_DEBTCONSOLIDATION', 'loan_intent_EDUCATION', 
                                 'loan_intent_HOMEIMPROVEMENT', 'loan_intent_MEDICAL', 
                                 'loan_intent_PERSONAL', 'loan_intent_VENTURE', 'loan_grade_A', 
                                 'loan_grade_B', 'loan_grade_C', 'loan_grade_D', 'loan_grade_E', 
                                 'loan_grade_F', 'loan_grade_G', 'cb_person_default_on_file_N', 
                                 'cb_person_default_on_file_Y']

missing_cols = set(training_categorical_columns).difference(test_df.columns)
for col in missing_cols:
    test_df[col] = 0.0

# Reorder the columns to match the order in the training data
new_df = test_df[['person_age', 'person_income', 'person_emp_length', 
                                                'loan_amnt', 'loan_int_rate','loan_percent_income', 'cb_person_cred_hist_length'] + training_categorical_columns  ]

pd.set_option('display.max_columns', None)

# Now new_df contains the preprocessed data ready for prediction
new_df


Unnamed: 0,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,person_home_ownership_MORTGAGE,person_home_ownership_OTHER,person_home_ownership_OWN,person_home_ownership_RENT,loan_intent_DEBTCONSOLIDATION,loan_intent_EDUCATION,loan_intent_HOMEIMPROVEMENT,loan_intent_MEDICAL,loan_intent_PERSONAL,loan_intent_VENTURE,loan_grade_A,loan_grade_B,loan_grade_C,loan_grade_D,loan_grade_E,loan_grade_F,loan_grade_G,cb_person_default_on_file_N,cb_person_default_on_file_Y
0,21,9600,5,1000,11.14,0.1,2,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [20]:
X = new_df.values

In [21]:
X_scaler = scaler.fit_transform(X)

In [22]:
model.predict(X_scaler)[0][0]



0.14121939

In [23]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 8)                 216       
                                                                 
 dense_1 (Dense)             (None, 5)                 45        
                                                                 
 dense_2 (Dense)             (None, 1)                 6         
                                                                 
Total params: 267
Trainable params: 267
Non-trainable params: 0
_________________________________________________________________


In [24]:
# Model still predicts same value for different data

## Test 4


In [73]:
test_data = {'person_age': 20, 'person_income': 500, 'person_home_ownership': 'RENT', 'person_emp_length': 2, 'loan_intent': 'DEBTCONSOLIDATION', 'loan_grade': 'F', 'loan_amnt': 290, 'loan_int_rate': 5, 'loan_percent_income': 0.58, 'cb_person_default_on_file': 'Y', 'cb_person_cred_hist_length': 1}

In [74]:
df = pd.DataFrame([test_data])

df

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
0,20,500,RENT,2,DEBTCONSOLIDATION,F,290,5,0.58,Y,1


In [75]:
# Generate categorical values
df_categ = df.dtypes[df.dtypes == 'object'].index.to_list()

In [76]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse_output=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(df[df_categ]))

# Add the encoded variable names to the dataframe
encode_df.columns = enc.get_feature_names_out(df_categ)
encode_df.head()

Unnamed: 0,person_home_ownership_RENT,loan_intent_DEBTCONSOLIDATION,loan_grade_F,cb_person_default_on_file_Y
0,1.0,1.0,1.0,1.0


In [77]:
df = df.merge(encode_df, left_index=True, right_index=True)
df = df.drop(df_categ, 1)
df.head()

  df = df.drop(df_categ, 1)


Unnamed: 0,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,person_home_ownership_RENT,loan_intent_DEBTCONSOLIDATION,loan_grade_F,cb_person_default_on_file_Y
0,20,500,2,290,5,0.58,1,1.0,1.0,1.0,1.0


In [78]:
# Ensure all columns present in the training data are in the new data
training_categorical_columns = ['person_home_ownership_MORTGAGE', 'person_home_ownership_OTHER', 
                                 'person_home_ownership_OWN', 'person_home_ownership_RENT', 
                                 'loan_intent_DEBTCONSOLIDATION', 'loan_intent_EDUCATION', 
                                 'loan_intent_HOMEIMPROVEMENT', 'loan_intent_MEDICAL', 
                                 'loan_intent_PERSONAL', 'loan_intent_VENTURE', 'loan_grade_A', 
                                 'loan_grade_B', 'loan_grade_C', 'loan_grade_D', 'loan_grade_E', 
                                 'loan_grade_F', 'loan_grade_G', 'cb_person_default_on_file_N', 
                                 'cb_person_default_on_file_Y']

missing_cols = set(training_categorical_columns).difference(df.columns)
for col in missing_cols:
    df[col] = 0.0

# Reorder the columns to match the order in the training data
final_df = df[['person_age', 'person_income', 'person_emp_length', 
                                                'loan_amnt', 'loan_int_rate','loan_percent_income', 'cb_person_cred_hist_length'] + training_categorical_columns  ]

pd.set_option('display.max_columns', None)

# Now new_df contains the preprocessed data ready for prediction
final_df

Unnamed: 0,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,person_home_ownership_MORTGAGE,person_home_ownership_OTHER,person_home_ownership_OWN,person_home_ownership_RENT,loan_intent_DEBTCONSOLIDATION,loan_intent_EDUCATION,loan_intent_HOMEIMPROVEMENT,loan_intent_MEDICAL,loan_intent_PERSONAL,loan_intent_VENTURE,loan_grade_A,loan_grade_B,loan_grade_C,loan_grade_D,loan_grade_E,loan_grade_F,loan_grade_G,cb_person_default_on_file_N,cb_person_default_on_file_Y
0,20,500,2,290,5,0.58,1,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0


In [79]:
X = final_df.values
X

array([[ 20.  , 500.  ,   2.  , 290.  ,   5.  ,   0.58,   1.  ,   0.  ,
          0.  ,   0.  ,   1.  ,   1.  ,   0.  ,   0.  ,   0.  ,   0.  ,
          0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   1.  ,   0.  ,
          0.  ,   1.  ]])

In [80]:
X_scaled = scaler.transform(X)

In [81]:
X_scaled

array([[-1.00e+00, -9.10e+03, -3.00e+00, -7.10e+02, -6.14e+00,  4.80e-01,
        -1.00e+00,  0.00e+00,  0.00e+00, -1.00e+00,  1.00e+00,  1.00e+00,
        -1.00e+00,  0.00e+00,  0.00e+00,  0.00e+00,  0.00e+00,  0.00e+00,
        -1.00e+00,  0.00e+00,  0.00e+00,  0.00e+00,  1.00e+00,  0.00e+00,
        -1.00e+00,  1.00e+00]])

In [82]:
model.predict(X_scaled)



array([[1.]], dtype=float32)

In [83]:
# SUCCESS!!

## Make a function for our Flask App

In [85]:
def process_(data):
    enc = OneHotEncoder(sparse_output=False)
    
    # Convert our JSON Data to Pandas Dataframe
    df = pd.DataFrame([data])

    # Store our categorical values (Will use for encoder)
    df_categ = df.dtypes[df.dtypes == 'object'].index.to_list()

    # Fit and transform the OneHotEncoder using the categorical variable list
    encode_df = pd.DataFrame(enc.fit_transform(df[df_categ]))

    # Add the encoded variable names to the dataframe
    encode_df.columns = enc.get_feature_names_out(df_categ)

    # Merge the encoded categorical columns with our original colums
    df = df.merge(encode_df, left_index=True, right_index=True)

    # Drop the categorical colums that are not encoded
    df = df.drop(df_categ, 1)

    # Ensure all columns present in the training data are in the new data
    training_categorical_columns = ['person_home_ownership_MORTGAGE', 'person_home_ownership_OTHER', 
                                 'person_home_ownership_OWN', 'person_home_ownership_RENT', 
                                 'loan_intent_DEBTCONSOLIDATION', 'loan_intent_EDUCATION', 
                                 'loan_intent_HOMEIMPROVEMENT', 'loan_intent_MEDICAL', 
                                 'loan_intent_PERSONAL', 'loan_intent_VENTURE', 'loan_grade_A', 
                                 'loan_grade_B', 'loan_grade_C', 'loan_grade_D', 'loan_grade_E', 
                                 'loan_grade_F', 'loan_grade_G', 'cb_person_default_on_file_N', 
                                 'cb_person_default_on_file_Y']

    missing_cols = set(training_categorical_columns).difference(df.columns)
    for col in missing_cols:
        df[col] = 0.0

    # Reorder the columns to match the order in the training data
    final_df = df[['person_age', 'person_income', 'person_emp_length', 
                                                'loan_amnt', 'loan_int_rate','loan_percent_income', 'cb_person_cred_hist_length'] + training_categorical_columns  ]

    # Get our X values
    X = final_df.values

    # Transform our X values using our saved scaler from the training data
    X_scaled = scaler.transform(X)

    # Return processed data
    return X_scaled



In [87]:
# Data of HIGH credit risk
test_data = {
            'person_age': 20, 
            'person_income': 500, 
            'person_home_ownership': 'RENT', 
            'person_emp_length': 2, 
            'loan_intent': 'DEBTCONSOLIDATION', 
            'loan_grade': 'F', 
            'loan_amnt': 290, 
            'loan_int_rate': 5, 
            'loan_percent_income': 0.58, 
            'cb_person_default_on_file': 'Y', 
            'cb_person_cred_hist_length': 1
            }

In [88]:
# Data of LOW credit risk
test_data_2 = {
            'person_age': 30, 
            'person_income': 50000, 
            'person_home_ownership': 'MORTGAGE', 
            'person_emp_length': 5, 
            'loan_intent': 'EDUCATION', 
            'loan_grade': 'B', 
            'loan_amnt': 2900, 
            'loan_int_rate': 5, 
            'loan_percent_income': 0.06, 
            'cb_person_default_on_file': 'N', 
            'cb_person_cred_hist_length': 6
            }

In [93]:
prediction = model.predict(process_(test_data))



  df = df.drop(df_categ, 1)


In [96]:
prediction[0].tolist()

1.0