# Test how to make predictions using saved tfmodel


In [1]:
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import joblib
from joblib import dump, load

In [2]:
encoder = joblib.load('../Resources/encoder.joblib')
scaler = joblib.load('../Resources/scaler.joblib')

In [3]:
file = ("../Resources/tensorflowmodel.keras")
model = tf.keras.models.load_model(file)

In [5]:
import pandas as pd

In [43]:
# Define the new data
new_data = {'person_age': 21, 'person_income': 9600, 'person_home_ownership': 'MORTGAGE', 'person_emp_length': 1, 'loan_intent': 'MEDICAL', 'loan_grade': 'C', 'loan_amnt': 9600, 'loan_int_rate': 12.87, 'loan_percent_income': 1, 'cb_person_default_on_file': 'N', 'cb_person_cred_hist_length': 1}

# Convert to DataFrame
new_df = pd.DataFrame([new_data])

# Convert categorical variables to one-hot encoding
categorical_columns = ['person_home_ownership', 'loan_intent', 'loan_grade', 'cb_person_default_on_file']
new_df = pd.get_dummies(new_df, columns=categorical_columns)

# Ensure all columns present in the training data are in the new data
training_categorical_columns = ['person_home_ownership_MORTGAGE', 'person_home_ownership_OTHER', 
                                 'person_home_ownership_OWN', 'person_home_ownership_RENT', 
                                 'loan_intent_DEBTCONSOLIDATION', 'loan_intent_EDUCATION', 
                                 'loan_intent_HOMEIMPROVEMENT', 'loan_intent_MEDICAL', 
                                 'loan_intent_PERSONAL', 'loan_intent_VENTURE', 'loan_grade_A', 
                                 'loan_grade_B', 'loan_grade_C', 'loan_grade_D', 'loan_grade_E', 
                                 'loan_grade_F', 'loan_grade_G', 'cb_person_default_on_file_N', 
                                 'cb_person_default_on_file_Y']

missing_cols = set(training_categorical_columns).difference(new_df.columns)
for col in missing_cols:
    new_df[col] = 0

# Reorder the columns to match the order in the training data
new_df = new_df[['person_age', 'person_income', 'person_emp_length', 
                                                'loan_amnt', 'loan_int_rate','loan_percent_income', 'cb_person_cred_hist_length'] + training_categorical_columns  ]

pd.set_option('display.max_columns', None)

# Now new_df contains the preprocessed data ready for prediction
new_df

Unnamed: 0,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,person_home_ownership_MORTGAGE,person_home_ownership_OTHER,person_home_ownership_OWN,person_home_ownership_RENT,loan_intent_DEBTCONSOLIDATION,loan_intent_EDUCATION,loan_intent_HOMEIMPROVEMENT,loan_intent_MEDICAL,loan_intent_PERSONAL,loan_intent_VENTURE,loan_grade_A,loan_grade_B,loan_grade_C,loan_grade_D,loan_grade_E,loan_grade_F,loan_grade_G,cb_person_default_on_file_N,cb_person_default_on_file_Y
0,21,9600,1,9600,12.87,1,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0


In [51]:
scaled_data = scaler.fit_transform(new_df)

In [48]:
predictions = model.predict(scaled_data)



In [52]:
# Assuming it's a binary classification, you might want to threshold probabilities to get binary predictions
binary_predictions = (predictions > 0.5).astype(int)

# Print the predictions
print("Predictions:", binary_predictions[0][0])

Predictions: 0


## The Above Method Only predicts 0

# Try New Method from Flask app below

In [53]:
import numpy as np

In [61]:
def preprocess(data):
    # Updated lists without 'loan_status'
    numerical_feature_names = ['person_age', 'person_income', 'person_emp_length', 'loan_amnt', 'loan_int_rate', 
                               'loan_percent_income', 'cb_person_cred_hist_length']
    categorical_feature_names = ['person_home_ownership_MORTGAGE', 'person_home_ownership_OTHER', 
                                 'person_home_ownership_OWN', 'person_home_ownership_RENT', 
                                 'loan_intent_DEBTCONSOLIDATION', 'loan_intent_EDUCATION', 
                                 'loan_intent_HOMEIMPROVEMENT', 'loan_intent_MEDICAL', 
                                 'loan_intent_PERSONAL', 'loan_intent_VENTURE', 'loan_grade_A', 
                                 'loan_grade_B', 'loan_grade_C', 'loan_grade_D', 'loan_grade_E', 
                                 'loan_grade_F', 'loan_grade_G', 'cb_person_default_on_file_N', 
                                 'cb_person_default_on_file_Y']

    df = pd.DataFrame([data])
    categorical_columns = ['person_home_ownership', 'loan_intent', 'loan_grade', 'cb_person_default_on_file']
    
    # Handling missing features by setting a default value or skipping
    numerical_features = np.array([data.get(feature, 0.0) for feature in numerical_feature_names]).reshape(1, -1)
    # Ensure categorical features are correctly extracted
    categorical_features = np.array([data.get(feature, 0) for feature in categorical_columns]).reshape(1, -1)
    
    # Apply one-hot encoding to categorical features
    encoded_features = encoder.transform(categorical_features).toarray()
    
    # Scale numerical features
    scaled_numerical_features = scaler.transform(numerical_features)
    
    # Concatenate scaled numerical features and encoded categorical features
    processed_data = np.concatenate([scaled_numerical_features, encoded_features], axis=1)
    
    return processed_data

In [63]:
#preprocess(new_data)

## preprocess function returns error

In [64]:
def preprocess_(data):
    # Updated lists without 'loan_status'
    numerical_feature_names = ['person_age', 'person_income', 'person_emp_length', 'loan_amnt', 'loan_int_rate', 
                               'loan_percent_income', 'cb_person_cred_hist_length']
    categorical_feature_names = ['person_home_ownership_MORTGAGE', 'person_home_ownership_OTHER', 
                                 'person_home_ownership_OWN', 'person_home_ownership_RENT', 
                                 'loan_intent_DEBTCONSOLIDATION', 'loan_intent_EDUCATION', 
                                 'loan_intent_HOMEIMPROVEMENT', 'loan_intent_MEDICAL', 
                                 'loan_intent_PERSONAL', 'loan_intent_VENTURE', 'loan_grade_A', 
                                 'loan_grade_B', 'loan_grade_C', 'loan_grade_D', 'loan_grade_E', 
                                 'loan_grade_F', 'loan_grade_G', 'cb_person_default_on_file_N', 
                                 'cb_person_default_on_file_Y']

    df = pd.DataFrame([data])
    categorical_columns = ['person_home_ownership', 'loan_intent', 'loan_grade', 'cb_person_default_on_file']
    
    # Handling missing features by setting a default value or skipping
    numerical_features = np.array([data.get(feature, 0.0) for feature in numerical_feature_names]).reshape(1, -1)
    # Ensure categorical features are correctly extracted
    categorical_features = np.array([data.get(feature, 0) for feature in categorical_columns]).reshape(1, -1)
    
    # Apply one-hot encoding to categorical features
    encoded_features = encoder.transform(categorical_features).toarray()
    
    # Scale numerical features
    scaled_numerical_features = scaler.transform(numerical_features)
    
    # Concatenate scaled numerical features and encoded categorical features
    processed_data = np.concatenate([scaled_numerical_features, encoded_features], axis=1)
    
    return processed_data

# Testing 3rd method - manual


In [96]:
new_data = {'person_age': 21, 'person_income': 9600, 'person_home_ownership': 'OWN', 'person_emp_length': 5, 'loan_intent': 'EDUCATION', 'loan_grade': 'B', 'loan_amnt': 1000, 'loan_int_rate': 11.14, 'loan_percent_income': 0.1, 'cb_person_default_on_file': 'N', 'cb_person_cred_hist_length': 2}

In [97]:
test_df = pd.DataFrame([new_data])

test_df

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
0,21,9600,OWN,5,EDUCATION,B,1000,11.14,0.1,N,2


In [98]:
# Generate our categorical variable lists
df_cat = test_df.dtypes[test_df.dtypes == "object"].index.tolist()
df_cat

['person_home_ownership',
 'loan_intent',
 'loan_grade',
 'cb_person_default_on_file']

In [99]:
# Create a OneHotEncoder instance
encoder = OneHotEncoder()

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(encoder.fit_transform(test_df[df_cat]).toarray())

# Add the encoded variable names to the dataframe
encode_df.columns = encoder.get_feature_names_out(df_cat)
encode_df.head()

Unnamed: 0,person_home_ownership_OWN,loan_intent_EDUCATION,loan_grade_B,cb_person_default_on_file_N
0,1.0,1.0,1.0,1.0


In [100]:
test_df = test_df.merge(encode_df, left_index=True, right_index=True)
test_df = test_df.drop(df_cat, 1)
test_df.head()

  test_df = test_df.drop(df_cat, 1)


Unnamed: 0,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,person_home_ownership_OWN,loan_intent_EDUCATION,loan_grade_B,cb_person_default_on_file_N
0,21,9600,5,1000,11.14,0.1,2,1.0,1.0,1.0,1.0


In [101]:
len(test_df.columns)

11

In [102]:
# Ensure all columns present in the training data are in the new data
training_categorical_columns = ['person_home_ownership_MORTGAGE', 'person_home_ownership_OTHER', 
                                 'person_home_ownership_OWN', 'person_home_ownership_RENT', 
                                 'loan_intent_DEBTCONSOLIDATION', 'loan_intent_EDUCATION', 
                                 'loan_intent_HOMEIMPROVEMENT', 'loan_intent_MEDICAL', 
                                 'loan_intent_PERSONAL', 'loan_intent_VENTURE', 'loan_grade_A', 
                                 'loan_grade_B', 'loan_grade_C', 'loan_grade_D', 'loan_grade_E', 
                                 'loan_grade_F', 'loan_grade_G', 'cb_person_default_on_file_N', 
                                 'cb_person_default_on_file_Y']

missing_cols = set(training_categorical_columns).difference(test_df.columns)
for col in missing_cols:
    test_df[col] = 0.0

# Reorder the columns to match the order in the training data
new_df = test_df[['person_age', 'person_income', 'person_emp_length', 
                                                'loan_amnt', 'loan_int_rate','loan_percent_income', 'cb_person_cred_hist_length'] + training_categorical_columns  ]

pd.set_option('display.max_columns', None)

# Now new_df contains the preprocessed data ready for prediction
new_df


Unnamed: 0,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,person_home_ownership_MORTGAGE,person_home_ownership_OTHER,person_home_ownership_OWN,person_home_ownership_RENT,loan_intent_DEBTCONSOLIDATION,loan_intent_EDUCATION,loan_intent_HOMEIMPROVEMENT,loan_intent_MEDICAL,loan_intent_PERSONAL,loan_intent_VENTURE,loan_grade_A,loan_grade_B,loan_grade_C,loan_grade_D,loan_grade_E,loan_grade_F,loan_grade_G,cb_person_default_on_file_N,cb_person_default_on_file_Y
0,21,9600,5,1000,11.14,0.1,2,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [110]:
X = new_df.values

In [111]:
X_scaler = scaler.fit_transform(X)

In [112]:
model.predict(X_scaler)[0][0]



0.14121939

In [106]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 8)                 216       
                                                                 
 dense_1 (Dense)             (None, 5)                 45        
                                                                 
 dense_2 (Dense)             (None, 1)                 6         
                                                                 
Total params: 267
Trainable params: 267
Non-trainable params: 0
_________________________________________________________________


In [113]:
# Model still predicts same value for different data