In [37]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import pickle
import pandas as pd
import numpy as np


In [38]:
# Load the trained ANN together with the preprocessing objects that were
# pickled alongside it (one-hot encoder, label encoder, scaler).
#
# NOTE(security): pickle.load can execute arbitrary code while deserialising.
# Only load .pkl files that you produced yourself or otherwise trust.
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


model = load_model('model.h5')

onehot_encoder_geo = _load_pickle('onehot_encoder_geo.pkl')
label_encoder_gender = _load_pickle('label_encoder_gender.pkl')
scaler = _load_pickle('scaler.pkl')

In [39]:
# Example customer record to score. Keys are the raw feature names; the
# insertion order below becomes the column order of the DataFrame built later.
input_data = dict(
    CreditScore=600,
    Geography='France',
    Gender='Male',
    Age=40,
    Tenure=3,
    Balance=60000,
    NumOfProducts=2,
    HasCrCard=1,
    IsActiveMember=1,
    EstimatedSalary=50000,
)
# Next steps: the string-valued fields must be converted to numbers, and then
# all numeric values are scaled with the saved standard scaler. That is why the
# encoders and the scaler were stored as pickle files.

In [40]:
# Remaining steps:
#   1. convert the categorical features to numerical values
#   2. apply the saved standard scaler
#   3. run the prediction with the loaded model

In [41]:
# One-hot encode the Geography value with the loaded encoder and wrap the
# result in a DataFrame whose columns carry the encoder's feature names.
geo_columns = onehot_encoder_geo.get_feature_names_out(['Geography'])
geo_encoded = onehot_encoder_geo.transform([[input_data['Geography']]]).toarray()
geo_encoded_df = pd.DataFrame(geo_encoded, columns=geo_columns)
geo_encoded_df



Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0


In [42]:
# `input_data` is a plain key/value mapping; wrapping it in a list makes pandas
# build a one-row DataFrame with one column per key.
input_df = pd.DataFrame(data=[input_data])
input_df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,600,France,Male,40,3,60000,2,1,1,50000


In [43]:
# Geography has more than two values, so it was one-hot encoded above.
# Gender is encoded with the loaded label encoder instead, which replaces the
# string with a single integer code.
#
# The code is computed from the raw `input_data['Gender']` value rather than
# from the DataFrame column. Re-encoding an already-encoded column fails on the
# unseen label, so reading the raw value keeps this cell safe to re-run.
input_df['Gender'] = label_encoder_gender.transform([input_data['Gender']])
input_df


Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,600,France,1,40,3,60000,2,1,1,50000


In [44]:
# Replace the raw Geography column with its one-hot encoded columns.
# (Gender needs no concatenation: it was label-encoded in place in input_df.)
#
# axis=1 joins column-wise; the default axis=0 would stack the frames row-wise.
#
# Besides 'Geography', any one-hot columns appended by a previous run of this
# cell are dropped too, and errors='ignore' skips names that are absent. This
# keeps the cell idempotent instead of raising KeyError on a second run.
input_df = pd.concat(
    [
        input_df.drop(columns=['Geography', *geo_encoded_df.columns], errors='ignore'),
        geo_encoded_df,
    ],
    axis=1,
)
input_df


Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,600,1,40,3,60000,2,1,1,50000,1.0,0.0,0.0


In [45]:
# Apply the loaded scaler so that every feature is on a comparable scale
# before the row is passed to the model.
input_scaled = scaler.transform(input_df)
input_scaled


array([[-0.53598516,  0.91324755,  0.10479359, -0.69539349, -0.25781119,
         0.80843615,  0.64920267,  0.97481699, -0.87683221,  1.00150113,
        -0.57946723, -0.57638802]])

In [46]:
# Run the loaded model on the scaled row; the result is displayed as the
# cell output.
prediction = model.predict(input_scaled)
prediction



array([[0.02057762]], dtype=float32)

In [47]:
# Pull the single predicted probability out of the 2-D prediction array
# (first row, first column).
prediction_proba = prediction[0, 0]

In [48]:
# Display the scalar probability extracted from the prediction array.
prediction_proba

0.02057762

In [49]:
# Turn the probability into a verdict: values strictly above 0.5 are reported
# as likely churn, everything else as not likely to churn.
churn_message = (
    'the customer is likely to churn.'
    if prediction_proba > 0.5
    else 'the customer is not likely to churn'
)
print(churn_message)

the customer is not likely to churn


In [50]:
# Why one-hot encoding takes more steps than label encoding. You need to:
#   1. Transform the column (transform(...)).
#   2. Convert it to an array (toarray()).
#   3. Get the new column names (get_feature_names_out).
#   4. Create a new DataFrame with those names.
#   5. Drop the old column and join the new one-hot encoded columns.
# That's why it feels like "more work".