In [39]:
import pandas as pd
import tensorflow as tf
import pickle
import numpy as np
from tensorflow.keras.models import load_model

# Read the customer records that the trained model will score.
test_df = pd.read_csv("Data/test.csv")

# Preview the first five rows to confirm the columns were parsed as expected.
print(test_df.head(n=5))

       id  CustomerId    Surname  CreditScore Geography  Gender   Age  Tenure  \
0  165034    15773898   Lucchese          586    France  Female  23.0       2   
1  165035    15782418       Nott          683    France  Female  46.0       2   
2  165036    15807120         K?          656    France  Female  34.0       7   
3  165037    15808905  O'Donnell          681    France    Male  36.0       8   
4  165038    15607314    Higgins          752   Germany    Male  38.0      10   

     Balance  NumOfProducts  HasCrCard  IsActiveMember  EstimatedSalary  
0       0.00              2        0.0             1.0        160976.75  
1       0.00              1        1.0             0.0         72549.27  
2       0.00              2        1.0             0.0        138882.09  
3       0.00              1        1.0             0.0        113931.57  
4  121263.62              1        1.0             0.0        139431.00  


In [40]:
# Remove the identifier columns; they are not passed to the scaler or the model
# in the cells below.
# errors='ignore' keeps this cell idempotent: re-running it once the columns
# are already gone is a no-op instead of raising KeyError.
test_df = test_df.drop(columns=['CustomerId', 'Surname', 'id'], errors='ignore')

In [41]:
### Load the trained ANN model and the pickled preprocessing objects
model = load_model('model.h5')

# Load the gender label encoder, the geography encoder and the scaler.
# NOTE: pickle.load can execute arbitrary code embedded in the file it reads;
# only load artifacts that come from a trusted source.
with open('Data/label_encoder_gender.pkl', 'rb') as gender_fh:
    label_encoder_gender = pickle.load(gender_fh)

with open('Data/encode_geo.pkl', 'rb') as geo_fh:
    encode_geo = pickle.load(geo_fh)

with open('Data/scaler.pkl', 'rb') as scaler_fh:
    scaler = pickle.load(scaler_fh)



In [42]:
## One-hot encode Geography with the previously loaded encoder
geo = encode_geo.transform(test_df[['Geography']]).toarray()

# Reuse test_df's index so that the column-wise concat performed later aligns
# row-for-row. With a fresh RangeIndex, any non-default index on test_df
# (e.g. after filtering rows) would yield misaligned, NaN-padded rows.
geo_encoded_df = pd.DataFrame(
    geo,
    columns=encode_geo.get_feature_names_out(['Geography']),
    index=test_df.index,
)

In [43]:
# Replace the Gender strings with the integer codes produced by the loaded
# label encoder, then display the resulting frame.
gender_codes = label_encoder_gender.transform(test_df['Gender'])
test_df = test_df.assign(Gender=gender_codes)
test_df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,586,France,0,23.0,2,0.00,2,0.0,1.0,160976.75
1,683,France,0,46.0,2,0.00,1,1.0,0.0,72549.27
2,656,France,0,34.0,7,0.00,2,1.0,0.0,138882.09
3,681,France,1,36.0,8,0.00,1,1.0,0.0,113931.57
4,752,Germany,1,38.0,10,121263.62,1,1.0,0.0,139431.00
...,...,...,...,...,...,...,...,...,...,...
110018,570,Spain,1,29.0,7,116099.82,1,1.0,1.0,148087.62
110019,575,France,0,36.0,4,178032.53,1,1.0,1.0,42181.68
110020,712,France,1,31.0,2,0.00,2,1.0,0.0,16287.38
110021,709,France,0,32.0,3,0.00,1,1.0,1.0,158816.58


In [44]:
# Swap the raw Geography column for its one-hot columns, which are appended
# on the right-hand side of the frame.
features_without_geo = test_df.drop(columns=["Geography"])
test_df = pd.concat([features_without_geo, geo_encoded_df], axis=1)
test_df

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,586,0,23.0,2,0.00,2,0.0,1.0,160976.75,1.0,0.0,0.0
1,683,0,46.0,2,0.00,1,1.0,0.0,72549.27,1.0,0.0,0.0
2,656,0,34.0,7,0.00,2,1.0,0.0,138882.09,1.0,0.0,0.0
3,681,1,36.0,8,0.00,1,1.0,0.0,113931.57,1.0,0.0,0.0
4,752,1,38.0,10,121263.62,1,1.0,0.0,139431.00,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
110018,570,1,29.0,7,116099.82,1,1.0,1.0,148087.62,0.0,0.0,1.0
110019,575,0,36.0,4,178032.53,1,1.0,1.0,42181.68,1.0,0.0,0.0
110020,712,1,31.0,2,0.00,2,1.0,0.0,16287.38,1.0,0.0,0.0
110021,709,0,32.0,3,0.00,1,1.0,1.0,158816.58,1.0,0.0,0.0


In [45]:
# Apply the scaler loaded from Data/scaler.pkl to the fully encoded feature frame.
# The result is displayed as a NumPy array, so column labels are no longer attached.
test_scaled = scaler.transform(test_df)
test_scaled

array([[-0.88432411, -1.13761448, -1.70477925, ...,  0.86727152,
        -0.51576047, -0.52978452],
       [ 0.32752662, -1.13761448,  0.88829874, ...,  0.86727152,
        -0.51576047, -0.52978452],
       [-0.00979265, -1.13761448, -0.46461151, ...,  0.86727152,
        -0.51576047, -0.52978452],
       ...,
       [ 0.68983251,  0.87903241, -0.80283908, ...,  0.86727152,
        -0.51576047, -0.52978452],
       [ 0.65235259, -1.13761448, -0.69009656, ...,  0.86727152,
        -0.51576047, -0.52978452],
       [-0.44705838, -1.13761448, -0.12638395, ...,  0.86727152,
        -0.51576047, -0.52978452]])

In [46]:
# Score every row of the scaled test matrix with the loaded model.
# The displayed result holds one float32 value per input row (a single column).
prediction=model.predict(test_scaled)
prediction

[1m3439/3439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 165us/step


array([[0.00980349],
       [0.774892  ],
       [0.02465466],
       ...,
       [0.0192364 ],
       [0.18479326],
       [0.23395455]], dtype=float32)

In [47]:
# Pull out the score for the first test row only (row 0, the single output column).
# NOTE(review): the remaining rows of `prediction` are not inspected here.
prediction_prob = prediction[0, 0]
prediction_prob

0.009803487

In [48]:
# Turn `prediction_prob` into a human-readable verdict using a 0.5 cut-off.
verdict = (
    'The customer is likely to churn'
    if prediction_prob > 0.5
    else 'The customer is  not likely to churn'
)
print(verdict)

The customer is  not likely to churn
