# **Churn Modeling**

In [50]:
import tensorflow
import numpy as np
import pandas as pd
import pickle 
import matplotlib.pyplot as plt 
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.model_selection import train_test_split


In [51]:
# Load the raw churn dataset from the CSV file in the working directory.
df = pd.read_csv("Churn_Modelling.csv")

In [52]:
# Preview the first rows to confirm the file parsed as expected.
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [53]:
# Summarise column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


# Pre-processing the data

In [54]:
# List the distinct raw labels in 'Gender' before encoding.
df['Gender'].unique()

array(['Female', 'Male'], dtype=object)

In [55]:
# Map the string labels in 'Gender' to integer codes. The fitted encoder is kept
# in `lb_gender` so the same mapping can be reapplied at prediction time.
lb_gender = LabelEncoder()
lb_gender.fit(df['Gender'])
df['Gender'] = lb_gender.transform(df['Gender'])

In [56]:
# Confirm 'Gender' now holds only the integer codes produced by the encoder.
df['Gender'].unique()

array([0, 1])

In [57]:
# List the distinct categories in 'Geography' to decide how to encode the column.
df['Geography'].unique()

array(['France', 'Spain', 'Germany'], dtype=object)

In [58]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode 'Geography'. The fitted encoder is kept in `ohe_geography` so
# the same category-to-column mapping can be reapplied at prediction time.
ohe_geography = OneHotEncoder()

# fit_transform yields a sparse matrix; densify it so it can back a DataFrame.
encoded_geography = ohe_geography.fit_transform(df[['Geography']]).toarray()

# One output column per category, named 'Geography_<category>'.
encoded_columns = ohe_geography.get_feature_names_out(['Geography'])

# Reuse df's index for the encoded frame: pd.concat(axis=1) aligns rows by index
# label, so a fresh RangeIndex would only line up while df still has its default
# 0..n-1 index (any earlier filtering/shuffling would silently misalign rows and
# introduce NaNs).
df_encoded = pd.DataFrame(encoded_geography, columns=encoded_columns, index=df.index)
df = pd.concat([df, df_encoded], axis=1)

# The original categorical column is now redundant.
df = df.drop('Geography', axis=1)


In [59]:
# Check that the one-hot columns were appended and 'Geography' was removed.
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,1,15634602,Hargrave,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,2,15647311,Hill,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,3,15619304,Onio,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,4,15701354,Boni,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,5,15737888,Mitchell,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [60]:
# RowNumber, CustomerId and Surname only identify a customer and carry no
# predictive signal, so remove them before modelling.
identifier_columns = ['RowNumber', 'CustomerId', 'Surname']
df = df.drop(columns=identifier_columns)

In [61]:
# Verify that the identifier columns are gone.
df.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [62]:
# Count missing values per column (all zeros means no imputation is needed).
df.isna().sum()

CreditScore          0
Gender               0
Age                  0
Tenure               0
Balance              0
NumOfProducts        0
HasCrCard            0
IsActiveMember       0
EstimatedSalary      0
Exited               0
Geography_France     0
Geography_Germany    0
Geography_Spain      0
dtype: int64

In [63]:
# Persist both fitted encoders so the prediction step can reload the exact
# mappings learned from the training data.
for pkl_path, fitted_encoder in (
    ("lbe_gender.pkl", lb_gender),
    ("ohe_geography.pkl", ohe_geography),
):
    with open(pkl_path, 'wb') as file:
        pickle.dump(fitted_encoder, file)
    

# Splitting Data for training and testing

In [64]:
# Separate the target ('Exited') from the predictor columns.
y = df['Exited']
X = df.drop(columns=['Exited'])

# Hold out 25% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [65]:
# Inspect the training features (pandas truncates the display of large frames).
X_train

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
4901,673,1,59,0,178058.06,2,0,1,21063.71,1.0,0.0,0.0
4375,850,1,41,8,60880.68,1,1,0,31825.84,0.0,1.0,0.0
6698,725,0,31,6,0.00,1,0,0,61326.43,1.0,0.0,0.0
9805,644,1,33,7,174571.36,1,0,1,43943.09,1.0,0.0,0.0
1101,703,1,29,9,0.00,2,1,0,50679.48,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
5734,768,1,54,8,69712.74,1,1,1,69381.05,1.0,0.0,0.0
5191,682,0,58,1,0.00,1,1,1,706.50,1.0,0.0,0.0
5390,735,0,38,1,0.00,3,0,0,92220.12,1.0,0.0,0.0
860,667,1,43,8,190227.46,1,1,0,97508.04,1.0,0.0,0.0


In [66]:
# Standardise the features. The scaler learns its statistics from the training
# split only and the same statistics are then applied to the test split.
scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [67]:
# Persist the fitted scaler so inference applies the same standardisation
# that was used during training.
with open("scaler.pkl", 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

# Building the ANN model 

In [68]:
import tensorflow as tf 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

In [69]:
## Build the ANN: two ReLU hidden layers feeding one sigmoid output unit
n_features = X_train.shape[1]

model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(n_features,)))  # hidden layer 1, connected to the input
model.add(Dense(32, activation='relu'))                             # hidden layer 2
model.add(Dense(1, activation='sigmoid'))                           # output layer

In [70]:
# Print each layer's output shape and parameter count.
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_15 (Dense)            (None, 64)                832       
                                                                 
 dense_16 (Dense)            (None, 32)                2080      
                                                                 
 dense_17 (Dense)            (None, 1)                 33        
                                                                 
Total params: 2,945
Trainable params: 2,945
Non-trainable params: 0
_________________________________________________________________


In [71]:
# Configure training: Adam optimiser, binary cross-entropy loss for the single
# sigmoid output, and accuracy reported alongside the loss.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [72]:
# Give every training run its own timestamped sub-directory under logs/fit/.
# The trailing slash matters: without it the timestamp is glued onto "fit"
# (e.g. "logs/fit20241118-114042"), so runs become siblings of logs/fit rather
# than children of it and `--logdir logs/fit` finds nothing.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# histogram_freq=1 asks the callback to compute histograms every epoch.
tensorflow_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [73]:
# Stop training once validation loss has gone 5 epochs without improving, and
# roll the model back to the weights from its best epoch.
earlyStopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

In [74]:
# Train for at most 100 epochs; early stopping normally ends the run sooner.
# NOTE(review): the held-out test split is also the validation set that drives
# early stopping here, so metrics on it are not an unbiased estimate of
# generalisation. Consider carving a separate validation split from X_train.
training_callbacks = [tensorflow_callback, earlyStopping_callback]
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_test, y_test),
    callbacks=training_callbacks,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100


In [75]:
# Save the trained network to 'model.h5' so the prediction section can reload it.
model.save('model.h5')

In [76]:
# Register the %tensorboard notebook magic (IPython reports if it is already loaded).
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [77]:
# Uncomment to open TensorBoard inline for one run; replace the directory below
# with the `log_dir` value generated when the TensorBoard callback was created.
#%tensorboard --logdir logs/fit20241118-114042/

# Prediction

In [78]:
from tensorflow.keras.models import load_model

In [79]:
def _unpickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as file:
        return pickle.load(file)


# Reload the trained network and the preprocessing objects saved earlier in
# this notebook, so prediction uses exactly the artifacts produced by training.
model = load_model('model.h5')

ohe_geography_model = _unpickle('ohe_geography.pkl')
lbe_gender_model = _unpickle('lbe_gender.pkl')
scaler_model = _unpickle('scaler.pkl')

In [80]:
# A single example customer, expressed with the raw (un-encoded) column values.
input_data = dict(
    CreditScore=600,
    Geography='France',
    Gender='Male',
    Age=40,
    Tenure=3,
    Balance=60000,
    NumOfProducts=2,
    HasCrCard=1,
    IsActiveMember=1,
    EstimatedSalary=50000,
)

In [86]:
# Ignore all warnings
# NOTE(review): with no category or module filter this silences every warning
# raised from here on, including ones that may point at real problems in the
# cells below. Prefer a narrower filter, or fix the cause of the warning.
import warnings
warnings.filterwarnings('ignore')

In [87]:
# Encode the categorical inputs with the encoders that were fitted during training.
# The geography value is wrapped in a one-column DataFrame named 'Geography',
# matching the frame the encoder was originally fitted on.
geography_encoded = ohe_geography_model.transform(
    pd.DataFrame({'Geography': [input_data['Geography']]})
).toarray()
gender_encoded = lbe_gender_model.transform([input_data['Gender']]).reshape(-1, 1)

# Assemble the feature row in EXACTLY the column order the scaler and the model
# were trained on: the numeric/encoded columns first, then the one-hot geography
# columns at the end (they were appended to the training frame by pd.concat).
# Placing the geography block right after CreditScore, as a positional array,
# would standardise every later value with another feature's mean/variance and
# feed it to the wrong input unit.
base_features = pd.DataFrame({
    'CreditScore': [input_data['CreditScore']],
    'Gender': gender_encoded.ravel(),
    'Age': [input_data['Age']],
    'Tenure': [input_data['Tenure']],
    'Balance': [input_data['Balance']],
    'NumOfProducts': [input_data['NumOfProducts']],
    'HasCrCard': [input_data['HasCrCard']],
    'IsActiveMember': [input_data['IsActiveMember']],
    'EstimatedSalary': [input_data['EstimatedSalary']],
})
geography_features = pd.DataFrame(
    geography_encoded,
    columns=ohe_geography_model.get_feature_names_out(['Geography']),
)
input_features = pd.concat([base_features, geography_features], axis=1)

# Scale the features using the loaded scaler. Passing a named DataFrame lets
# scikit-learn check the column names against those seen when it was fitted.
scaled_features = scaler_model.transform(input_features)

# Make a prediction; the network returns one sigmoid probability per input row.
prediction = model.predict(scaled_features)
churn_probability = float(prediction[0][0])

# Display the message based on the prediction
if churn_probability > 0.5:
    print("The customer is likely to churn.")
else:
    print("The customer is not likely to churn.")


The customer is not likely to churn.
