In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv("Churn_Modelling.csv")

In [3]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
df = df.drop(['RowNumber', 'CustomerId', 'Surname'], axis = 'columns')

In [5]:
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [6]:
df.dtypes

CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
Exited               int64
dtype: object

In [7]:
df['Geography'].unique()

array(['France', 'Spain', 'Germany'], dtype=object)

In [8]:
df['Gender'].unique()

array(['Female', 'Male'], dtype=object)

In [9]:
new_df = pd.get_dummies(data = df, columns = ['Gender', 'Geography'])

In [10]:
new_df.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Gender_Female,Gender_Male,Geography_France,Geography_Germany,Geography_Spain
0,619,42,2,0.0,1,1,1,101348.88,1,True,False,True,False,False
1,608,41,1,83807.86,1,0,1,112542.58,0,True,False,False,False,True
2,502,42,8,159660.8,3,1,0,113931.57,1,True,False,True,False,False
3,699,39,1,0.0,2,0,0,93826.63,0,True,False,True,False,False
4,850,43,2,125510.82,1,1,1,79084.1,0,True,False,False,False,True


In [11]:
new_df.dtypes

CreditScore            int64
Age                    int64
Tenure                 int64
Balance              float64
NumOfProducts          int64
HasCrCard              int64
IsActiveMember         int64
EstimatedSalary      float64
Exited                 int64
Gender_Female           bool
Gender_Male             bool
Geography_France        bool
Geography_Germany       bool
Geography_Spain         bool
dtype: object

In [12]:
new_df.isna().sum()

CreditScore          0
Age                  0
Tenure               0
Balance              0
NumOfProducts        0
HasCrCard            0
IsActiveMember       0
EstimatedSalary      0
Exited               0
Gender_Female        0
Gender_Male          0
Geography_France     0
Geography_Germany    0
Geography_Spain      0
dtype: int64

In [16]:
to_be_scaled = ['CreditScore', 'Balance', 'EstimatedSalary']

In [17]:
from sklearn.preprocessing import MinMaxScaler

In [18]:
scale = MinMaxScaler(feature_range = (0, 1))
new_df[to_be_scaled] = scale.fit_transform(new_df[to_be_scaled])

In [19]:
new_df.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Gender_Female,Gender_Male,Geography_France,Geography_Germany,Geography_Spain
0,0.538,42,2,0.0,1,1,1,0.506735,1,True,False,True,False,False
1,0.516,41,1,0.334031,1,0,1,0.562709,0,True,False,False,False,True
2,0.304,42,8,0.636357,3,1,0,0.569654,1,True,False,True,False,False
3,0.698,39,1,0.0,2,0,0,0.46912,0,True,False,True,False,False
4,1.0,43,2,0.500246,1,1,1,0.3954,0,True,False,False,False,True


In [20]:
X = new_df.drop(['Exited'], axis = 'columns')
y = new_df['Exited']

In [21]:
X.shape

(10000, 13)

In [22]:
y.shape

(10000,)

In [23]:
from sklearn.model_selection import train_test_split

In [33]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [25]:
X_train.shape

(8000, 13)

In [26]:
X_test.shape

(2000, 13)

In [28]:
import tensorflow as tf
from tensorflow import keras

In [29]:
model = keras.Sequential([
    keras.layers.Dense(units = 13, activation = 'relu'),
    keras.layers.Dense(units = 10, activation = 'relu'),
    keras.layers.Dense(units = 1, activation = 'sigmoid')
])

In [31]:
model.compile(
    metrics = ['accuracy'],
    optimizer = 'SGD',
    loss = "binary_crossentropy"
)

In [36]:
model.fit(X_train, y_train, epochs = 10)

Epoch 1/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7939 - loss: 0.5272
Epoch 2/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7967 - loss: 0.5136
Epoch 3/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8002 - loss: 0.5017
Epoch 4/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7975 - loss: 0.4969
Epoch 5/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7920 - loss: 0.4938
Epoch 6/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7959 - loss: 0.4900
Epoch 7/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7919 - loss: 0.4940
Epoch 8/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.7936 - loss: 0.4804
Epoch 9/10
[1m250/250[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x15e8c8b30>

In [37]:
model.summary()

In [38]:
model.evaluate(X_test, y_test)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7989 - loss: 0.4522


[0.46410319209098816, 0.7985000014305115]