# Part 1 - Libraries

In [4]:
import pandas as pd
import tensorflow as tf
import numpy as np


In [5]:
# Show the installed TensorFlow version.
tf.__version__

'2.18.0'

# Part 2 - Import the Dataset

In [22]:
# Read the churn dataset from its CSV file into a DataFrame.
churn_csv_path = '/content/Churn_Modelling.csv'
df = pd.read_csv(churn_csv_path)

In [23]:
# Preview the first ten rows of the raw data.
df.head(10)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0
5,6,15574012,Chu,645,Spain,Male,44,8,113755.78,2,1,0,149756.71,1
6,7,15592531,Bartlett,822,France,Male,50,7,0.0,2,1,1,10062.8,0
7,8,15656148,Obinna,376,Germany,Female,29,4,115046.74,4,1,0,119346.88,1
8,9,15792365,He,501,France,Male,44,4,142051.07,2,0,1,74940.5,0
9,10,15592389,H?,684,France,Male,27,2,134603.88,1,1,1,71725.73,0


In [24]:
# Feature matrix: everything except the row number, customer id, surname
# and the target column itself.
non_feature_columns = ["RowNumber", "CustomerId", "Surname", "Exited"]
X = df.drop(columns=non_feature_columns)

# Target vector: the "Exited" column.
y = df["Exited"]

# Part 3 - Encode Categorical Data

In [25]:
# Columns with dtype `object` still need to be encoded as numbers.
object_typed = X.select_dtypes(include=['object'])
categorical_columns = object_typed.columns

In [26]:
# Show which columns were selected as object-typed.
categorical_columns

Index(['Geography', 'Gender'], dtype='object')

In [27]:
# List the distinct values in the Geography column.
df['Geography'].unique()

array(['France', 'Spain', 'Germany'], dtype=object)

In [28]:
from sklearn.preprocessing import OneHotEncoder,LabelEncoder

# Replace the Gender strings with integer codes. LabelEncoder numbers the
# sorted class labels, so here Female -> 0 and Male -> 1.
le = LabelEncoder()
le.fit(X['Gender'])
X['Gender'] = le.transform(X['Gender'])

In [29]:
# Inspect the encoded Gender column.
X['Gender']

Unnamed: 0,Gender
0,0
1,0
2,0
3,0
4,0
...,...
9995,1
9996,1
9997,0
9998,1


In [30]:
# One-hot encode Geography. drop_first=True omits the first category, which
# then acts as the baseline (both remaining dummy columns are False/0).
#
# The guard keeps the cell re-runnable: after the first execution X no longer
# has a 'Geography' column, so running the cell again unguarded would raise
# a KeyError.
if 'Geography' in X.columns:
    geo_dummies = pd.get_dummies(X['Geography'], prefix='Geography', drop_first=True)

    # Drop the original Geography column and append the dummy columns in its place.
    X = pd.concat([X.drop('Geography', axis=1), geo_dummies], axis=1)

X.head()


Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,False,False
1,608,0,41,1,83807.86,1,0,1,112542.58,False,True
2,502,0,42,8,159660.8,3,1,0,113931.57,False,False
3,699,0,39,1,0.0,2,0,0,93826.63,False,False
4,850,0,43,2,125510.82,1,1,1,79084.1,False,True


In [31]:
# Display the encoded feature matrix.
X

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,False,False
1,608,0,41,1,83807.86,1,0,1,112542.58,False,True
2,502,0,42,8,159660.80,3,1,0,113931.57,False,False
3,699,0,39,1,0.00,2,0,0,93826.63,False,False
4,850,0,43,2,125510.82,1,1,1,79084.10,False,True
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,False,False
9996,516,1,35,10,57369.61,1,1,1,101699.77,False,False
9997,709,0,36,7,0.00,1,0,1,42085.58,False,False
9998,772,1,42,3,75075.31,2,1,0,92888.52,True,False


In [33]:
# Plain-text print of the encoded feature matrix.
print(X)

      CreditScore  Gender  Age  Tenure    Balance  NumOfProducts  HasCrCard  \
0             619       0   42       2       0.00              1          1   
1             608       0   41       1   83807.86              1          0   
2             502       0   42       8  159660.80              3          1   
3             699       0   39       1       0.00              2          0   
4             850       0   43       2  125510.82              1          1   
...           ...     ...  ...     ...        ...            ...        ...   
9995          771       1   39       5       0.00              2          1   
9996          516       1   35      10   57369.61              1          1   
9997          709       0   36       7       0.00              1          0   
9998          772       1   42       3   75075.31              2          1   
9999          792       0   28       4  130142.79              1          1   

      IsActiveMember  EstimatedSalary  Geography_Ge

# Part 4 - Train Test Split

In [34]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Part 5 - Scaling

In [None]:
# Feature scaling is very important for ANNs and is a step that must be done.

In [None]:
# Scratch note (meaning not documented): 342 334 336 328 322
# Kept as a comment because bare numbers in a code cell are a SyntaxError
# and would stop "Restart & Run All".

In [35]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and standard deviation from the training split
# only, then apply that same transformation to both splits so no test-set
# statistics leak into training.
sc = StandardScaler().fit(X_train)

X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [36]:
# Inspect the scaled training features.
X_train

array([[ 0.35649971,  0.91324755, -0.6557859 , ...,  1.36766974,
        -0.57946723, -0.57638802],
       [-0.20389777,  0.91324755,  0.29493847, ...,  1.6612541 ,
         1.72572313, -0.57638802],
       [-0.96147213,  0.91324755, -1.41636539, ..., -0.25280688,
        -0.57946723,  1.73494238],
       ...,
       [ 0.86500853, -1.09499335, -0.08535128, ..., -0.1427649 ,
        -0.57946723, -0.57638802],
       [ 0.15932282,  0.91324755,  0.3900109 , ..., -0.05082558,
        -0.57946723, -0.57638802],
       [ 0.47065475,  0.91324755,  1.15059039, ..., -0.81456811,
         1.72572313, -0.57638802]])

# Part 6 - Building the ANN

Initializing the ANN

In [38]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling are repeatable across runs.
tf.keras.utils.set_random_seed(42)

# Start from an empty Sequential model; layers are appended with ann.add().
ann = tf.keras.models.Sequential()

Adding the input layer and the first hidden layer

In [39]:
# `units` sets how many neurons this hidden layer has; `activation` names the
# activation function applied to them.
first_hidden = tf.keras.layers.Dense(units=6, activation='relu')
ann.add(first_hidden)

Adding the second hidden layer

In [40]:
# A further hidden layer is created by repeating the same call as for the
# first one.
second_hidden = tf.keras.layers.Dense(units=6, activation='relu')
ann.add(second_hidden)

Adding the output layer

In [41]:
# One output neuron because the prediction is a binary classification;
# sigmoid is used as the activation because it suits a two-class outcome.
output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid')
ann.add(output_layer)

# Part 7 - Train the ANN

Compiling the ANN

In [42]:
# compile() configures the optimizer, the loss function and the metrics
# reported during training.
ann.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

Not: İkili sınıflandırma yaptığımız için binary_crossentropy kullandık;
ancak çok sınıflı bir sınıflandırma yapsaydık categorical_crossentropy kullanırdık.


Training the ANN on the training set

In [43]:
# batch_size: how many rows are processed before the cost function is
# evaluated and the weights are updated.
# epochs: how many training iterations (passes over the training set) to run.
ann.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=100,
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6316 - loss: 0.6662
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7951 - loss: 0.4858
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8016 - loss: 0.4433
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8263 - loss: 0.4089
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8208 - loss: 0.4102
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8234 - loss: 0.3988
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8224 - loss: 0.4004
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8314 - loss: 0.3838
Epoch 9/100
[1m250/250[0m [32

<keras.src.callbacks.history.History at 0x7b268c627450>

# Part 8 - Predict Value

In [64]:
# Predict churn for a single customer.
#
# The scaler and the network were fitted on the columns in this order:
#   CreditScore, Gender, Age, Tenure, Balance, NumOfProducts, HasCrCard,
#   IsActiveMember, EstimatedSalary, Geography_Germany, Geography_Spain
# The earlier positional list put three geography flags first and left out
# EstimatedSalary. It still had 11 values, so nothing failed, but every value
# was scaled and fed to the model as the wrong feature.
new_customer = pd.DataFrame([{
    'CreditScore': 619,
    'Gender': 0,                   # Female (LabelEncoder: Female -> 0, Male -> 1)
    'Age': 42,
    'Tenure': 2,
    'Balance': 60000,
    'NumOfProducts': 2,
    'HasCrCard': 1,
    'IsActiveMember': 1,
    'EstimatedSalary': 101348.88,  # previously missing; taken from the first dataset row, adjust as needed
    'Geography_Germany': 0,        # France is the dropped baseline, so both dummies are 0
    'Geography_Spain': 0,
}])

# Order the columns by the names the scaler recorded at fit time, so the layout
# always matches what the model was trained on.
new_customer = new_customer[list(sc.feature_names_in_)]

print(ann.predict(sc.transform(new_customer)) > 0.5)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[[False]]




In [62]:
# Show the first raw row for reference.
df.head(1)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1


In [63]:
# Show the scaled training features again.
X_train

array([[ 0.35649971,  0.91324755, -0.6557859 , ...,  1.36766974,
        -0.57946723, -0.57638802],
       [-0.20389777,  0.91324755,  0.29493847, ...,  1.6612541 ,
         1.72572313, -0.57638802],
       [-0.96147213,  0.91324755, -1.41636539, ..., -0.25280688,
        -0.57946723,  1.73494238],
       ...,
       [ 0.86500853, -1.09499335, -0.08535128, ..., -0.1427649 ,
        -0.57946723, -0.57638802],
       [ 0.15932282,  0.91324755,  0.3900109 , ..., -0.05082558,
        -0.57946723, -0.57638802],
       [ 0.47065475,  0.91324755,  1.15059039, ..., -0.81456811,
         1.72572313, -0.57638802]])

# Part 9 - Evaluation Metrics

Confusion Matrix

In [69]:
from sklearn.metrics import confusion_matrix, accuracy_score

# The sigmoid output is thresholded at 0.5 to obtain a boolean class label.
y_prob = ann.predict(X_test)
y_pred = y_prob > 0.5

# Confusion matrix: rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_test, y_pred)
print(cm)

# Fraction of test rows classified correctly (shown as the cell output).
accuracy_score(y_test, y_pred)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[[1545   62]
 [ 218  175]]


0.86