In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf

## Load the data and inspect its structure

In [None]:
# Load the customer table; the relative path is resolved against the kernel's working directory.
df = pd.read_csv('Churn_Modelling.csv')

In [None]:
# Print column names, non-null counts and dtypes to spot missing values and non-numeric columns.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [None]:
# Features: every column from position 3 (CreditScore) up to, but excluding, the last one.
# Positions 0-2 (RowNumber, CustomerId, Surname) are left out of the feature matrix.
X = df.iloc[:, 3:-1].to_numpy()

# Label: the final column (Exited).
y = df.iloc[:, -1].to_numpy()

## Encoding categorical features

In [None]:
# Gender sits at feature index 2; replace its string labels with integer codes in place.
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
gender_codes = le.fit_transform(X[:, 2])
X[:, 2] = gender_codes

print(X[:, 2])

[0 0 0 ... 0 1 0]


In [None]:
# One-hot encode Geography (feature index 1).
#
# The transform overwrites X and places the new dummy columns first, so after
# one run index 1 holds a 0.0/1.0 dummy instead of the country name. Running
# the cell a second time would then one-hot encode that dummy and add
# redundant geography columns. The guard below makes the cell safe to re-run:
# it only encodes while index 1 still holds the raw string labels.

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

GEOGRAPHY_COL = 1

if isinstance(X[0, GEOGRAPHY_COL], str):
    ct = ColumnTransformer(
        transformers=[('encoder', OneHotEncoder(), [GEOGRAPHY_COL])],
        remainder='passthrough',  # keep every other feature column unchanged
    )
    X = np.array(ct.fit_transform(X))

print(X)

[[1.0 0.0 1.0 ... 1 1 101348.88]
 [1.0 0.0 0.0 ... 0 1 112542.58]
 [1.0 0.0 1.0 ... 1 0 113931.57]
 ...
 [1.0 0.0 1.0 ... 0 1 42085.58]
 [0.0 1.0 0.0 ... 1 0 92888.52]
 [1.0 0.0 1.0 ... 1 0 38190.78]]


## Train test split and feature scaling

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; random_state pins the shuffle used for the split.
split = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split

In [None]:
from sklearn.preprocessing import StandardScaler

# The scaler is fitted on the training split only and then reused, unchanged,
# on the test split (the left-hand tuple element is evaluated first).
sc = StandardScaler()
X_train, X_test = sc.fit_transform(X_train), sc.transform(X_test)

## Building ANN and making predictions

In [None]:
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created. Without this, weight initialisation and batch shuffling differ on
# every run and the metrics reported further down cannot be reproduced.
# NOTE: exact bit-for-bit repeatability can still depend on hardware and the
# installed TensorFlow build.
import random

SEED = 0
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Initialise the ANN as an empty stack of layers
ann = tf.keras.models.Sequential()

# First hidden layer: 6 ReLU units (no input shape is declared here)
ann.add(tf.keras.layers.Dense(units=6, activation="relu"))

# Second hidden layer: 6 ReLU units
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))

# Output layer: one sigmoid unit, i.e. a single value in (0, 1) per row
ann.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# The network is now defined; it still has to be compiled and trained.

In [None]:
# Configure training: Adam optimiser, binary cross-entropy loss, accuracy as the reported metric.
ann.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Train on the training split for 200 epochs with mini-batches of 32 rows.
ann.fit(x=X_train, y=y_train, epochs=200, batch_size=32)

<keras.callbacks.History at 0x7f419de9d990>

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Threshold the network's sigmoid outputs at 0.5 to obtain boolean class predictions.
y_pred = ann.predict(X_test) > 0.5

# Compare the predictions with the held-out labels.
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print("CM: ", cm)
print("Accuracy: ", accuracy)

CM:  [[1502   93]
 [ 185  220]]
Accuracy:  0.861
