# Import libraries

In [2]:
import pandas as pd
import tensorflow as tf
import numpy as np

## Import DataSet

In [10]:
# Load the churn data set from the CSV file in the working directory.
dataset = pd.read_csv('Churn_Modelling.csv')

# Features: every column from index 3 up to (but not including) the last one.
# The first three columns are skipped because they are not useful for the
# classification task.
x = dataset.iloc[:, 3:-1].to_numpy()

# Target: the last column only.
y = dataset.iloc[:, -1].to_numpy()

In [11]:
# Inspect the feature matrix as loaded; the country and gender values are still strings.
print(x)

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]


## **Encoding Categorical Data**

Label Encoding the Gender Column

In [12]:
from sklearn.preprocessing import LabelEncoder

# Column index 2 of x holds the gender as text; replace it with integer codes.
# The fitted encoder is kept in `le` so the same mapping can be applied again.
le = LabelEncoder()
le.fit(x[:, 2])
x[:, 2] = le.transform(x[:, 2])

In [13]:
# Confirm that the gender column now holds integer codes (0/1) instead of strings.
print(x)

[[619 'France' 0 ... 1 1 101348.88]
 [608 'Spain' 0 ... 0 1 112542.58]
 [502 'France' 0 ... 1 0 113931.57]
 ...
 [709 'France' 0 ... 0 1 42085.58]
 [772 'Germany' 1 ... 1 0 92888.52]
 [792 'France' 0 ... 1 0 38190.78]]


One-hot encoding the "country" column

In [14]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode column index 1 (the country column, i.e. the second column of x).
# remainder='passthrough' keeps every other column instead of dropping it.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [1])],
    remainder='passthrough',
)

# Fit and apply the transformer, then make sure the result is a NumPy array.
x = ct.fit_transform(x)
x = np.array(x)

In [15]:
# Check the result: the one-hot indicator values now appear at the start of each row.
print(x)

[[1.0 0.0 0.0 ... 1 1 101348.88]
 [0.0 0.0 1.0 ... 0 1 112542.58]
 [1.0 0.0 0.0 ... 1 0 113931.57]
 ...
 [1.0 0.0 0.0 ... 0 1 42085.58]
 [0.0 1.0 0.0 ... 1 0 92888.52]
 [1.0 0.0 0.0 ... 1 0 38190.78]]


## Split the Dataset into Training and Test Sets

In [16]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The fixed random_state makes the
# split repeatable from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

## Feature Scaling


In [17]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training set only, then apply the
# same fitted scaler to both sets so the test data does not influence them.
sc = StandardScaler()
sc.fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

## **Building ANN**

In [18]:
# Start from an empty sequential model; layers are appended one at a time.
ann = tf.keras.Sequential()

Adding Input Layer and First Hidden Layer

In [19]:
# First hidden layer: 6 ReLU units (the unit count was chosen experimentally).
# No input shape is declared on this layer.
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))

Add Second Hidden Layer

In [20]:
# Second hidden layer: 4 ReLU units (the unit count was chosen experimentally).
ann.add(tf.keras.layers.Dense(units=4, activation='relu'))

Add Output Layer

In [22]:
# Output layer: a single unit, because this is binary classification
# (the label is either 0 or 1). The sigmoid activation maps the output into
# the range (0, 1), so it can be read as the probability of the positive class.
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))