In [None]:
# Loading the libraries
import numpy as np
# numpy stands for numerical python
import pandas as pd
# pandas stands for panel data
import matplotlib.pyplot as plt
# pyplot is matplotlib's MATLAB-style plotting interface

In [None]:
# Read the customer churn data from the CSV file in the working directory
dataset = pd.read_csv("Churn_Modelling.csv")
# Preview the first rows to check that the data loaded properly
dataset.head()

In [None]:
# Feature matrix: the columns at positions 3 through 12, taken as a NumPy array.
# The encoding cells that follow treat column 1 as geography and column 2 as gender.
X = dataset.iloc[:, 3:13].to_numpy()
X

In [None]:
# Target vector: the column at position 13, converted from pandas to a NumPy array
y = dataset.iloc[:,13].values
# Only the last expression of a cell is displayed, so a bare `type(y)` in the
# middle of the cell shows nothing; print it to actually confirm the array type.
print(type(y))
y

In [None]:
# LabelEncoder turns text categories into integer codes.
# OneHotEncoder is imported here as well because the ColumnTransformer cell uses it.
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

labelencoder = LabelEncoder()
# Encode column 2 (gender) first and then column 1 (geography). The same encoder
# is re-fitted for each column, so afterwards it only remembers the geography classes.
for column_index in (2, 1):
    X[:, column_index] = labelencoder.fit_transform(X[:, column_index])
X

In [None]:
# ColumnTransformer applies a transformer to selected columns of the data
from sklearn.compose import ColumnTransformer

# One-hot encode column 1 under the freely chosen step name "Country".
# remainder="passthrough" keeps every other column, untouched.
# Constructing the transformer does not modify the data yet.
ct = ColumnTransformer(
    transformers=[("Country", OneHotEncoder(), [1])],
    remainder="passthrough",
)
# Fit the encoder and apply it to the data
X = ct.fit_transform(X)
X

In [None]:
# Wrap the array in a pandas DataFrame so that the columns can be referred to by name.
# Note that this converts X from a NumPy array to a pandas DataFrame.
X = pd.DataFrame(
    data=X,
    columns=[
        # the three one-hot columns produced for the country
        "X1", "X2", "X3",
        # the remaining features, in their original order
        "Credit Score", "Gender", "Age", "Tenure", "Balance",
        "NumofProducts", "HasCrCard", "IsActive Member", "Estimated Salary",
    ],
)
X

In [None]:
# Remove the first one-hot column: the other two already determine it,
# so keeping all three would only add a redundant feature.
X = X.drop(columns=["X1"])
X

In [None]:
# train_test_split lives in sklearn.model_selection
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; the fixed random_state makes
# the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Feature scaling with StandardScaler from sklearn.preprocessing
from sklearn.preprocessing import StandardScaler

# Learn the mean and standard deviation of each feature from the training data only
sc = StandardScaler().fit(X_train)
# Standardise both sets with the training statistics, so nothing about the
# test data leaks into the scaling.
# NOTE: X_train and X_test are overwritten; re-running this cell would refit
# the scaler on data that is already scaled.
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Importing the Keras libraries and packages
import keras
# Sequential: the model is a plain stack of layers, applied one after another
from keras.models import Sequential
# Dense: a fully connected layer (every unit is connected to every input)
from keras.layers import Dense

# NOTE: no random seed is set in this cell, so the initial weights and
# therefore the reported accuracy can differ from run to run.

# Initialising the Deep Learning Model
classifier = Sequential()
# First hidden layer: 6 units, weights drawn from a uniform distribution, ReLU activation.
# The input size is read from the training data instead of being hard-coded,
# so the model stays consistent if the feature set changes.
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = X_train.shape[1]))
# Second hidden layer
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
# Output layer: a single sigmoid unit, because this is a binary classification
# problem and the output is read as a probability
classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
# Compiling the DNN
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
# Fitting the DNN to the training set
classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)

In [None]:
# Test-set accuracy for a range of probability cut-offs
from sklearn import metrics
# Candidate cut-offs from 0.05 to 0.95 in steps of 0.05
i = [0.05, 0.1, 0.15, 0.2,0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
# The predicted probabilities do not depend on the cut-off, so run the model
# once here instead of once per cut-off inside the loop.
y_prob = classifier.predict(X_test)
accuracy = []
for cutoff in i:
    y_pred = (y_prob > cutoff) # compare each predicted probability with the cutoff
    accuracy.append(metrics.accuracy_score(y_test, y_pred))

accuracy

In [None]:
import seaborn as sns
from sklearn import metrics
# Predicting the Test set results
# A predicted probability above the 0.5 cut-off counts as the positive class
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
# confusion_matrix puts the true classes in rows and the predicted classes in
# columns; label the axes so the heatmap can be read on its own.
# The cells are counts, so format the annotations as integers.
ax = sns.heatmap(cm, annot = True, fmt = 'd')
ax.set(xlabel = "Predicted label", ylabel = "True label", title = "Confusion matrix (cut-off 0.5)");

In [None]:
# New Customer
# Geography: France (0,0)
# Credit Score: 600
# Gender: Male (1)
# Age: 40 years old
# Tenure: 3 years
# Balance: $60000
# Number of Products: 2
# Does this customer have a credit card ? Yes (1)
# Is this customer an Active Member: Yes (1)
# Estimated Salary: $50000
# Predict the class of this customer

# The scaler was fitted on a DataFrame with named columns, so the new row is
# built with the same column names in the same order. This keeps every value
# tied to its feature instead of relying on its position in a bare array.
new_customer_features = pd.DataFrame([{
    "X2": 0,
    "X3": 0,
    "Credit Score": 600,
    "Gender": 1,
    "Age": 40,
    "Tenure": 3,
    "Balance": 60000,
    "NumofProducts": 2,
    "HasCrCard": 1,
    "IsActive Member": 1,
    "Estimated Salary": 50000,
}])
# Scale the row with the training statistics before predicting
new_customer = classifier.predict(sc.transform(new_customer_features))
# new_customer holds the predicted probability,
# which is then compared with the cut-off
print("Probability of new customer:", new_customer)
new_prediction = (new_customer>0.5)
new_prediction