In [17]:
# Import necessary libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython import get_ipython
from IPython.display import display

In [18]:
# Read the bank-customer records from the local CSV file into a DataFrame.
# NOTE(review): the path is absolute and machine-specific; adjust it when
# running the notebook elsewhere.

dataset = pd.read_csv(
    filepath_or_buffer="C:/Users/asus/Documents/Python Scripts/ Customer Churn/BankCustomers.csv"
)

In [19]:
# Preview the top five records of the dataset

dataset.head(n=5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [20]:
# Pull out the independent variables (features) as a NumPy array

X = dataset.iloc[:, 3:13].to_numpy()  # positional columns 3 through 12 (end index 13 is exclusive)

In [21]:
# Rebuild a labelled DataFrame from the raw feature array.
#
# The array handed over by the previous cell mixes numbers and strings, so it
# is object-typed and every column of the new frame would stay `object` as
# well. infer_objects() soft-converts the columns that hold only numbers back
# to proper numeric dtypes and leaves the text columns ('Geography', 'Gender')
# untouched.

X = pd.DataFrame(X, columns=['CreditScore', 'Geography', 'Gender', 'Age', 'Tenure',
                                 'Balance', 'NumOfProducts', 'HasCrCard',
                                 'IsActiveMember', 'EstimatedSalary']).infer_objects()

In [22]:
# One-hot encode the 'Geography' column; drop_first=True leaves out the
# indicator column of the first category.

states = pd.get_dummies(X.loc[:, 'Geography'], drop_first=True)
states.head(n=5)

Unnamed: 0,Germany,Spain
0,False,False
1,False,True
2,False,False
3,False,False
4,False,True


In [23]:
# One-hot encode the 'Gender' column; drop_first=True leaves out the
# indicator column of the first category.

gender = pd.get_dummies(X.loc[:, 'Gender'], drop_first=True)
gender.head(n=5)

Unnamed: 0,Male
0,False
1,False
2,False
3,False
4,False


In [24]:
# Remove the raw 'Geography' and 'Gender' text columns from the feature frame

X = X.drop(columns=['Geography', 'Gender'])

In [25]:
# Place the dummy-variable columns side by side with the remaining features

X = pd.concat(objs=[X, states, gender], axis="columns")
X.head(n=5)

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Germany,Spain,Male
0,619,42,2,0.0,1,1,1,101348.88,False,False,False
1,608,41,1,83807.86,1,0,1,112542.58,False,True,False
2,502,42,8,159660.8,3,1,0,113931.57,False,False,False
3,699,39,1,0.0,2,0,0,93826.63,False,False,False
4,850,43,2,125510.82,1,1,1,79084.1,False,True,False


In [26]:
# Hold out part of the data for testing

from sklearn.model_selection import train_test_split

# Target variable: the column at position 13
Y = dataset.iloc[:, 13].to_numpy()

# 20% of the rows go to the test set and the remaining 80% to training;
# random_state=0 pins the shuffle so the split is repeatable.
X_train, X_test, Ytrain, Y_test = train_test_split(
    X,
    Y,
    random_state=0,
    test_size=0.2,
)

In [None]:
# Standardise the features with StandardScaler

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

# Learn the scaling statistics from the training data, then apply them to it
X_train = sc.fit(X_train).transform(X_train)

# Reuse the already-fitted scaler on the test data (no refitting)
X_test = sc.transform(X_test)

In [None]:
!pip install tensorflow

In [None]:
# Build the Artificial Neural Network (ANN)

import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
 # Create a sequential model

# Feed-forward binary classifier: two hidden ReLU layers of 6 units each,
# followed by a single sigmoid unit.
#
# The input width is taken from the training matrix instead of being
# hard-coded to 11, so the network stays in sync with the feature set
# (assumes X_train is the 2-D samples-by-features matrix from the split and
# scaling cells). An explicit Input layer replaces the `input_dim` keyword,
# which Keras 3 warns about on layers.
Classifier = Sequential()
Classifier.add(tf.keras.Input(shape=(X_train.shape[1],)))
Classifier.add(Dense(units=6, activation='relu', kernel_initializer="uniform"))
Classifier.add(Dense(units=6, activation='relu', kernel_initializer="uniform"))
Classifier.add(Dense(units=1, activation='sigmoid', kernel_initializer="uniform"))

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# reported as a metric.

Classifier.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=['accuracy'],
)


In [None]:
# Fit the network on the training set: 100 epochs in mini-batches of 10 samples

Classifier.fit(x=X_train, y=Ytrain, epochs=100, batch_size=10)

In [38]:
# Run the trained network on the test set to obtain its raw outputs
y_pred = Classifier.predict(x=X_test)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


In [None]:
# Threshold the predicted probabilities at 0.5 to get boolean class predictions
y_pred = np.greater(y_pred, 0.5)

In [None]:
# Evaluate the model on the held-out test set
from sklearn.metrics import confusion_matrix, accuracy_score

# Confusion matrix of the true test labels against the predicted labels
cm = confusion_matrix(Y_test, y_pred)

# Only the last expression of a cell is rendered automatically, so the matrix
# has to be displayed explicitly; previously it was computed but never shown.
display(cm)

# Fraction of test samples classified correctly (rendered as the cell output)
accuracy_score(Y_test, y_pred)

Imports

numpy: Used for numerical operations, especially with arrays and matrices.


pandas: Used for data manipulation and analysis, particularly with DataFrames.
matplotlib.pyplot: Used for creating visualizations, such as plots and charts.
IPython.get_ipython: Provides access to the current IPython InteractiveShell instance.
IPython.display.display: Used to display objects in the output area of a code cell.
sklearn.model_selection.train_test_split: Splits data into random train and test subsets.
sklearn.preprocessing.StandardScaler: Standardizes features by removing the mean and scaling to unit variance.
tensorflow: An open-source machine learning framework.
tensorflow.keras.models.Sequential: A linear stack of layers for building neural networks.
tensorflow.keras.layers.Dense: A densely connected neural network layer.
sklearn.metrics.confusion_matrix: Computes a confusion matrix to evaluate the accuracy of a classification.
sklearn.metrics.accuracy_score: Calculates the accuracy score of a classification model.
Methods

pd.read_csv(): Reads data from a CSV file into a pandas DataFrame.
dataset.head(): Displays the first few rows of a DataFrame.
pd.DataFrame(): Creates a pandas DataFrame from an array or other data structures.
pd.get_dummies(): Converts categorical variables into dummy/indicator variables.
X.drop(): Removes specified columns from a DataFrame.
pd.concat(): Concatenates two or more DataFrames along a particular axis.
train_test_split(): Splits data into training and testing sets.
sc.fit_transform(): Fits a StandardScaler to data and then transforms it.
sc.transform(): Transforms data using a pre-fitted StandardScaler.
Sequential(): Creates a Keras Sequential model.
Classifier.add(): Adds a layer to a Keras model.
Classifier.compile(): Configures the learning process of a Keras model.
Classifier.fit(): Trains a Keras model on data.
Classifier.predict(): Generates predictions from a Keras model.
confusion_matrix(): Creates a confusion matrix.
accuracy_score(): Calculates the accuracy score.
Attributes and keyword arguments

iloc: Used to select rows and columns from a DataFrame by their integer location.
values: Returns the NumPy array representation of a pandas DataFrame.
columns: Specifies the column names of a DataFrame.
axis: Determines the axis along which an operation is performed (0 for rows, 1 for columns).
test_size: Specifies the proportion of data to include in the test split.
random_state: Controls the shuffling applied to the data before applying the split.
activation: Specifies the activation function for a neural network layer.
input_dim: Specifies the number of input features for a neural network layer.
units: Specifies the number of neurons in a neural network layer.
kernel_initializer: Specifies the weight initialization method for a neural network layer.
optimizer: Specifies the optimization algorithm for training a neural network.
loss: Specifies the loss function to be minimized during training.
metrics: Specifies the evaluation metrics to be monitored during training.
batch_size: Specifies the number of samples per gradient update.
epochs: Specifies the number of times the training data is iterated over.