In [None]:
''' Given a bank customer, build a neural network-based classifier that can determine whether
they will leave or not in the next 6 months.
Dataset Description: The case study is from an open-source dataset from Kaggle.
The dataset contains 10,000 sample points with 14 distinct features such as
CustomerId, CreditScore, Geography, Gender, Age, Tenure, Balance, etc.
Link to the Kaggle project:
https://www.kaggle.com/barelydedicated/bank-customer-churn-modeling
Perform the following steps:
1. Read the dataset.
2. Separate the features from the target and split the dataset into training and test sets.
3. Normalize the train and test data.
4. Initialize and build the model. Identify the points of improvement and implement the same.
5. Print the accuracy score and confusion matrix (5 points). '''

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras

In [2]:
# Load the churn dataset from the CSV file (path is relative to the working
# directory) and show it as the cell output.
df = pd.read_csv(filepath_or_buffer='Churn_Modelling.csv')
df

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [3]:
# One-hot encode the two categorical columns. drop_first=True omits the first
# level of each, so only Geography_Germany, Geography_Spain and Gender_Male
# are added to the frame.
df = pd.get_dummies(
    data=df,
    columns=['Geography', 'Gender'],
    drop_first=True,
)
df

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_Germany,Geography_Spain,Gender_Male
0,1,15634602,Hargrave,619,42,2,0.00,1,1,1,101348.88,1,0,0,0
1,2,15647311,Hill,608,41,1,83807.86,1,0,1,112542.58,0,0,1,0
2,3,15619304,Onio,502,42,8,159660.80,3,1,0,113931.57,1,0,0,0
3,4,15701354,Boni,699,39,1,0.00,2,0,0,93826.63,0,0,0,0
4,5,15737888,Mitchell,850,43,2,125510.82,1,1,1,79084.10,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,39,5,0.00,2,1,0,96270.64,0,0,0,1
9996,9997,15569892,Johnstone,516,35,10,57369.61,1,1,1,101699.77,0,0,0,1
9997,9998,15584532,Liu,709,36,7,0.00,1,0,1,42085.58,1,0,0,0
9998,9999,15682355,Sabbatini,772,42,3,75075.31,2,1,0,92888.52,1,1,0,1


In [4]:
# Remove the row/customer identifier columns and expose the target as the
# LAST column under the name 'isExited' (the split cell reads the target with
# iloc[:, -1]).
#
# The frame is rebuilt instead of mutated in place, and every step tolerates
# columns that are already gone (errors='ignore'; rename skips missing
# labels), so re-running this cell is a no-op rather than a KeyError.
df = (
    df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')
      .rename(columns={'Exited': 'isExited'})
)
# Move the target to the end while keeping the feature columns in their existing order.
df = df[[column for column in df.columns if column != 'isExited'] + ['isExited']]
df

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_Germany,Geography_Spain,Gender_Male,isExited
0,619,42,2,0.00,1,1,1,101348.88,0,0,0,1
1,608,41,1,83807.86,1,0,1,112542.58,0,1,0,0
2,502,42,8,159660.80,3,1,0,113931.57,0,0,0,1
3,699,39,1,0.00,2,0,0,93826.63,0,0,0,0
4,850,43,2,125510.82,1,1,1,79084.10,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,39,5,0.00,2,1,0,96270.64,0,0,1,0
9996,516,35,10,57369.61,1,1,1,101699.77,0,0,1,0
9997,709,36,7,0.00,1,0,1,42085.58,0,0,0,1
9998,772,42,3,75075.31,2,1,0,92888.52,1,0,1,1


In [10]:
# Every column except the last is a feature; the last column is the target.
x = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Learn the per-feature min/max from the training split only, then apply that
# same mapping to both splits so the test data does not influence the scaling.
scaler = MinMaxScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [12]:
# Seed the Python, NumPy and backend random generators before any layer is
# created. Without this, weight initialisation and the batch shuffling done by
# fit() differ on every run, so the reported accuracy / confusion matrix
# cannot be reproduced after Restart & Run All.
keras.utils.set_random_seed(42)

# Binary classifier: two 64-unit ReLU hidden layers followed by a single
# sigmoid unit. The input width is taken from the training matrix so it
# always matches the number of feature columns.
model = keras.Sequential([
    keras.layers.Input(shape=(x_train.shape[1],)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid'),
])

In [14]:
# Binary cross-entropy pairs with the single sigmoid output; accuracy is
# reported after every epoch.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# NOTE: the test split is passed as validation data, so the per-epoch val_*
# numbers are computed on the same rows that are scored later.
model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(x_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2a3393cbb20>

In [17]:
# Turn the sigmoid outputs into hard 0/1 labels: anything strictly above 0.5
# is labelled 1.
test_probabilities = model.predict(x_test)
y_pred = (test_probabilities > 0.5).astype('int32')
y_pred



array([[0],
       [0],
       [0],
       ...,
       [1],
       [0],
       [0]])

In [18]:
# Score the thresholded predictions against the held-out labels and print the
# accuracy followed by the confusion matrix, one per line.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(accuracy, cm, sep='\n')

0.8535
[[1566   41]
 [ 252  141]]


In [19]:
# Show the preprocessed frame again (feature columns followed by the isExited target).
df

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_Germany,Geography_Spain,Gender_Male,isExited
0,619,42,2,0.00,1,1,1,101348.88,0,0,0,1
1,608,41,1,83807.86,1,0,1,112542.58,0,1,0,0
2,502,42,8,159660.80,3,1,0,113931.57,0,0,0,1
3,699,39,1,0.00,2,0,0,93826.63,0,0,0,0
4,850,43,2,125510.82,1,1,1,79084.10,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,39,5,0.00,2,1,0,96270.64,0,0,1,0
9996,516,35,10,57369.61,1,1,1,101699.77,0,0,1,0
9997,709,36,7,0.00,1,0,1,42085.58,0,0,0,1
9998,772,42,3,75075.31,2,1,0,92888.52,1,0,1,1


In [73]:

# Feature names in the column order of the training matrix, i.e. every column
# of the preprocessed frame except the trailing isExited target.
_FEATURE_ORDER = (
    'CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
    'IsActiveMember', 'EstimatedSalary', 'Geography_Germany', 'Geography_Spain',
    'Gender_Male',
)


def predict_exit_probability(model, input_data, feature_scaler=None):
    """Predict whether a single customer will exit.

    Despite the name, the value returned is the hard class label: the model
    output for the customer rounded to the nearest integer (1 = exits,
    0 = stays), not the raw probability.

    Args:
        model: Fitted object exposing ``predict(array)``; element ``[0][0]``
            of its result is read as the exit probability.
        input_data: Mapping of feature name -> numeric value. It must hold
            exactly the training features; the order of the keys does not
            matter because values are arranged in training column order.
        feature_scaler: Optional object exposing ``transform(array)``. When
            omitted, the notebook-level ``scaler`` fitted on the training
            split is used.

    Returns:
        int: 0 or 1.

    Raises:
        ValueError: If ``input_data`` lacks a training feature or carries a
            key that is not a training feature.
    """
    missing = [name for name in _FEATURE_ORDER if name not in input_data]
    unexpected = [name for name in input_data if name not in _FEATURE_ORDER]
    if missing or unexpected:
        raise ValueError(
            'input_data must contain exactly the training features; '
            f'missing={missing}, unexpected={unexpected}'
        )

    # Build the single-row matrix by feature name, not by dict insertion
    # order: a differently ordered dict would otherwise feed values into the
    # wrong columns without any error.
    input_array = np.array(
        [[input_data[name] for name in _FEATURE_ORDER]], dtype=float
    )

    # Scale with the same transformation that was applied to the training data.
    active_scaler = scaler if feature_scaler is None else feature_scaler
    input_array = active_scaler.transform(input_array)

    predictions = model.predict(input_array)

    # Collapse the probability to a 0/1 label.
    return round(float(predictions[0][0]))

# Example usage with user input.
# Each (feature, prompt) pair is asked in turn and the answer is parsed as a
# float; the pairs are listed in the order the prompts appear.
input_data = {
    feature: float(input(prompt))
    for feature, prompt in (
        ('CreditScore', 'Enter Credit Score: '),
        ('Age', 'Enter Age: '),
        ('Tenure', 'Enter Tenure: '),
        ('Balance', 'Enter Balance: '),
        ('NumOfProducts', 'Enter Number of Products: '),
        ('HasCrCard', 'Enter Has Credit Card (1 for Yes, 0 for No): '),
        ('IsActiveMember', 'Enter IsActiveMember (1 for Yes, 0 for No): '),
        ('EstimatedSalary', 'Enter Estimated Salary: '),
        ('Geography_Germany', 'Enter Geography_Germany (1 if Germany, 0 otherwise): '),
        ('Geography_Spain', 'Enter Geography_Spain (1 if Spain, 0 otherwise): '),
        ('Gender_Male', 'Enter Gender_Male (1 if Male, 0 if Female): '),
    )
}

# Run the collected answers through the trained model and report the result.
predicted_exit_probability = predict_exit_probability(model, input_data)

print(f'The predicted exit probability is: {predicted_exit_probability}')


Enter Credit Score: -33
Enter Age: 77
Enter Tenure: 1
Enter Balance: 2333
Enter Number of Products: 5
Enter Has Credit Card (1 for Yes, 0 for No): 0
Enter IsActiveMember (1 for Yes, 0 for No): 0
Enter Estimated Salary: 2323
Enter Geography_Germany (1 if Germany, 0 otherwise): 1
Enter Geography_Spain (1 if Spain, 0 otherwise): 0
Enter Gender_Male (1 if Male, 0 if Female): 0
The predicted exit probability is: 1
