In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


# Load the Kaggle bank-customer churn CSV into a DataFrame, then preview
# the first rows as this cell's output.
churn_csv_path = '/kaggle/input/bank-customer-churn-prediction/Churn_Modelling.csv'
df = pd.read_csv(churn_csv_path)
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [11]:
# Drop the columns that are not used as model inputs.
# errors="ignore" keeps this cell idempotent: `df` is rebound below, so on a
# second run these columns are already gone and a plain drop would raise
# KeyError.
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')

# Convert the categorical columns to numeric indicator columns.
# drop_first=True omits the first level of each encoded column. On a re-run
# there are no object/category columns left, so this call is a no-op.
df = pd.get_dummies(df, drop_first=True)

# 'Exited' is the target: 1 = churned (left), 0 = not churned (stayed).
x = df.drop(columns='Exited')  # feature matrix: every column except the target
y = df['Exited']               # target vector


In [12]:
# Split the dataset into train and test partitions: 20% is held out for
# testing, the split is seeded, and `y` is passed as the stratification labels.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


In [15]:
# Model 1: random forest classifier.
# Hyperparameters are collected in one mapping: 100 trees and a fixed seed.
rf_params = {"n_estimators": 100, "random_state": 42}
rf_model = RandomForestClassifier(**rf_params)

# Fit on the training partition; the fitted estimator is this cell's output.
rf_model.fit(x_train, y_train)



In [19]:
# Prediction: score the held-out test partition with the fitted random forest.
y_pred = rf_model.predict(x_test)

# Compute each metric once, then print it under its heading.
rf_confusion = confusion_matrix(y_test, y_pred)
rf_report = classification_report(y_test, y_pred)
rf_accuracy = accuracy_score(y_test, y_pred)

print("Confusion Matrix:\n", rf_confusion)
print("Classification Report:\n", rf_report)
print("Accuracy Score:", rf_accuracy)


Confusion Matrix:
 [[1541   52]
 [ 219  188]]
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.97      0.92      1593
           1       0.78      0.46      0.58       407

    accuracy                           0.86      2000
   macro avg       0.83      0.71      0.75      2000
weighted avg       0.86      0.86      0.85      2000

Accuracy Score: 0.8645


In [20]:
#gradient boosting 
from sklearn.ensemble import GradientBoostingClassifier


In [22]:
# Model 2: gradient boosting classifier.
# Hyperparameters are collected in one mapping so they are easy to scan.
gb_params = {
    "n_estimators": 100,
    "learning_rate": 0.1,
    "max_depth": 3,
    "random_state": 42,
}
gb_model = GradientBoostingClassifier(**gb_params)

# Fit on the training partition; the fitted estimator is this cell's output.
gb_model.fit(x_train, y_train)


In [24]:
# Score the held-out test partition with the fitted gradient boosting model.
y_pred_gb = gb_model.predict(x_test)

# Compute each metric once, then print it under its heading.
gb_confusion = confusion_matrix(y_test, y_pred_gb)
gb_report = classification_report(y_test, y_pred_gb)
gb_accuracy = accuracy_score(y_test, y_pred_gb)

print("Gradient Boosting - Confusion Matrix:\n", gb_confusion)
print("\nGradient Boosting - Classification Report:\n", gb_report)
print("Gradient Boosting - Accuracy Score:", gb_accuracy)


Gradient Boosting - Confusion Matrix:
 [[1541   52]
 [ 208  199]]

Gradient Boosting - Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.97      0.92      1593
           1       0.79      0.49      0.60       407

    accuracy                           0.87      2000
   macro avg       0.84      0.73      0.76      2000
weighted avg       0.86      0.87      0.86      2000

Gradient Boosting - Accuracy Score: 0.87
