<h2 align='center'>Customer Churn Prediction</h2>

Dataset Used Here Is: Credit Card Customer Churn Dataset
https://www.kaggle.com/datasets/rjmanoj/credit-card-customer-churn-prediction

We start with classical ML models (Logistic Regression, Random Forest and XGBoost) and then move on to an ANN.

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
#importing all the essential libraries

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [None]:
# Load the churn dataset from the local CSV and preview the first rows.
df = pd.read_csv('Churn_Modelling.csv')
df.head()
# This is a binary classification problem, hence the 0/1 values in the last column.

In [None]:
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics of the numeric columns.
df.describe()

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# Number of rows that are exact duplicates of an earlier row.
df.duplicated().sum()

In [None]:
# Class balance of the target: how many customers stayed (0) and how many left the bank (1).
df['Exited'].value_counts()

The class counts above are uneven, so this is an imbalanced classification problem.

In [None]:
# Number of customers per Geography value.
df['Geography'].value_counts()

In [None]:
# Number of customers per Gender value.
df['Gender'].value_counts()

In [None]:
# Exploratory Data Analysis (EDA)
# Bar chart of the target variable: how many customers fall into each Exited class.
fig, ax = plt.subplots(figsize=(4, 2))
sns.countplot(data=df, x='Exited', ax=ax)
ax.set_title('Distribution of Churn (Exited)')
ax.set_xlabel('Churn (0 = No, 1 = Yes)')
ax.set_ylabel('Count')
plt.show()

In [None]:
# Remove identifier-like columns that are not used as features.
# errors='ignore' keeps the cell re-runnable once the columns are already gone.
id_like_columns = ['RowNumber', 'CustomerId', 'Surname']
df = df.drop(columns=id_like_columns, errors='ignore')

In [None]:
# Encode categorical variables (e.g., Geography, Gender) as integer codes.
from sklearn.preprocessing import LabelEncoder, StandardScaler

# One fitted encoder is kept per column. Refitting a single shared encoder would
# overwrite its classes_ on every iteration, so only the last column's mapping
# could be inspected or inverted afterwards.
# NOTE(review): integer codes impose an arbitrary order on nominal features, which
# linear models treat as numeric; consider one-hot encoding if that matters.
label_encoders = {}
categorical_columns = df.select_dtypes(include=['object']).columns
for column in categorical_columns:
    label_encoder = LabelEncoder()
    df[column] = label_encoder.fit_transform(df[column])
    label_encoders[column] = label_encoder  # e.g. label_encoders['Geography'].classes_

In [None]:
# Heatmap of the pairwise correlations between all columns of df.
fig, ax = plt.subplots(figsize=(10, 8))
correlations = df.corr()
sns.heatmap(correlations, annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
ax.set_title('Correlation Matrix')
plt.show()

In [None]:
# Target (y) is the Exited column; features (X) are every other column.
y = df['Exited']
X = df.drop(columns=['Exited'])

In [None]:
# Hold out 20% of the rows for testing; stratify on y so both splits keep the same class ratio.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Standardize the features. The scaler is fitted on the training split only and
# then applied to both splits, so no test-set statistics leak into training.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Handle imbalanced data by oversampling the minority class (training split only).
from imblearn.over_sampling import SMOTE, SMOTENC

# Geography and Gender are integer-coded nominal features. Plain SMOTE interpolates
# between neighbouring samples, which would create in-between values that match no
# real category. SMOTENC instead assigns synthetic samples an existing category
# taken from the neighbours, and interpolates only the continuous columns.
nominal_columns = ['Geography', 'Gender']
# X_train_scaled is a plain array in the same column order as X_train, so the
# column positions looked up on X_train apply to it directly.
nominal_positions = [
    X_train.columns.get_loc(name) for name in nominal_columns if name in X_train.columns
]

if nominal_positions:
    smote = SMOTENC(categorical_features=nominal_positions, random_state=42)
else:
    # No nominal columns present: plain SMOTE is sufficient.
    smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train_scaled, y_train)

In [None]:
# Model Training and Evaluation
# 1. Logistic Regression: trained on the balanced training data, predicting on the scaled test split.
from sklearn.linear_model import LogisticRegression

lr_model = LogisticRegression(random_state=42).fit(X_train_balanced, y_train_balanced)
lr_predictions = lr_model.predict(X_test_scaled)

In [None]:
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    confusion_matrix,
)

# Headline metrics for Logistic Regression on the held-out test split,
# followed by the per-class report.
print("\nLogistic Regression Results:")
for caption, scorer in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(caption, scorer(y_test, lr_predictions))
print("\nClassification Report:")
print(classification_report(y_test, lr_predictions))

In [None]:
# Confusion matrix heatmap for Logistic Regression (rows: actual class, columns: predicted class).
fig, ax = plt.subplots(figsize=(6, 4))
lr_confusion = confusion_matrix(y_test, lr_predictions)
sns.heatmap(lr_confusion, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Confusion Matrix - Logistic Regression')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()

In [None]:
# 2. Random Forest Classifier: 100 trees, trained on the balanced training data.
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_balanced, y_train_balanced
)
rf_predictions = rf_model.predict(X_test_scaled)

In [None]:
# Headline metrics for Random Forest on the held-out test split,
# followed by the per-class report.
print("\nRandom Forest Results:")
for caption, scorer in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(caption, scorer(y_test, rf_predictions))
print("\nClassification Report:")
print(classification_report(y_test, rf_predictions))

In [None]:
# Confusion matrix heatmap for Random Forest (rows: actual class, columns: predicted class).
fig, ax = plt.subplots(figsize=(6, 4))
rf_confusion = confusion_matrix(y_test, rf_predictions)
sns.heatmap(rf_confusion, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Confusion Matrix - Random Forest')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()

In [None]:
# 3. XGBoost Classifier: log-loss evaluation metric, trained on the balanced training data.
import xgboost as xgb

xgb_settings = {'random_state': 42, 'eval_metric': 'logloss'}
xgb_model = xgb.XGBClassifier(**xgb_settings)
xgb_model.fit(X_train_balanced, y_train_balanced)
xgb_predictions = xgb_model.predict(X_test_scaled)

In [None]:
# Headline metrics for XGBoost on the held-out test split,
# followed by the per-class report.
print("\nXGBoost Results:")
for caption, scorer in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(caption, scorer(y_test, xgb_predictions))
print("\nClassification Report:")
print(classification_report(y_test, xgb_predictions))

In [None]:
# Confusion matrix heatmap for XGBoost (rows: actual class, columns: predicted class).
fig, ax = plt.subplots(figsize=(6, 4))
xgb_confusion = confusion_matrix(y_test, xgb_predictions)
sns.heatmap(xgb_confusion, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Confusion Matrix - XGBoost')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()

In [None]:
# Feature importance from the fitted Random Forest, most important feature first.
importance_table = pd.DataFrame({
    'Feature': X.columns,
    'Importance': rf_model.feature_importances_,
})
feature_importance = importance_table.sort_values(by='Importance', ascending=False)

# Horizontal bar chart: one bar per feature, in the sorted order above.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=feature_importance, x='Importance', y='Feature', ax=ax)
ax.set_title('Feature Importance - Random Forest')
plt.show()

<h2 align="center" style="color: orange;">Customer Churn Prediction Model Using ANN</h2>

In [None]:
# Rebuild features and target for the ANN; y is taken as a plain NumPy array here.
# NOTE: this reassigns X, y, X_train, X_test, y_train and y_test from the earlier
# classical-ML cells, so those cells must be re-run before reusing their results.
X = df.drop(columns=['Exited'])
y = df['Exited'].values

# The target is imbalanced, so stratify on y to keep the churn ratio identical in
# the train and test splits (consistent with the split used for the classical models).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, stratify=y
)

In [2]:
from sklearn.preprocessing import StandardScaler

# Standardize the ANN inputs: fit on the training split only, then apply the
# same transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_trf = scaler.transform(X_train)
X_test_trf = scaler.transform(X_test)

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject