In [3]:
!pip install seaborn matplotlib imblearn

Collecting seaborn
  Using cached seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting matplotlib
  Downloading matplotlib-3.10.0-cp313-cp313-win_amd64.whl.metadata (11 kB)
Collecting imblearn
  Downloading imblearn-0.0-py2.py3-none-any.whl.metadata (355 bytes)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.3.1-cp313-cp313-win_amd64.whl.metadata (5.4 kB)
Collecting cycler>=0.10 (from matplotlib)
  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.55.3-cp313-cp313-win_amd64.whl.metadata (168 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.8-cp313-cp313-win_amd64.whl.metadata (6.3 kB)
Collecting pillow>=8 (from matplotlib)
  Downloading pillow-11.1.0-cp313-cp313-win_amd64.whl.metadata (9.3 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Downloading pyparsing-3.2.1-py3-none-any.whl.metadata (5.0 kB)
Collecting imbalanced-learn (from imble

In [4]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pickle

In [7]:
# Load the dataset
df = pd.read_csv('C:\\Users\\HP\\Downloads\\Customer-Churn-Records.csv')

# Display basic information about the dataset
print(df.info())
print(df.isna().sum())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 18 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   RowNumber           10000 non-null  int64  
 1   CustomerId          10000 non-null  int64  
 2   Surname             10000 non-null  object 
 3   CreditScore         10000 non-null  int64  
 4   Geography           10000 non-null  object 
 5   Gender              10000 non-null  object 
 6   Age                 10000 non-null  int64  
 7   Tenure              10000 non-null  int64  
 8   Balance             10000 non-null  float64
 9   NumOfProducts       10000 non-null  int64  
 10  HasCrCard           10000 non-null  int64  
 11  IsActiveMember      10000 non-null  int64  
 12  EstimatedSalary     10000 non-null  float64
 13  Exited              10000 non-null  int64  
 14  Complain            10000 non-null  int64  
 15  Satisfaction Score  10000 non-null  int64  
 16  Card 

In [8]:
# Define numerical and categorical columns for reference
numerical_columns = ['RowNumber', 'CustomerId', 'Surname', 'CreditScore',
                     'Age', 'Balance', 'EstimatedSalary', 'Point Earned']

In [9]:
# Map Card Type to numerical values
def map_card_tiers(df, column_name):
    card_map = {'SILVER': 1, 'GOLD': 2, 'DIAMOND': 3, 'PLATINUM': 4}
    df['Card_Tier'] = df[column_name].map(card_map)
    return df

df = map_card_tiers(df, 'Card Type')

In [10]:
# Clean column names by replacing spaces with underscores
df.columns = df.columns.str.replace(' ', '_')

# Remove unnecessary columns
df = df.drop(['RowNumber', 'CustomerId', 'Surname', 'Card_Type'], axis=1)

In [11]:
# Handle categorical variables using one-hot encoding
df_dummies = pd.get_dummies(df[['Geography', 'Gender']], dtype=int)
df_clean = pd.concat([df, df_dummies], axis=1)
df_clean = df_clean.drop(['Geography', 'Gender'], axis=1)

In [12]:
# Rename the target variable for clarity
df_clean['Churn'] = df_clean['Exited']
df_clean = df_clean.drop(['Exited'], axis=1)
df_clean.head(2)

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Complain,Satisfaction_Score,Point_Earned,Card_Tier,Geography_France,Geography_Germany,Geography_Spain,Gender_Female,Gender_Male,Churn
0,619,42,2,0.0,1,1,1,101348.88,1,2,464,3,1,0,0,1,0,1
1,608,41,1,83807.86,1,0,1,112542.58,1,3,456,3,0,0,1,1,0,0


In [13]:
# Oversample the minority class using SMOTE
smote = SMOTE(random_state=42)
X = df_clean.drop(columns='Churn')
y = df_clean['Churn']
X_resampled, y_resampled = smote.fit_resample(X, y)

In [14]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Initialize the Random Forest model
rfc = RandomForestClassifier(random_state=42)

In [15]:
# Define hyperparameter grid for optimization
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

In [16]:
# Perform hyperparameter tuning using GridSearchCV
grid_search = GridSearchCV(estimator=rfc, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1, verbose=2)

In [17]:
# Train the model with the best parameters
print("Starting GridSearchCV...")
grid_search.fit(X_train, y_train)

Starting GridSearchCV...
Fitting 5 folds for each of 108 candidates, totalling 540 fits


In [18]:
# Display the best parameters
print(f"Best Parameters: {grid_search.best_params_}")

Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}


In [19]:
# Evaluate the model using the test set
best_model = grid_search.best_estimator_
y_test_pred = best_model.predict(X_test)

In [20]:
# Calculate accuracy and display classification metrics
accuracy = accuracy_score(y_test, y_test_pred)
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:\n", classification_report(y_test, y_test_pred))

Accuracy: 1.00
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      1651
           1       1.00      1.00      1.00      1534

    accuracy                           1.00      3185
   macro avg       1.00      1.00      1.00      3185
weighted avg       1.00      1.00      1.00      3185



In [21]:
# Save the trained model for future use
with open('customer_churn_model.pkl', 'wb') as file:
    pickle.dump(best_model, file)

print("Model saved successfully!")


Model saved successfully!
