In [4]:
# used random forest model to predict exited....................

import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# USING FUNCTION TO HANDLE_OUTLIERS
def handle_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5*IQR
    upper_bound = Q3 + 1.5*IQR
    df = df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
    return df



# DATASET
df = pd.read_csv(r"C:\Users\nh013\Desktop\bank churn dataset\Customer-Churn-Records.csv")

# FEATURE SELECT
df_selected = df[['CreditScore', 'Geography', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Exited', 'Complain', 'Satisfaction Score', 'Card Type']]

# IDENTIFY MISSING VALUES
print(df.isnull().sum())

# DROP ROWS WITH MISSING VALUES
df.dropna(inplace=True)

# FILL MISSING VALUES WITH MEAN
mean = df.mean()
df.fillna(mean, inplace=True)

# FILL MISSING VALUES WITH MODE
mode = df.mode().iloc[0]
df.fillna(mode, inplace=True)

# FILL MISSING VALUES WITH FORWARD FILL
df.fillna(method='ffill', inplace=True)

# FILL MISSING VALUES WITH BACKWARD FILL
df.fillna(method='bfill', inplace=True)

# DROP THE GEOGRAPHY COLUMN
df.drop('Geography', axis=1, inplace=True)


# DROP THE COLUMN
df.drop('Surname', axis=1, inplace=True)


# ONE HOT ENCODING
df = pd.get_dummies(df, columns=['Gender','Card Type'])

# HANDLE OUTLIERS
df = handle_outliers(df, 'Exited')

# REMOVE DUPLICATE ROWS
df.drop_duplicates(inplace=True)

# NORMALIZE AND SCALE ALL NUMERICAL COLUMNS
scaler = MinMaxScaler()
num_cols = df.select_dtypes(include='number').columns
df[num_cols] = scaler.fit_transform(df[num_cols])

scaler = StandardScaler()
num_cols = df.select_dtypes(include='number').columns
df[num_cols] = scaler.fit_transform(df[num_cols])

# ADD ROLLING MEAN AND STANDARD DEVIATION FEATURES
rolling_cols = ['CreditScore', 'Age', 'Balance']
window_size = 3

for col in rolling_cols:
    df[f'{col}_rolling_mean'] = df[col].rolling(window=window_size).mean().fillna(0)
    df[f'{col}_rolling_std'] = df[col].rolling(window=window_size).std().fillna(0)

# SPLITT DATA
X = df.drop('Exited', axis=1)
y = df['Exited']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# RANDOM FOREST MODEL
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)

# PREDICT ON THE TEST SET
y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1_score, _ = classification_report(y_test, y_pred).split()[-4:]


# EVALUATE PERFORMENCE
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1_score)
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("Target Variable:")
print(y)

RowNumber             0
CustomerId            0
Surname               0
CreditScore           0
Geography             0
Gender                0
Age                   0
Tenure                0
Balance               0
NumOfProducts         0
HasCrCard             0
IsActiveMember        0
EstimatedSalary       0
Exited                0
Complain              0
Satisfaction Score    0
Card Type             0
Point Earned          0
dtype: int64


  mean = df.mean()


Accuracy: 1.0
Precision: 1.00
Recall: 1.00
F1-score: 1.00
Confusion Matrix:
[[1593]]
Target Variable:
1       0.0
3       0.0
4       0.0
6       0.0
8       0.0
       ... 
9993    0.0
9994    0.0
9995    0.0
9996    0.0
9999    0.0
Name: Exited, Length: 7962, dtype: float64


In [5]:
#USED NEURAL NETWORL MODEL MLPCLASSIFIER .................

import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report


# USING FUNCTION TO HANDLE_OUTLIERS
def handle_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5*IQR
    upper_bound = Q3 + 1.5*IQR
    df = df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
    return df


# DATASET
df = pd.read_csv(r"C:\Users\nh013\Desktop\bank churn dataset\Customer-Churn-Records.csv")

# FEATURE SELECT
df_selected = df[['CreditScore', 'Geography', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Exited', 'Complain', 'Satisfaction Score', 'Card Type']]

# IDENTIFY MISSING VALUES
print(df.isnull().sum())

# DROP ROWS WITH MISSING VALUES
df.dropna(inplace=True)

# FILL MISSING VALUES WITH MEAN
mean = df.mean()
df.fillna(mean, inplace=True)

# FILL MISSING VALUES WITH MODE
mode = df.mode().iloc[0]
df.fillna(mode, inplace=True)

# FILL MISSING VALUES WITH FORWARD FILL
df.fillna(method='ffill', inplace=True)

# FILL MISSING VALUES WITH BACKWARD FILL
df.fillna(method='bfill', inplace=True)

# DROP THE GEOGRAPHY COLUMN
df.drop('Geography', axis=1, inplace=True)

# DROP THE COLUMN
df.drop('Surname', axis=1, inplace=True)



# ONE HOT ENCODING
df = pd.get_dummies(df, columns=['Gender','Card Type'])

# HANDLE OUTLIERS
df = handle_outliers(df, 'Exited')

# REMOVE DUPLICATE ROWS
df.drop_duplicates(inplace=True)

# NORMALIZE AND SCALE ALL NUMERICAL COLUMNS
scaler = MinMaxScaler()
num_cols = df.select_dtypes(include='number').columns
df[num_cols] = scaler.fit_transform(df[num_cols])

scaler = StandardScaler()
num_cols = df.select_dtypes(include='number').columns
df[num_cols] = scaler.fit_transform(df[num_cols])

# ADD ROLLING MEAN AND STANDARD DEVIATION FEATURES
rolling_cols = ['CreditScore', 'Age', 'Balance']
window_size = 3

# Compute rolling mean and standard deviation for selected columns
for col in rolling_cols:
    df[f'{col}_rolling_mean'] = df[col].rolling(window=window_size).mean().fillna(0)
    df[f'{col}_rolling_std'] = df[col].rolling(window=window_size).std().fillna(0)

    


# SPLIT DATA
X = df.drop('Exited', axis=1)
y = df['Exited']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# NEURAL NETWORK MODEL
nn_model = MLPClassifier()
nn_model.fit(X_train, y_train)

# PREDICT ON TEST SET
y_pred = nn_model.predict(X_test)


print("Target Variable in Test Set:")
print(y_test)

print("Predicted Values:")
print(y_pred)


accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


report = classification_report(y_test, y_pred)
print("Classification Report:")
print(report)


RowNumber             0
CustomerId            0
Surname               0
CreditScore           0
Geography             0
Gender                0
Age                   0
Tenure                0
Balance               0
NumOfProducts         0
HasCrCard             0
IsActiveMember        0
EstimatedSalary       0
Exited                0
Complain              0
Satisfaction Score    0
Card Type             0
Point Earned          0
dtype: int64


  mean = df.mean()


Target Variable in Test Set:
2175    0.0
1979    0.0
1932    0.0
1518    0.0
4455    0.0
       ... 
337     0.0
1       0.0
1828    0.0
4794    0.0
5648    0.0
Name: Exited, Length: 1593, dtype: float64
Predicted Values:
[0. 0. 0. ... 0. 0. 0.]
Accuracy: 1.0
Precision: 0.0
Recall: 0.0
F1-score: 0.0
Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      1593

    accuracy                           1.00      1593
   macro avg       1.00      1.00      1.00      1593
weighted avg       1.00      1.00      1.00      1593



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
