<a href="https://colab.research.google.com/github/suremahitha2006-blip/credit-score-model/blob/main/notebooks/credit-score-model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [None]:
# Read the raw customer table; the CSV is expected in the working directory.
csv_path = "BankChurners.csv"
df = pd.read_csv(csv_path)

# Print (rows, columns) as a quick sanity check on the load.
print("Initial DF shape:", df.shape)


Initial DF shape: (10127, 23)


In [None]:
# Show the distinct raw target labels before they are encoded.
target_labels = df["Attrition_Flag"].unique()
print(target_labels)


['Existing Customer' 'Attrited Customer']


In [None]:
# Trim stray whitespace so the labels match the mapping keys exactly.
attrition_labels = df["Attrition_Flag"].str.strip()

# Encode the target: 1 = existing customer, 0 = attrited customer.
# Series.map is used instead of Series.replace because replacing strings with
# integers relies on an implicit object -> int downcast that pandas has
# deprecated (it emits a FutureWarning). map() also turns any label that is
# missing from the mapping into NaN instead of leaving the raw string in the
# column, so unexpected labels show up as missing values.
df["Attrition_Flag"] = attrition_labels.map({
    "Existing Customer": 1,
    "Attrited Customer": 0,
})


  df["Attrition_Flag"] = df["Attrition_Flag"].replace({


In [None]:
# Class balance of the encoded target; dropna=False means missing values,
# if there are any, get their own row in the counts.
class_counts = df["Attrition_Flag"].value_counts(dropna=False)
print(class_counts)


Attrition_Flag
1    8500
0    1627
Name: count, dtype: int64


In [None]:
# Discard rows whose target is missing, then report the remaining shape.
target_col = "Attrition_Flag"
df = df.dropna(subset=[target_col])
print("After target cleaning:", df.shape)


After target cleaning: (10127, 23)


In [None]:
# Columns that must not be used as model features.
# NOTE(review): CLIENTNUM looks like a per-client identifier, and the two
# Naive_Bayes_Classifier_* columns are named after Attrition_Flag, which
# suggests they are derived from the target (leakage) — confirm against the
# dataset description.
non_feature_columns = [
    "CLIENTNUM",
    "Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1",
    "Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2",
]

# One drop call that returns a new frame instead of two inplace=True calls.
# errors="ignore" skips columns that are already absent, so the cell can be
# re-run without raising.
df = df.drop(columns=non_feature_columns, errors="ignore")


In [None]:
# Debt_Ratio: revolving balance expressed as a fraction of the credit limit.
debt_ratio = df["Total_Revolving_Bal"] / df["Credit_Limit"]

# A zero credit limit makes the division yield inf (or NaN for 0 / 0).
# fillna alone would leave the inf values in place, so they are converted to
# NaN first and every undefined ratio ends up as 0.
# The result is assigned back to the column rather than calling
# df[col].fillna(..., inplace=True): that chained in-place call operates on an
# intermediate object and pandas warns it will stop working in 3.0.
df["Debt_Ratio"] = debt_ratio.replace([np.inf, -np.inf], np.nan).fillna(0)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Debt_Ratio"].fillna(0, inplace=True)


In [None]:
# Separate the encoded target (y) from the remaining columns (X).
y = df.loc[:, "Attrition_Flag"]
X = df.drop(columns="Attrition_Flag")


In [None]:
# One-hot encode the categorical feature columns. drop_first=True omits the
# first level of each encoded column so the dummies are not perfectly
# collinear.
X = pd.get_dummies(data=X, drop_first=True)


In [None]:
# Fill any remaining missing feature values with that column's median.
# NOTE(review): the medians are computed over all rows, before the train/test
# split, so test rows contribute to the imputation values.
column_medians = X.median()
X = X.fillna(value=column_medians)


In [None]:
# Report the shapes of the cleaned frame, the feature matrix and the target.
for label, obj in (
    ("Final DF shape:", df),
    ("X shape:", X),
    ("y shape:", y),
):
    print(label, obj.shape)

# Class balance of the target that will be modelled.
print(y.value_counts())


Final DF shape: (10127, 21)
X shape: (10127, 33)
y shape: (10127,)
Attrition_Flag
1    8500
0    1627
Name: count, dtype: int64


In [None]:
# Hold out 20% of the rows for evaluation; random_state fixes the shuffle so
# the split is reproducible.
# stratify=y keeps the class proportions of the imbalanced target the same in
# the train and test splits, so metrics for the minority class are not skewed
# by an unlucky draw.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


In [None]:
# Standardise the features: the scaler is fitted on the training split only
# and the same fitted transform is then applied to the test split.
# The scaled arrays are stored under their own names so X_train / X_test are
# not overwritten; the cell can therefore be re-run without feeding its own
# output back into the scaler.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Logistic-regression model; max_iter=1000 sets the solver's iteration cap.
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train_scaled, y_train)

# Per-class precision / recall / F1 on the held-out split.
y_pred = lr.predict(X_test_scaled)
print(classification_report(y_test, y_pred))

# ROC-AUC is computed from the predicted probability of class 1
# (the second column of predict_proba).
y_prob = lr.predict_proba(X_test_scaled)[:, 1]
print("ROC-AUC:", roc_auc_score(y_test, y_prob))


              precision    recall  f1-score   support

           0       0.76      0.54      0.63       327
           1       0.92      0.97      0.94      1699

    accuracy                           0.90      2026
   macro avg       0.84      0.75      0.78      2026
weighted avg       0.89      0.90      0.89      2026

ROC-AUC: 0.9173303958255711


In [None]:
# NOTE(review): this cell repeats the logistic-regression steps of the
# preceding cell; consider deleting one of the two.
#
# Fit the scaler on the training split and apply it to the test split. The
# results are kept under separate names rather than written back into
# X_train / X_test, so running the cell again does not pass its own output
# through the scaler a second time.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Logistic-regression model; max_iter=1000 sets the solver's iteration cap.
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train_scaled, y_train)

# Per-class precision / recall / F1 on the held-out split.
y_pred = lr.predict(X_test_scaled)
print(classification_report(y_test, y_pred))

# ROC-AUC from the predicted probability of class 1
# (the second column of predict_proba).
y_prob = lr.predict_proba(X_test_scaled)[:, 1]
print("ROC-AUC:", roc_auc_score(y_test, y_prob))


              precision    recall  f1-score   support

           0       0.76      0.54      0.63       327
           1       0.92      0.97      0.94      1699

    accuracy                           0.90      2026
   macro avg       0.84      0.75      0.78      2026
weighted avg       0.89      0.90      0.89      2026

ROC-AUC: 0.9173303958255711


In [None]:
# Random-forest classifier with 100 trees; the fixed random_state makes the
# fitted forest reproducible. RandomForestClassifier is imported in the
# notebook's top import cell.
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train on whatever X_train currently holds and predict the held-out split.
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# Per-class precision / recall / F1 on the held-out split.
print("RANDOM FOREST RESULTS")
print(classification_report(y_test, y_pred_rf))

# ROC-AUC from the predicted probability of class 1
# (the second column of predict_proba).
y_prob_rf = rf.predict_proba(X_test)[:, 1]
print("Random Forest ROC-AUC:",
      roc_auc_score(y_test, y_prob_rf))


RANDOM FOREST RESULTS
              precision    recall  f1-score   support

           0       0.92      0.77      0.84       327
           1       0.96      0.99      0.97      1699

    accuracy                           0.95      2026
   macro avg       0.94      0.88      0.90      2026
weighted avg       0.95      0.95      0.95      2026

Random Forest ROC-AUC: 0.9869810087963238
