This python notebook explore customer credit risk classification using the credit score metric formulated using the available customer record and not the the CIBIL-provided credit score.

In [32]:
import pickle
import os.path as path
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBRegressor, XGBClassifier
from pipeline_components import Stage1Classifier, Stage2Classifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.pipeline import Pipeline

In [108]:
df = pd.read_pickle('../artifacts/full_data_nonull.pkl')
print(df.shape)

df.info()

(42064, 84)
<class 'pandas.core.frame.DataFrame'>
Index: 42064 entries, 0 to 51335
Data columns (total 84 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   PROSPECTID                    42064 non-null  int64  
 1   Total_TL                      42064 non-null  int64  
 2   Tot_Closed_TL                 42064 non-null  int64  
 3   Tot_Active_TL                 42064 non-null  int64  
 4   Total_TL_opened_L6M           42064 non-null  int64  
 5   Tot_TL_closed_L6M             42064 non-null  int64  
 6   pct_tl_open_L6M               42064 non-null  float64
 7   pct_tl_closed_L6M             42064 non-null  float64
 8   pct_active_tl                 42064 non-null  float64
 9   pct_closed_tl                 42064 non-null  float64
 10  Total_TL_opened_L12M          42064 non-null  int64  
 11  Tot_TL_closed_L12M            42064 non-null  int64  
 12  pct_tl_open_L12M              42064 non-null  float64

### Preprocessing

In [109]:
print('categorical columns: ', df.select_dtypes(include=['object']).columns, end='\n\n')

cat_col = df.select_dtypes(include='object').columns
for col in cat_col:
    print(col, ": ", df[col].unique())

categorical columns:  Index(['MARITALSTATUS', 'EDUCATION', 'GENDER', 'last_prod_enq2',
       'first_prod_enq2', 'Approved_Flag'],
      dtype='object')

MARITALSTATUS :  ['Married' 'Single']
EDUCATION :  ['12TH' 'GRADUATE' 'SSC' 'POST-GRADUATE' 'UNDER GRADUATE' 'OTHERS'
 'PROFESSIONAL']
GENDER :  ['M' 'F']
last_prod_enq2 :  ['PL' 'ConsumerLoan' 'AL' 'CC' 'others' 'HL']
first_prod_enq2 :  ['PL' 'ConsumerLoan' 'others' 'AL' 'HL' 'CC']
Approved_Flag :  ['P2' 'P1' 'P3' 'P4']


In [110]:
# one-hot encoding
df = pd.get_dummies(df, columns=['MARITALSTATUS', 'GENDER', 'last_prod_enq2', 'first_prod_enq2'])

#label encoding
encode_map = {'SSC':1, 'OTHERS': 1, '12TH': 2, 'GRADUATE': 3, 'UNDER GRADUATE': 3, 'POST-GRADUATE': 4, 'PROFESSIONAL': 4}
df['EDUCATION'] = df['EDUCATION'].map(encode_map)

In [111]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(['Approved_Flag', 'PROSPECTID', 'Credit_Score'], axis=1), 
                 LabelEncoder().fit_transform(df['Credit_Score']),
                 test_size=0.3, random_state=42)
print(X_train.shape, X_test.shape)

(29444, 93) (12620, 93)


In [85]:
xgb_credit_score = XGBRegressor(learning_rate = 0.05, max_depth= 5, min_child_weight= 5, n_estimators= 200, random_state=42, n_jobs=1)
xgb_credit_score.fit(X_train, y_train)

In [86]:
xgb_credit_score.predict(X_test)

array([124.67464 , 113.31796 , 130.24362 , ..., 134.77286 , 132.67502 ,
       101.467926], dtype=float32)

In [87]:
df['Pred_Credit_Score'] = xgb_credit_score.predict(df.drop(['Approved_Flag', 'PROSPECTID', 'Credit_Score'], axis=1))

### Classification

In [88]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(['Approved_Flag', 'PROSPECTID', 'Credit_Score'], axis=1), 
                                                    LabelEncoder().fit_transform(df['Approved_Flag']),
                                                    test_size=0.3, random_state=42)

In [89]:
DecisionTreeClassifier().fit(X_train, y_train).score(X_test, y_test)

0.7370839936608558

In [23]:
RandomForestClassifier().fit(X_train, y_train).score(X_test, y_test)

0.7900158478605388

In [24]:
XGBClassifier().fit(X_train, y_train).score(X_test, y_test)

0.7857369255150555

In [26]:
print(classification_report(y_test, XGBClassifier().fit(X_train, y_train).predict(X_test)))

              precision    recall  f1-score   support

           0       0.84      0.77      0.81      1537
           1       0.84      0.91      0.87      7557
           2       0.45      0.34      0.39      1975
           3       0.77      0.75      0.76      1551

    accuracy                           0.79     12620
   macro avg       0.73      0.70      0.71     12620
weighted avg       0.77      0.79      0.78     12620



In [69]:
XGBClassifier(random_state=42).fit(X_train, y_train).get_booster().save_config()

'{"learner":{"generic_param":{"device":"cpu","fail_on_invalid_gpu_id":"0","n_jobs":"0","nthread":"0","random_state":"42","seed":"42","seed_per_iteration":"0","validate_parameters":"1"},"gradient_booster":{"gbtree_model_param":{"num_parallel_tree":"1","num_trees":"400"},"gbtree_train_param":{"process_type":"default","tree_method":"auto","updater":"grow_quantile_histmaker","updater_seq":"grow_quantile_histmaker"},"name":"gbtree","specified_updater":false,"tree_train_param":{"alpha":"0","cache_opt":"1","colsample_bylevel":"1","colsample_bynode":"1","colsample_bytree":"1","eta":"0.300000012","gamma":"0","grow_policy":"depthwise","interaction_constraints":"","lambda":"1","learning_rate":"0.300000012","max_bin":"256","max_cat_threshold":"64","max_cat_to_onehot":"4","max_delta_step":"0","max_depth":"6","max_leaves":"0","min_child_weight":"1","min_split_loss":"0","monotone_constraints":"()","refresh_leaf":"1","reg_alpha":"0","reg_lambda":"1","sampling_method":"uniform","sketch_ratio":"2","spar

In [101]:

xgb_param_grid = {
    'n_estimators': [100, 200],  # Increase trees for more learning
    'max_depth': [8, 10],  # Allow deeper trees
    'learning_rate': [0.1, 0.2, 0.4],  # Avoid too low learning rates
    'gamma': [0, 0.1, 0.3],  # Introduce regularization
}

xgb_grid_search = GridSearchCV(estimator=XGBClassifier(random_state=42, n_jobs=-1), 
                           param_grid=xgb_param_grid, 
                           scoring='f1_weighted',  # F1 score as the metric
                           cv=3,  # 3-fold cross-validation
                           verbose=1,  # Verbosity for debugging
                           error_score="raise",
                           n_jobs=-1)  # Parallel processing

xgb_grid_search.fit(X_train, y_train)
xgb_grid_search.best_score_

Fitting 3 folds for each of 36 candidates, totalling 108 fits


0.7928495528622915

In [95]:
X_train.isna().sum().sum()

0

In [97]:
np.isinf(X_train).sum().sum()

0

In [99]:
 np.unique(y_train)

array([0, 1, 2, 3])

In [67]:
pipeline = Pipeline(steps=[('stage1', Stage1Classifier(base_model=DecisionTreeClassifier(random_state=42))),
                           ('stage2', Stage2Classifier(p1_p3_model=XGBClassifier(random_state=42, n_jobs=-1), 
                                                       p2_p4_model=DecisionTreeClassifier(random_state=42),
                                                       p1_p3_param_grid={}))
                           ])

pipeline.fit(X_train, y_train)

p1-p3 model score: 0.9591823864321101


In [66]:
pipeline.named_steps['stage2'].p1_p3_model

In [35]:
print(classification_report(y_test, pipeline.predict(X_test)))

              precision    recall  f1-score   support

           0       0.76      0.79      0.77      1537
           1       0.83      0.84      0.83      7557
           2       0.37      0.34      0.35      1975
           3       0.71      0.72      0.71      1551

    accuracy                           0.74     12620
   macro avg       0.67      0.67      0.67     12620
weighted avg       0.73      0.74      0.74     12620



In [103]:
xgb_param_grid = {
    'n_estimators': [100, 200],  # Increase trees for more learning
    'max_depth': [8, 10],  # Allow deeper trees
    'learning_rate': [0.1, 0.2, 0.4],  # Avoid too low learning rates
    'gamma': [0, 0.1, 0.3],  # Introduce regularization
}


pipeline_tuning = Pipeline(steps=[('stage1', Stage1Classifier(base_model=DecisionTreeClassifier(random_state=42))),
                           ('stage2', Stage2Classifier(p1_p3_model=XGBClassifier(random_state=42, n_jobs=-1), 
                                                       p2_p4_model=DecisionTreeClassifier(random_state=42),
                                                       p1_p3_param_grid=xgb_param_grid))
                           ])

pipeline_tuning.fit(X_train, y_train)

p1-p3 model score: 0.9602334091848809


In [104]:
print(classification_report(y_test, pipeline.predict(X_test)))

              precision    recall  f1-score   support

           0       0.76      0.79      0.77      1537
           1       0.83      0.84      0.83      7557
           2       0.37      0.34      0.35      1975
           3       0.71      0.72      0.71      1551

    accuracy                           0.74     12620
   macro avg       0.67      0.67      0.67     12620
weighted avg       0.73      0.74      0.74     12620



In [61]:
from scipy.stats import f_oneway

df_new = df.drop('Credit_Score', axis=1)
col_keep = []
annova_pvalues = pd.DataFrame(columns=['feature', 'pvalue'])

# categorical vs numeric feature using anova test
for feature in df_new.select_dtypes(exclude='object').columns:
    x = list(df_new[feature]) 
    y = list(df_new['Approved_Flag'])
    
    group_P1 = [value for value, group in zip(x, y) if group == 'P1']
    group_P2 = [value for value, group in zip(x, y) if group == 'P2']
    group_P3 = [value for value, group in zip(x, y) if group == 'P3']
    group_P4 = [value for value, group in zip(x, y) if group == 'P4']

    f_statistic, p_value = f_oneway(group_P1, group_P2, group_P3, group_P4)

    if p_value <= 0.05:
        col_keep.append(feature)
    annova_pvalues.loc[len(annova_pvalues)] = [feature, p_value]
    
num_corr = len(col_keep)
print(num_corr, 'numeric features show correlation with target variable (Approval_Flag)')

92 numeric features show correlation with target variable (Approval_Flag)


In [62]:
annova_pvalues.sort_values(by='pvalue', ascending=False)

Unnamed: 0,feature,pvalue
69,pct_currentBal_all_TL,0.608276
49,num_lss_6mts,0.558651
50,num_lss_12mts,0.354899
0,PROSPECTID,0.042167
46,num_dbt_6mts,0.033672
...,...,...
64,AGE,0.000000
58,PL_enq_L12m,0.000000
62,enq_L3m,0.000000
61,enq_L6m,0.000000


The pvalue more than 0.05 are very large. we can ignore them. Morevoer decision tree and there ensemble would not require annova testing. They automatically determine feature importance during training.



In [None]:
df.groupby('Approved_Flag')['Credit_Score'].agg(['min', 'max'])

Unnamed: 0_level_0,min,max
Approved_Flag,Unnamed: 1_level_1,Unnamed: 2_level_1
P1,701,809
P2,669,700
P3,489,776
P4,469,658


- It is observed that the credit risk classification performs suboptimally when using the formulated credit score (with an R² and RMSE of around 0.9 and 6) instead of the CIBIL-provided credit score.

- The suboptimal classification performance could be due to minor inaccuracies in the credit score formulation, as the models (p2_p4_model and p1p3_p2p4_model) heavily relied on the credit score, evident from its high feature importance.

-  The small RMSE error in the credit score (within the 300-900 range) may have significantly impacted the classification outcomes. Alternatively, exploring other machine learning algorithms might yield better generalization.