In [1]:
import pandas  as pd
import numpy   as np

# Read the 'db_merged_bill_demo' sheet of the workbook into the working DataFrame.
db = pd.read_excel(
    'database_fixed.xlsx',
    sheet_name='db_merged_bill_demo',
)

In [2]:
# Columns reported as uncorrelated with the default outcome in 'pair_correlations_01'.
uncorrelated = [
    'ID',
    'DATE-2021-07-01',
    'DATE-2021-08-01',
    'DATE-2021-09-01',
    'DATE-2021-10-01',
    'DATE-2021-11-01',
    'DATE-2021-12-01',
]

# Reassign rather than mutate in place, and skip labels that are already absent,
# so that re-running this cell does not fail with a KeyError.
db = db.drop(columns=uncorrelated, errors='ignore')

# Names of the object-typed columns, excluding the target column.
target = 'default payment in Jan 2022'
objs = [
    name
    for name, dtype in db.dtypes.items()
    if dtype == 'object' and name != target
]

In [3]:
from sklearn import preprocessing

# Numeric feature columns (float64 / int64 / uint8); the target column is left out
# so that it is not rescaled.
floats = [
    col for col in db.columns
    if col != 'default payment in Jan 2022'
    and db[col].dtype in ('float64', 'int64', 'uint8')
]

# Standardize each numeric feature with sklearn's scale(); the outlier filter
# below relies on the features being on this common scale.
for col in floats:
    db[col] = preprocessing.scale(db[col])

# Outlier filter: keep only the rows in which every numeric column has an
# absolute value below the threshold. Note that a single out-of-range value
# discards the whole row, not just that value.
# select_dtypes() is the public replacement for the private _get_numeric_data().
z_limit = 3
z = np.abs(db.select_dtypes(include='number'))
db = db[(z < z_limit).all(axis=1)]

In [4]:
# Replace every column listed in `objs` with one indicator (dummy) column per
# distinct value found in it; all other columns are carried over unchanged.
db = pd.get_dummies(data=db, columns=objs)

In [None]:
# Write the normalized frame, including its dummy columns, to a new workbook
# (the DataFrame index is not written).
db.to_excel(
    'data_normalized_partial_dummies.xlsx',
    sheet_name='db_merged_bill_demo',
    index=False,
)

In [5]:
import seaborn as sbn # A python library for Data visualization
import matplotlib.pyplot as plt # A python library for plotting data
%matplotlib inline 
# Renders matplotlib figures inline, directly below the code cell that creates them.

In [6]:
# Alternative seaborn heatmap rendering of the same matrix (left disabled):
# fig, ax = plt.subplots(figsize=(65,65)) # Sample figsize in inches
# color = plt.get_cmap('RdYlGn')   # default color
# color.set_bad('lightblue')    # bad values are drawn in light blue instead of white
# sbn.heatmap(db.corr(method ='pearson'),annot=True,annot_kws={'size':22}, cmap='coolwarm', linewidths=.01, ax=ax)
# sbn.set(font_scale=5.5)

# Pairwise correlation matrix of all columns, displayed as a colour-graded table.
# The method can be switched to 'kendall' or 'spearman'; per the author's findings
# for this specific dataset, the three methods lead to the same conclusions.
corr_method = 'pearson'
corr = db.corr(method=corr_method)
corr.style.background_gradient(cmap='coolwarm')

Unnamed: 0,LIMIT_BAL,AGE,default payment in Jan 2022,ACCOUNT_TYPE_Current account,ACCOUNT_TYPE_Salary account,ACCOUNT_TYPE_Savings account,GENDER_Female,GENDER_Male,MARITAL_STATUS_Divorced,MARITAL_STATUS_Married,MARITAL_STATUS_Single,MARITAL_STATUS_unknown,EDUCATION_College degree,EDUCATION_High school,EDUCATION_Middle school,EDUCATION_No education,EDUCATION_Primary school,EDUCATION_Secondary school,EDUCATION_Vocational degree,EDUCATION_unknown,JOB_TYPE_Administration,JOB_TYPE_Business owner,JOB_TYPE_Entreprenuer,JOB_TYPE_House help,JOB_TYPE_Management,JOB_TYPE_Part-time,JOB_TYPE_Retired,JOB_TYPE_Services,JOB_TYPE_Student,JOB_TYPE_Technician,JOB_TYPE_Unemployed,JOB_TYPE_unknown,HOUSING_no,HOUSING_unknown,HOUSING_yes,LOAN_no,LOAN_unknown,LOAN_yes
LIMIT_BAL,1.0,0.139055,-0.098604,0.034379,-0.017271,-0.029385,0.027064,-0.027064,-0.005176,0.008938,-0.006657,0.005332,0.023223,0.001238,-0.01887,-0.009768,-0.033468,-0.005967,0.029851,-0.006687,0.006536,-0.010763,-0.010326,-0.015322,0.021,-0.000205,-0.004287,0.008476,-0.003882,-0.002294,-0.004817,-0.003707,-0.004446,-0.001583,0.004926,0.022504,0.001568,-0.024584
AGE,0.139055,1.0,0.010851,-0.001233,0.002456,-0.001911,-0.090095,0.090095,-0.007651,0.009983,-0.005821,0.003717,-0.005759,0.009891,0.004895,-0.005285,-0.004412,0.000329,-0.006513,0.003814,-0.003601,-0.012165,0.010464,-0.006438,-0.007008,0.014929,-0.009374,0.005988,-0.003971,-0.002559,0.004393,-0.008126,0.003209,-0.002019,-0.002579,-0.000589,-0.000196,0.00071
default payment in Jan 2022,-0.098604,0.010851,1.0,-0.178128,0.083571,0.161801,-0.0253,0.0253,-0.014428,0.005045,0.004944,0.001322,-0.070197,-0.040095,0.090134,0.000484,0.129562,0.029112,-0.124339,0.053905,-0.044801,0.06445,0.038638,0.044253,-0.028531,-0.02322,0.005783,-0.02504,0.012054,-0.001284,0.065947,-0.00288,-0.00163,-0.003395,0.002675,-0.10337,0.009428,0.105742
ACCOUNT_TYPE_Current account,0.034379,-0.001233,-0.178128,1.0,-0.814466,-0.351092,0.01314,-0.01314,-0.001304,0.002093,-0.001903,0.0053,-0.080277,0.309878,0.26902,0.005782,0.083608,0.292319,-0.597008,-0.242156,0.328074,0.216721,-0.221749,0.149903,0.054272,0.44354,-0.186771,-0.338406,-0.101287,-0.485251,-0.191508,-0.093862,-0.001285,-0.004647,0.002718,0.486691,-0.165615,-0.445506
ACCOUNT_TYPE_Salary account,-0.017271,0.002456,0.083571,-0.814466,1.0,-0.257323,-0.007639,0.007639,0.008604,-0.005749,0.000452,-0.004223,-0.143202,-0.212848,-0.219149,-0.001031,-0.317669,-0.211255,0.732844,0.297046,-0.423883,-0.176512,0.196693,-0.11485,-0.126476,-0.413714,-0.047866,0.415495,0.12436,0.595607,0.235357,0.1161,0.004978,0.001011,-0.005279,-0.597149,0.203233,0.546604
ACCOUNT_TYPE_Savings account,-0.029385,-0.001911,0.161801,-0.351092,-0.257323,1.0,-0.009558,0.009558,-0.011714,0.005793,0.002439,-0.002012,0.364797,-0.172601,-0.094384,-0.007966,0.373405,-0.145927,-0.188358,-0.076067,0.137662,-0.076089,0.051895,-0.064316,0.113719,-0.071055,0.388308,-0.106916,-0.032001,-0.153015,-0.060866,-0.031037,-0.005893,0.006109,0.003993,0.153105,-0.05215,-0.140128
GENDER_Female,0.027064,-0.090095,-0.0253,0.01314,-0.007639,-0.009558,1.0,-1.0,-0.009084,-0.000709,0.007521,2.9e-05,-0.000145,-0.008799,-0.014976,-0.009355,0.004711,0.005165,0.009589,0.000648,0.01684,-0.005576,-0.009193,0.003049,-0.003475,-0.009303,-0.001533,-0.001769,-0.000759,-0.001567,0.009871,-0.001786,0.007424,-0.012228,-0.00363,0.005518,-0.006164,-0.0032
GENDER_Male,-0.027064,0.090095,0.0253,-0.01314,0.007639,0.009558,-1.0,1.0,0.009084,0.000709,-0.007521,-2.9e-05,0.000145,0.008799,0.014976,0.009355,-0.004711,-0.005165,-0.009589,-0.000648,-0.01684,0.005576,0.009193,-0.003049,0.003475,0.009303,0.001533,0.001769,0.000759,0.001567,-0.009871,0.001786,-0.007424,0.012228,0.00363,-0.005518,0.006164,0.0032
MARITAL_STATUS_Divorced,-0.005176,-0.007651,-0.014428,-0.001304,0.008604,-0.011714,-0.009084,0.009084,1.0,-0.475004,-0.210856,-0.016097,-0.003583,4.8e-05,0.003598,-0.004333,-0.017961,0.004088,0.006015,0.011425,-0.007871,0.000639,0.001296,-0.002329,0.002024,0.00033,-0.00423,0.001002,-0.000926,0.002029,0.007984,0.011357,-0.000154,0.0067,-0.001916,-0.005195,0.007717,0.002186
MARITAL_STATUS_Married,0.008938,0.009983,0.005045,0.002093,-0.005749,0.005793,-0.000709,0.000709,-0.475004,1.0,-0.755397,-0.05767,0.000407,0.008164,-0.015332,0.000852,0.008492,0.002384,-0.007512,-0.000429,-0.00166,-0.015093,-0.003329,0.011851,-0.00335,0.01013,-0.001227,0.001254,0.00269,-0.002138,-0.002087,-0.001693,0.009843,-0.005372,-0.008163,0.000109,-0.004242,0.001716


In [None]:
# Draw the correlation heatmap and save it from the same figure object.
# No earlier live cell leaves a matplotlib figure open (the styled table above is
# HTML, not a figure), so a bare plt.savefig() here would write a blank image.
# The plot settings mirror the disabled heatmap snippet from the correlation cell.
fig, ax = plt.subplots(figsize=(65, 65))  # size in inches
sbn.heatmap(
    corr,
    annot=True,
    annot_kws={'size': 22},
    cmap='coolwarm',
    linewidths=.01,
    ax=ax,
)
ax.tick_params(labelsize=22)  # keep tick labels legible on the very large canvas
fig.savefig("Correlation_HeatMap.jpg")