In [1]:
import pandas  as pd
import numpy   as np

# Read the 'db_merged_bill_demo' worksheet of the source workbook into a DataFrame.
db = pd.read_excel(
    'database_fixed.xlsx',
    sheet_name='db_merged_bill_demo',
)

In [2]:
# Discard the 'ID' column; the frame is rebound rather than mutated in place.
db = db.drop(columns=['ID'])

# Names of every object-dtype column, leaving out the target label.
# These are the columns that get one-hot encoded later on.
objs = [
    name
    for name, kind in db.dtypes.items()
    if kind == 'object' and name != 'default payment in Jan 2022'
]

In [3]:
from sklearn import preprocessing

# Feature columns to standardize: every float64 / int64 / uint8 column
# except the target label, which must keep its original values.
floats = [
    var
    for var in db.columns
    if var != 'default payment in Jan 2022'
    and any(db[var].dtypes == kind for kind in ('float64', 'int64', 'uint8'))
]

# Standardize each selected column (sklearn's `scale`: zero mean, unit variance),
# so that the values become z-scores and the threshold below is meaningful.
for col in floats:
    db[col] = preprocessing.scale(db[col])

# Outlier filter: keep only the rows whose absolute z-score is below 3 in
# every standardized column. The mask is built from the scaled columns only;
# the previous version used the private `DataFrame._get_numeric_data()`, which
# also compared unscaled numeric columns (e.g. the target label) against the
# z-score threshold. Rows with a missing value in a scaled column still fail
# the `< 3` comparison and are dropped.
z = db[floats].abs()
# `.copy()` gives an independent frame, so later column assignments (or a
# re-run of this cell) do not raise SettingWithCopyWarning.
db = db[(z < 3).all(axis=1)].copy()

In [4]:
# Expand each categorical column named in `objs` into one indicator column
# per category value; all other columns are carried over unchanged.
db = pd.get_dummies(data=db, columns=objs)

In [5]:
# Persist the scaled, dummy-encoded frame to a new workbook (row index omitted).
db.to_excel(
    'data_normalized_full_dummies.xlsx',
    sheet_name='db_merged_bill_demo',
    index=False,
)

In [5]:
import seaborn as sbn # A python library for Data visualization
import matplotlib.pyplot as plt # A python library for plotting data
%matplotlib inline 
# Renders matplotlib figures inline, directly below the cell that produces them.

In [6]:
# fig, ax = plt.subplots(figsize=(65,65)) # Sample figsize in inches
# color = plt.get_cmap('RdYlGn')   # default color
# color.set_bad('lightblue')    # if a value is bad the color would be lightblue instead of white
# sbn.heatmap(db.corr(method ='pearson'),annot=True,annot_kws={'size':22}, cmap=color, linewidths=.01, ax=ax)
# sbn.set(font_scale=5.5)

# Pearson correlation coefficient between every pair of columns in `db`.
corr = db.corr(
    method='pearson',
)

# Last expression of the cell: shows the matrix as a table whose cell
# backgrounds are shaded with the 'coolwarm' colormap.
corr.style.background_gradient(
    cmap='coolwarm',
)


# Alternative: set method='kendall' or method='spearman' for a rank-based correlation.
# For this specific dataset, all three methods lead to the same conclusions.

Unnamed: 0,LIMIT_BAL,AGE,default payment in Jan 2022,DATE-2021-07-01_no,DATE-2021-07-01_yes,DATE-2021-08-01_no,DATE-2021-08-01_yes,DATE-2021-09-01_no,DATE-2021-09-01_yes,DATE-2021-10-01_no,DATE-2021-10-01_yes,DATE-2021-11-01_no,DATE-2021-11-01_yes,DATE-2021-12-01_no,DATE-2021-12-01_yes,ACCOUNT_TYPE_Current account,ACCOUNT_TYPE_Salary account,ACCOUNT_TYPE_Savings account,GENDER_Female,GENDER_Male,MARITAL_STATUS_Divorced,MARITAL_STATUS_Married,MARITAL_STATUS_Single,MARITAL_STATUS_unknown,EDUCATION_College degree,EDUCATION_High school,EDUCATION_Middle school,EDUCATION_No education,EDUCATION_Primary school,EDUCATION_Secondary school,EDUCATION_Vocational degree,EDUCATION_unknown,JOB_TYPE_Administration,JOB_TYPE_Business owner,JOB_TYPE_Entreprenuer,JOB_TYPE_House help,JOB_TYPE_Management,JOB_TYPE_Part-time,JOB_TYPE_Retired,JOB_TYPE_Services,JOB_TYPE_Student,JOB_TYPE_Technician,JOB_TYPE_Unemployed,JOB_TYPE_unknown,HOUSING_no,HOUSING_unknown,HOUSING_yes,LOAN_no,LOAN_unknown,LOAN_yes
LIMIT_BAL,1.0,0.139055,-0.098604,0.087798,-0.087798,0.076133,-0.076133,0.065351,-0.065351,0.058265,-0.058265,0.034759,-0.034759,0.001981,-0.001981,0.034379,-0.017271,-0.029385,0.027064,-0.027064,-0.005176,0.008938,-0.006657,0.005332,0.023223,0.001238,-0.01887,-0.009768,-0.033468,-0.005967,0.029851,-0.006687,0.006536,-0.010763,-0.010326,-0.015322,0.021,-0.000205,-0.004287,0.008476,-0.003882,-0.002294,-0.004817,-0.003707,-0.004446,-0.001583,0.004926,0.022504,0.001568,-0.024584
AGE,0.139055,1.0,0.010851,0.024663,-0.024663,0.024203,-0.024203,0.023529,-0.023529,0.028297,-0.028297,0.016535,-0.016535,0.010357,-0.010357,-0.001233,0.002456,-0.001911,-0.090095,0.090095,-0.007651,0.009983,-0.005821,0.003717,-0.005759,0.009891,0.004895,-0.005285,-0.004412,0.000329,-0.006513,0.003814,-0.003601,-0.012165,0.010464,-0.006438,-0.007008,0.014929,-0.009374,0.005988,-0.003971,-0.002559,0.004393,-0.008126,0.003209,-0.002019,-0.002579,-0.000589,-0.000196,0.00071
default payment in Jan 2022,-0.098604,0.010851,1.0,0.018544,-0.018544,0.007982,-0.007982,0.007084,-0.007084,0.015348,-0.015348,0.018188,-0.018188,0.004644,-0.004644,-0.178128,0.083571,0.161801,-0.0253,0.0253,-0.014428,0.005045,0.004944,0.001322,-0.070197,-0.040095,0.090134,0.000484,0.129562,0.029112,-0.124339,0.053905,-0.044801,0.06445,0.038638,0.044253,-0.028531,-0.02322,0.005783,-0.02504,0.012054,-0.001284,0.065947,-0.00288,-0.00163,-0.003395,0.002675,-0.10337,0.009428,0.105742
DATE-2021-07-01_no,0.087798,0.024663,0.018544,1.0,-1.0,0.621863,-0.621863,0.533436,-0.533436,0.486839,-0.486839,0.436123,-0.436123,0.38245,-0.38245,-0.010944,0.00344,0.012676,0.047682,-0.047682,0.002032,-0.001593,0.000546,-0.002742,-0.00515,-0.007383,0.011766,-0.000806,0.008457,-4.1e-05,-0.006792,0.006907,0.000923,0.001725,-0.000499,-0.001372,-0.006813,0.001074,0.000882,0.002837,-0.006057,-0.003778,0.012423,-0.003225,-0.010703,-0.00252,0.011459,-0.002381,0.001589,0.001843
DATE-2021-07-01_yes,-0.087798,-0.024663,-0.018544,-1.0,1.0,-0.621863,0.621863,-0.533436,0.533436,-0.486839,0.486839,-0.436123,0.436123,-0.38245,0.38245,0.010944,-0.00344,-0.012676,-0.047682,0.047682,-0.002032,0.001593,-0.000546,0.002742,0.00515,0.007383,-0.011766,0.000806,-0.008457,4.1e-05,0.006792,-0.006907,-0.000923,-0.001725,0.000499,0.001372,0.006813,-0.001074,-0.000882,-0.002837,0.006057,0.003778,-0.012423,0.003225,0.010703,0.00252,-0.011459,0.002381,-0.001589,-0.001843
DATE-2021-08-01_no,0.076133,0.024203,0.007982,0.621863,-0.621863,1.0,-1.0,0.665885,-0.665885,0.592641,-0.592641,0.521034,-0.521034,0.454733,-0.454733,-0.014745,0.010156,0.008168,0.050359,-0.050359,0.000101,0.002609,-0.002459,-0.005119,-0.000193,-0.005612,-0.003598,-0.007234,0.007887,-0.00705,0.003914,0.007339,-0.004965,0.000215,0.001448,-0.004296,-0.001665,-0.000964,-0.004836,0.009668,-0.006113,0.00105,0.007866,0.00203,-0.008024,-0.001515,0.008474,-0.003538,-0.000206,0.003847
DATE-2021-08-01_yes,-0.076133,-0.024203,-0.007982,-0.621863,0.621863,-1.0,1.0,-0.665885,0.665885,-0.592641,0.592641,-0.521034,0.521034,-0.454733,0.454733,0.014745,-0.010156,-0.008168,-0.050359,0.050359,-0.000101,-0.002609,0.002459,0.005119,0.000193,0.005612,0.003598,0.007234,-0.007887,0.00705,-0.003914,-0.007339,0.004965,-0.000215,-0.001448,0.004296,0.001665,0.000964,0.004836,-0.009668,0.006113,-0.00105,-0.007866,-0.00203,0.008024,0.001515,-0.008474,0.003538,0.000206,-0.003847
DATE-2021-09-01_no,0.065351,0.023529,0.007084,0.533436,-0.533436,0.665885,-0.665885,1.0,-1.0,0.71112,-0.71112,0.624562,-0.624562,0.540651,-0.540651,-0.012046,0.008389,0.006525,0.038808,-0.038808,-0.003194,-0.000946,0.004097,-0.006638,-0.005107,-0.004335,-0.005146,-0.007792,0.00667,-0.001706,0.009097,-0.003156,-0.004338,0.006036,0.006862,-0.005255,-0.007761,-0.002675,0.000776,0.015874,-0.000117,-0.001152,-0.004007,-0.004403,-0.003928,0.001661,0.003407,0.00464,-0.004526,-0.002974
DATE-2021-09-01_yes,-0.065351,-0.023529,-0.007084,-0.533436,0.533436,-0.665885,0.665885,-1.0,1.0,-0.71112,0.71112,-0.624562,0.624562,-0.540651,0.540651,0.012046,-0.008389,-0.006525,-0.038808,0.038808,0.003194,0.000946,-0.004097,0.006638,0.005107,0.004335,0.005146,0.007792,-0.00667,0.001706,-0.009097,0.003156,0.004338,-0.006036,-0.006862,0.005255,0.007761,0.002675,-0.000776,-0.015874,0.000117,0.001152,0.004007,0.004403,0.003928,-0.001661,-0.003407,-0.00464,0.004526,0.002974
DATE-2021-10-01_no,0.058265,0.028297,0.015348,0.486839,-0.486839,0.592641,-0.592641,0.71112,-0.71112,1.0,-1.0,0.704242,-0.704242,0.610697,-0.610697,-0.008881,0.003838,0.008598,0.034766,-0.034766,-0.005092,0.00728,-0.00403,-0.002958,-0.005414,-0.006257,-0.000922,-0.008274,0.010348,0.002418,0.001837,-0.002774,0.004266,0.00068,0.010448,-0.003071,-0.008792,-0.002399,-0.000459,0.007516,-0.009228,-0.003627,-0.001052,0.000753,-0.009726,0.005145,0.008116,0.00592,-0.00501,-0.004125
