<a id = 'scale'></a>
# Data Scaling

## Table of Content

[A. Individual Account](#indi)

[B. Joint Account](#joint)

In [1]:
# Importing Libraries

import numpy as np
import pandas as pd

In [2]:
# Mounting the Google Drive

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


<a id= 'indi'></a>
> ## A. Individual Account

In [3]:
inv_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Individual_scale.csv')

In [4]:
inv_df.skew()

loan_amnt                0.794664
int_rate                 0.754225
installment              1.017385
emp_length              -0.191110
annual_inc              44.940454
dti                      0.231028
open_acc                 1.300228
pub_rec                 11.483169
revol_bal                8.326329
revol_util              -0.006663
total_acc                0.991206
mort_acc                 1.689331
pub_rec_bankruptcies     3.406716
cibil_score              1.197485
dtype: float64

In [5]:
inv_df.head()

Unnamed: 0,loan_amnt,term,int_rate,installment,grade,sub_grade,emp_length,home_ownership,annual_inc,verification_status,loan_status,dti,open_acc,pub_rec,revol_bal,revol_util,total_acc,mort_acc,pub_rec_bankruptcies,cibil_score
0,202253.94,36 months,13.99,6912.03,C,C4,10,MORTGAGE,3089990.75,Not Verified,Yes,5.91,7,0,155342.26,29.7,13,1,0,677
1,1387686.76,36 months,11.99,46084.68,C,C1,10,MORTGAGE,3651807.25,Not Verified,Yes,16.06,22,0,1206220.03,19.2,38,4,0,717
2,1966357.75,60 months,14.85,46625.15,C,C5,10,MORTGAGE,6179981.5,Source Verified,No,17.06,13,0,438329.23,11.6,17,1,0,787
3,584289.16,60 months,22.45,16287.62,F,F1,3,MORTGAGE,5867218.25,Source Verified,Yes,25.37,12,0,1232007.4,64.5,35,6,0,697
4,671370.72,36 months,13.44,22763.68,C,C3,4,RENT,1910176.1,Source Verified,Yes,10.2,5,0,495634.52,68.4,6,0,0,692


In [6]:
# Columns Needed to Change Dtypes (Object-to-integer/datetime)

pd.set_option('display.max_columns', None)
inv_df.select_dtypes(include = 'object',exclude = 'int64').head(5)

Unnamed: 0,term,grade,sub_grade,home_ownership,verification_status,loan_status
0,36 months,C,C4,MORTGAGE,Not Verified,Yes
1,36 months,C,C1,MORTGAGE,Not Verified,Yes
2,60 months,C,C5,MORTGAGE,Source Verified,No
3,60 months,F,F1,MORTGAGE,Source Verified,Yes
4,36 months,C,C3,RENT,Source Verified,Yes


In [7]:
inv_df.sub_grade.value_counts()

C1    137982
B4    133389
B5    132925
B3    125923
C2    124221
C3    121715
C4    119929
B2    119553
B1    118195
C5    109422
A5    102972
A4     90405
A1     82768
D1     77527
A3     69057
D2     68601
A2     65448
D3     60781
D4     53609
D5     44961
E1     32390
E2     28809
E3     25351
E4     21534
E5     20996
F1     12822
F2      8965
F3      7427
F4      5855
F5      4867
G1      3746
G2      2600
G3      2005
G4      1629
G5      1474
Name: sub_grade, dtype: int64

In [8]:
inv_df.grade.value_counts()

B    629985
C    613269
A    410650
D    305479
E    129080
F     39936
G     11454
Name: grade, dtype: int64

In [9]:
inv_df.home_ownership.value_counts()

MORTGAGE    1031508
RENT         865511
OWN          241610
ANY             995
OTHER           182
NONE             47
Name: home_ownership, dtype: int64

In [10]:
inv_df.verification_status.value_counts()

Source Verified    851898
Not Verified       700184
Verified           587771
Name: verification_status, dtype: int64

In [11]:
inv_df.loan_status.value_counts()

No     1080621
Yes    1059232
Name: loan_status, dtype: int64

In [12]:
inv_df.dtypes

loan_amnt               float64
term                     object
int_rate                float64
installment             float64
grade                    object
sub_grade                object
emp_length                int64
home_ownership           object
annual_inc              float64
verification_status      object
loan_status              object
dti                     float64
open_acc                  int64
pub_rec                   int64
revol_bal               float64
revol_util              float64
total_acc                 int64
mort_acc                  int64
pub_rec_bankruptcies      int64
cibil_score               int64
dtype: object

In [13]:
def obj_to_num_inv(inv_df):
  
  col_del_list = []   
  
  term_to_int = {' 36 months': 1,
                  ' 60 months': 2}
  
  grade_to_int = dict(zip(['A', 'B', 'C', 'D', 'E', 'F', 'G'], np.arange(7, 0, -1)))
    
  sub_grade_sorted = {'G5': 0, 'G4': 1, 'G3': 2, 'G2': 3, 'G1': 4,
                      'F5': 5, 'F4': 6, 'F3': 7, 'F2': 8, 'F1': 9,
                      'E5': 10, 'E4': 11, 'E3': 12, 'E2': 13, 'E1': 14,
                      'D5': 15, 'D4': 16, 'D3': 17, 'D2': 18, 'D1': 19,
                      'C5': 20, 'C4': 21, 'C3': 22, 'C2': 23, 'C1': 24,
                      'B5': 25, 'B4': 26, 'B3': 27, 'B2': 28, 'B1': 29,
                      'A5': 30, 'A4': 31, 'A3': 32, 'A2': 33, 'A1': 34}    
  
  home_to_int = {'MORTGAGE': 4, 'RENT': 3, 'OWN': 5,'ANY': 2,'OTHER': 1, 'NONE':0 }
  
  ver_stat_to_int = {'Source Verified':2,'Verified': 1,'Not Verified': 0}

  status_to_int = {'Yes':0,'No':1}

  inv_df.replace({'term': term_to_int, 'grade':grade_to_int,'sub_grade': sub_grade_sorted, 'home_ownership':home_to_int,
                  'verification_status':ver_stat_to_int,'loan_status': status_to_int}, inplace=True)

  # Change in Columns
  inv_df['pub_rec_bankruptcies'] = np.where(inv_df["pub_rec_bankruptcies"]>0, 1, 0)
  inv_df['pub_rec'] = np.where(inv_df["pub_rec"]>0, 1, 0)

  # Calculating Balance Annual Income
  inv_df['bal_annual_inc'] = inv_df['loan_amnt'] / inv_df['annual_inc']

  # Calculating Installment Amount Ratio
  inv_df['inst_anmt_ratio'] = inv_df['installment'] / inv_df['loan_amnt']
  col_del_list.append('loan_amnt')

  inv_df['annual_inc']=np.log(inv_df['annual_inc'])

  # Calculating Credit line Ratio
  inv_df['credit_line_ratio'] = inv_df['open_acc'] / inv_df['total_acc']
  col_del_list.extend(['open_acc','total_acc'])

  # Dropping the columns
  inv_df = inv_df.drop(columns = col_del_list)

  return inv_df

inv_scale_df = obj_to_num_inv(inv_df)


In [14]:
inv_scale_df.head()

Unnamed: 0,term,int_rate,installment,grade,sub_grade,emp_length,home_ownership,annual_inc,verification_status,loan_status,dti,pub_rec,revol_bal,revol_util,mort_acc,pub_rec_bankruptcies,cibil_score,bal_annual_inc,inst_anmt_ratio,credit_line_ratio
0,1,13.99,6912.03,5,21,10,4,14.943679,0,0,5.91,0,155342.26,29.7,1,0,677,0.065455,0.034175,0.538462
1,1,11.99,46084.68,5,24,10,4,15.110733,0,0,16.06,0,1206220.03,19.2,4,0,717,0.38,0.03321,0.578947
2,2,14.85,46625.15,5,20,10,4,15.636826,2,1,17.06,0,438329.23,11.6,1,0,787,0.318182,0.023711,0.764706
3,2,22.45,16287.62,2,9,3,4,15.584891,2,0,25.37,0,1232007.4,64.5,6,0,697,0.099585,0.027876,0.342857
4,1,13.44,22763.68,5,22,4,3,14.462706,2,0,10.2,0,495634.52,68.4,0,0,692,0.351471,0.033906,0.833333


In [15]:
inv_scale_df.skew()

term                    0.977928
int_rate                0.754225
installment             1.017385
grade                  -0.643868
sub_grade              -0.598433
emp_length             -0.191110
home_ownership          0.376707
annual_inc              0.237342
verification_status    -0.135220
loan_status            -0.019992
dti                     0.231028
pub_rec                 1.861535
revol_bal               8.326329
revol_util             -0.006663
mort_acc                1.689331
pub_rec_bankruptcies    2.338885
cibil_score             1.197485
bal_annual_inc          0.757374
inst_anmt_ratio        -0.710910
credit_line_ratio       0.422195
dtype: float64

In [16]:
pd.set_option('display.max_columns', None)
inv_scale_df.head(10)

Unnamed: 0,term,int_rate,installment,grade,sub_grade,emp_length,home_ownership,annual_inc,verification_status,loan_status,dti,pub_rec,revol_bal,revol_util,mort_acc,pub_rec_bankruptcies,cibil_score,bal_annual_inc,inst_anmt_ratio,credit_line_ratio
0,1,13.99,6912.03,5,21,10,4,14.943679,0,0,5.91,0,155342.26,29.7,1,0,677,0.065455,0.034175,0.538462
1,1,11.99,46084.68,5,24,10,4,15.110733,0,0,16.06,0,1206220.03,19.2,4,0,717,0.38,0.03321,0.578947
2,2,14.85,46625.15,5,20,10,4,15.636826,2,1,17.06,0,438329.23,11.6,1,0,787,0.318182,0.023711,0.764706
3,2,22.45,16287.62,2,9,3,4,15.584891,2,0,25.37,0,1232007.4,64.5,6,0,697,0.099585,0.027876,0.342857
4,1,13.44,22763.68,5,22,4,3,14.462706,2,0,10.2,0,495634.52,68.4,0,0,692,0.351471,0.033906,0.833333
5,1,9.17,35820.3,6,28,10,4,16.129302,0,0,14.67,0,4906287.31,84.5,4,0,682,0.111111,0.031879,0.444444
6,1,8.49,35465.23,6,29,10,4,15.378997,0,0,17.61,0,46406.04,5.7,3,0,707,0.235294,0.031563,0.533333
7,1,6.49,17216.87,7,33,6,3,15.378997,0,0,13.07,1,587884.79,34.5,1,1,687,0.117647,0.030645,0.608696
8,1,11.48,14817.35,6,25,10,4,14.674015,0,0,34.8,0,395181.73,39.1,1,0,702,0.190476,0.032968,0.444444
9,2,12.88,28557.13,5,23,6,4,15.490222,0,1,22.98,0,731934.54,32.7,5,0,712,0.235789,0.022692,0.457143


In [17]:
inv_scale_df.dropna(subset = ['credit_line_ratio'], inplace = True)

In [18]:
inv_scale_df.to_csv('/content/drive/MyDrive/Colab Notebooks/Individual_clean_ML.csv',index = False)

<a id= 'joint'></a>
> ## B. Joint Account

In [None]:
joint_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Joint_scale.csv')

In [None]:
joint_df.head()

Unnamed: 0,loan_amnt,term,int_rate,installment,grade,sub_grade,emp_length,home_ownership,annual_inc,verification_status,loan_status,dti,open_acc,pub_rec,revol_bal,revol_util,total_acc,annual_inc_joint,dti_joint,verification_status_joint,open_acc_6m,open_act_il,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,inq_fi,total_cu_tl,inq_last_12m,mort_acc,pub_rec_bankruptcies,revol_bal_joint,sec_app_inq_last_6mths,sec_app_mort_acc,sec_app_open_acc,sec_app_revol_util,sec_app_open_act_il,sec_app_num_rev_accts,cibil_score,sec_app_cibil_score
0,629234.48,60 months,30.79,20664.73,G,G1,1,RENT,2471992.6,Not Verified,No,43.97,8,0,85733.2,24.6,14,4550713.65,31.94,Not Verified,1,4,1,4,8,3688156.78,89.0,1,1,56799.65,84.0,8,1,10,0.0,0,398945.9,3,1,14,80.0,11,8,667,612
1,1123633.0,36 months,10.41,36473.69,B,B3,6,MORTGAGE,1067451.35,Verified,No,45.17,6,0,963178.21,33.4,9,2809082.5,20.86,Verified,1,1,0,0,59,31742.63,4.0,1,2,380686.86,24.0,0,0,1,0.0,0,1276222.36,0,1,8,44.1,1,6,797,667
2,1078687.68,60 months,17.47,27081.8,D,D1,5,RENT,2050630.22,Verified,No,19.76,14,0,746935.04,62.1,18,2977627.45,19.15,Source Verified,0,0,0,0,0,0.0,68.54683,0,1,140903.58,62.0,0,0,0,0.0,0,926716.32,0,0,14,59.0,0,16,677,672
3,1404541.25,60 months,12.61,31678.02,C,C1,6,MORTGAGE,2247266.0,Not Verified,No,27.78,8,0,6033965.39,60.6,16,6179981.5,12.15,Not Verified,0,0,0,0,57,0.0,68.54683,0,2,419733.11,61.0,0,2,1,2.0,0,2001976.92,0,2,10,56.0,0,11,727,692
4,168544.95,36 months,10.41,5471.53,B,B3,10,MORTGAGE,3370899.0,Not Verified,No,27.94,20,0,984471.05,24.1,36,4831621.9,20.64,Not Verified,3,4,3,4,1,2176589.48,81.0,3,8,242199.09,47.0,2,0,7,5.0,0,2513567.02,2,0,6,32.1,1,5,772,642


In [None]:
joint_df.skew()

loan_amnt                 0.349420
int_rate                  0.883889
installment               0.669546
emp_length                0.095023
annual_inc                3.147945
dti                       8.547300
open_acc                  1.144426
pub_rec                   2.966552
revol_bal                 6.039042
revol_util                0.093473
total_acc                 0.973892
annual_inc_joint          2.464895
dti_joint                 0.197660
open_acc_6m               1.650364
open_act_il               2.635655
open_il_12m               1.673514
open_il_24m               1.569746
mths_since_rcnt_il        2.936924
total_bal_il              2.594103
il_util                  -0.468238
open_rv_12m               1.911695
open_rv_24m               1.759718
max_bal_bc                1.886417
all_util                 -0.328586
inq_fi                    2.216711
total_cu_tl               2.754198
inq_last_12m              2.110888
mort_acc                  1.157866
pub_rec_bankruptcies

In [None]:
# Columns Needed to Change Dtypes (Object-to-integer/datetime)

pd.set_option('display.max_columns', None)
joint_df.select_dtypes(include = 'object',exclude = 'int64').head(5)

Unnamed: 0,term,grade,sub_grade,home_ownership,verification_status,loan_status,verification_status_joint
0,60 months,G,G1,RENT,Not Verified,No,Not Verified
1,36 months,B,B3,MORTGAGE,Verified,No,Verified
2,60 months,D,D1,RENT,Verified,No,Source Verified
3,60 months,C,C1,MORTGAGE,Not Verified,No,Not Verified
4,36 months,B,B3,MORTGAGE,Not Verified,No,Not Verified


In [None]:
joint_df.term.value_counts()

 36 months    61758
 60 months    45388
Name: term, dtype: int64

In [None]:
joint_df.grade.value_counts()

C    31748
B    30015
A    21045
D    16592
E     5547
F     1569
G      630
Name: grade, dtype: int64

In [None]:
joint_df.sub_grade.value_counts()

C1    6820
B1    6560
C3    6526
B2    6464
B5    6456
C5    6318
C4    6143
C2    5941
B4    5551
A4    5132
B3    4984
A5    4249
A2    3929
A3    3920
A1    3815
D1    3670
D2    3645
D3    3562
D4    2958
D5    2757
E5    1527
E3    1177
E4    1038
E2     928
E1     877
F1     496
G1     339
F3     308
F2     274
F5     258
F4     233
G5      79
G4      72
G3      71
G2      69
Name: sub_grade, dtype: int64

In [None]:
joint_df.home_ownership.value_counts()

MORTGAGE    70517
RENT        26163
OWN         10463
NONE            3
Name: home_ownership, dtype: int64

In [None]:
joint_df.verification_status.value_counts()

Not Verified       41590
Verified           35123
Source Verified    30433
Name: verification_status, dtype: int64

In [None]:
joint_df.verification_status_joint.value_counts()

Not Verified       49297
Source Verified    34545
Verified           23304
Name: verification_status_joint, dtype: int64

In [None]:
def obj_to_num_joint(joint_df):
  
  col_del_list = []   
  
  term_to_int = {' 36 months': 1,
                  ' 60 months': 2}
  
  grade_to_int = dict(zip(['A', 'B', 'C', 'D', 'E', 'F', 'G'], np.arange(7, 0, -1)))
    
  sub_grade_sorted = {'G5': 0, 'G4': 1, 'G3': 2, 'G2': 3, 'G1': 4,
                      'F5': 5, 'F4': 6, 'F3': 7, 'F2': 8, 'F1': 9,
                      'E5': 10, 'E4': 11, 'E3': 12, 'E2': 13, 'E1': 14,
                      'D5': 15, 'D4': 16, 'D3': 17, 'D2': 18, 'D1': 19,
                      'C5': 20, 'C4': 21, 'C3': 22, 'C2': 23, 'C1': 24,
                      'B5': 25, 'B4': 26, 'B3': 27, 'B2': 28, 'B1': 29,
                      'A5': 30, 'A4': 31, 'A3': 32, 'A2': 33, 'A1': 34}    
  
  home_to_int = {'MORTGAGE': 4, 'RENT': 3, 'OWN': 5,'ANY': 2,'OTHER': 1, 'NONE':0 }
  
  ver_stat_to_int = {'Source Verified':2,'Verified': 1,'Not Verified': 0}

  ver_stat_to_int_joint = {'Source Verified':2,'Verified': 1,'Not Verified': 0}

  status_to_int = {'Yes':0,'No':1}

  joint_df.replace({'term': term_to_int, 'grade':grade_to_int,'sub_grade': sub_grade_sorted,
                    'home_ownership':home_to_int,'verification_status':ver_stat_to_int,
                    'verification_status_joint':ver_stat_to_int_joint,'loan_status': status_to_int}, inplace=True)

  joint_df['annual_inc']+=1
  joint_df['annual_inc_joint']+=1
  joint_df['annual_inc']=np.log(joint_df['annual_inc'])
  joint_df['total_bal_il']+=1
  joint_df['total_bal_il']=np.log(joint_df['total_bal_il'])
  joint_df['revol_bal']+=1
  joint_df['revol_bal']=np.log(joint_df['revol_bal'])
  joint_df['revol_bal_joint']+=1
  joint_df['revol_bal_joint']=np.log(joint_df['revol_bal_joint'])
  joint_df['annual_inc_joint']=np.log(joint_df['annual_inc_joint'])
  joint_df['max_bal_bc']+=1
  joint_df['max_bal_bc']=np.log(joint_df['max_bal_bc'])

  #Balance_annual_income
  joint_df['balance_annual_inc'] = np.where(joint_df['annual_inc']==0,0 , joint_df['loan_amnt'] / joint_df['annual_inc'])
  
  #Balance_annual_income
  joint_df['sec_balance_annual_inc'] = np.where(joint_df['annual_inc_joint']==0,0 , joint_df['loan_amnt'] / joint_df['annual_inc_joint'])

  joint_df['inst_amnt_ratio']=joint_df['installment']/joint_df['loan_amnt']
  col_del_list.extend(['installment','loan_amnt'])
  
  #Credit_line_ratio 
  joint_df['credit_line_ratio']=joint_df['open_acc']/joint_df['total_acc']
  
  col_del_list.extend(['open_acc','total_acc'])
  
  #mort_acc
  joint_df['mort_acc'] = np.where(joint_df['mort_acc'].isnull(), 0,joint_df['mort_acc'])

  #pub_rec_bankruptcies
  joint_df['pub_rec_bankruptcies'] = np.where(joint_df['pub_rec_bankruptcies']>0, 1, 0)
  
  # pub_rec
  joint_df['pub_rec'] = np.where(joint_df['pub_rec']>0, 1, 0)

  return joint_df

joint_scale_df = obj_to_num_joint(joint_df)

In [None]:
joint_scale_df.head()

Unnamed: 0,loan_amnt,term,int_rate,installment,grade,sub_grade,emp_length,home_ownership,annual_inc,verification_status,loan_status,dti,open_acc,pub_rec,revol_bal,revol_util,total_acc,annual_inc_joint,dti_joint,verification_status_joint,open_acc_6m,open_act_il,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,inq_fi,total_cu_tl,inq_last_12m,mort_acc,pub_rec_bankruptcies,revol_bal_joint,sec_app_inq_last_6mths,sec_app_mort_acc,sec_app_open_acc,sec_app_revol_util,sec_app_open_act_il,sec_app_num_rev_accts,cibil_score,sec_app_cibil_score,balance_annual_inc,sec_balance_annual_inc,inst_amnt_ratio,credit_line_ratio
0,629234.48,2,30.79,20664.73,1,4,1,3,14.720536,0,1,43.97,8,0,11.359007,24.6,14,15.330795,31.94,0,1,4,1,4,8,15.120638,89.0,1,1,10.947303,84.0,8,1,10,0.0,0,12.896584,3,1,14,80.0,11,8,667,612,42745.352547,41043.826258,0.032841,0.571429
1,1123633.0,1,10.41,36473.69,6,27,6,4,13.880785,1,1,45.17,6,0,13.777995,33.4,9,14.848369,20.86,1,1,1,0,0,59,10.365447,4.0,1,2,12.849735,24.0,0,0,1,0.0,0,14.059416,0,1,8,44.1,1,6,797,667,80948.805759,75673.834126,0.032461,0.666667
2,1078687.68,2,17.47,27081.8,4,19,5,3,14.533658,1,1,19.76,14,0,13.523735,62.1,18,14.906638,19.15,2,0,0,0,0,0,0.0,68.54683,0,1,11.855838,62.0,0,0,0,0.0,0,13.739404,0,0,14,59.0,0,16,677,672,74219.970222,72362.909752,0.025106,0.777778
3,1404541.25,2,12.61,31678.02,5,24,6,4,14.625225,0,1,27.78,8,0,15.612915,60.6,16,15.636826,12.15,0,0,0,0,0,57,0.0,68.54683,0,2,12.947377,61.0,0,2,1,2.0,0,14.509646,0,2,10,56.0,0,11,727,692,96035.528653,89822.656478,0.022554,0.5
4,168544.95,1,10.41,5471.53,6,27,10,4,15.03069,0,1,27.94,20,0,13.799861,24.1,36,15.390693,20.64,0,3,4,3,4,1,14.59327,81.0,3,8,12.397519,47.0,2,0,7,5.0,0,14.737214,2,0,6,32.1,1,5,772,642,11213.387164,10951.095593,0.032463,0.555556


In [None]:
joint_scale_df.skew()

loan_amnt                    0.349420
term                         0.309199
int_rate                     0.883889
installment                  0.669546
grade                       -0.594009
sub_grade                   -0.552815
emp_length                   0.095023
home_ownership              -0.022295
annual_inc                  -6.640559
verification_status          0.192623
dti                          8.547300
open_acc                     1.144426
pub_rec                      2.188209
revol_bal                   -4.188446
revol_util                   0.093473
total_acc                    0.973892
annual_inc_joint             0.043571
dti_joint                    0.197660
verification_status_joint    0.270250
open_acc_6m                  1.650364
open_act_il                  2.635655
open_il_12m                  1.673514
open_il_24m                  1.569746
mths_since_rcnt_il           2.936924
total_bal_il                -2.124451
il_util                     -0.468238
open_rv_12m 

In [None]:
pd.set_option('display.max_columns', None)
joint_scale_df.head(10)

Unnamed: 0,loan_amnt,term,int_rate,installment,grade,sub_grade,emp_length,home_ownership,annual_inc,verification_status,loan_status,dti,open_acc,pub_rec,revol_bal,revol_util,total_acc,annual_inc_joint,dti_joint,verification_status_joint,open_acc_6m,open_act_il,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,inq_fi,total_cu_tl,inq_last_12m,mort_acc,pub_rec_bankruptcies,revol_bal_joint,sec_app_inq_last_6mths,sec_app_mort_acc,sec_app_open_acc,sec_app_revol_util,sec_app_open_act_il,sec_app_num_rev_accts,cibil_score,sec_app_cibil_score,balance_annual_inc,sec_balance_annual_inc,inst_amnt_ratio,credit_line_ratio
0,629234.48,2,30.79,20664.73,1,4,1,3,14.720536,0,1,43.97,8,0,11.359007,24.6,14,15.330795,31.94,0,1,4,1,4,8,15.120638,89.0,1,1,10.947303,84.0,8,1,10,0.0,0,12.896584,3,1,14,80.0,11,8,667,612,42745.352547,41043.826258,0.032841,0.571429
1,1123633.0,1,10.41,36473.69,6,27,6,4,13.880785,1,1,45.17,6,0,13.777995,33.4,9,14.848369,20.86,1,1,1,0,0,59,10.365447,4.0,1,2,12.849735,24.0,0,0,1,0.0,0,14.059416,0,1,8,44.1,1,6,797,667,80948.805759,75673.834126,0.032461,0.666667
2,1078687.68,2,17.47,27081.8,4,19,5,3,14.533658,1,1,19.76,14,0,13.523735,62.1,18,14.906638,19.15,2,0,0,0,0,0,0.0,68.54683,0,1,11.855838,62.0,0,0,0,0.0,0,13.739404,0,0,14,59.0,0,16,677,672,74219.970222,72362.909752,0.025106,0.777778
3,1404541.25,2,12.61,31678.02,5,24,6,4,14.625225,0,1,27.78,8,0,15.612915,60.6,16,15.636826,12.15,0,0,0,0,0,57,0.0,68.54683,0,2,12.947377,61.0,0,2,1,2.0,0,14.509646,0,2,10,56.0,0,11,727,692,96035.528653,89822.656478,0.022554,0.5
4,168544.95,1,10.41,5471.53,6,27,10,4,15.03069,0,1,27.94,20,0,13.799861,24.1,36,15.390693,20.64,0,3,4,3,4,1,14.59327,81.0,3,8,12.397519,47.0,2,0,7,5.0,0,14.737214,2,0,6,32.1,1,5,772,642,11213.387164,10951.095593,0.032463,0.555556
5,1460722.9,2,11.98,32478.61,6,25,1,4,14.432854,0,1,20.05,9,0,12.868739,15.9,21,15.609174,18.8,0,1,1,1,2,8,14.140027,93.0,2,2,10.843153,45.0,3,8,11,2.0,0,14.285934,0,2,17,31.6,2,20,707,722,101208.18401,93581.047654,0.022235,0.428571
6,898906.4,2,11.98,19986.62,6,25,5,4,15.490223,0,1,11.79,11,0,13.285911,41.3,24,15.913079,7.93,0,2,2,0,1,20,13.190162,42.0,3,4,12.721251,41.0,1,1,2,1.0,0,13.774313,0,1,3,20.7,1,4,727,697,58030.567163,56488.526266,0.022234,0.458333
7,466307.7,1,7.34,14471.27,7,31,4,4,14.625225,0,1,30.84,16,0,13.077237,50.9,27,15.490223,20.55,0,2,8,0,1,24,14.628445,75.0,2,3,12.572234,69.0,0,5,1,1.0,0,13.327858,3,1,9,45.9,6,5,722,642,31883.795855,30103.357038,0.031034,0.592593
8,1797812.8,2,25.81,53625.38,3,11,10,4,15.764659,0,1,31.17,28,0,14.414381,37.3,47,15.935906,30.37,0,0,5,3,3,8,15.538511,70.0,0,1,13.648787,54.0,1,3,3,4.0,0,15.676047,0,0,13,48.5,0,15,722,682,114040.700792,112815.220466,0.029828,0.595745
9,1404541.25,2,14.07,32732.55,5,22,3,4,14.491694,1,1,53.03,8,1,13.637976,60.3,18,15.511057,21.04,1,3,3,2,3,3,14.934235,88.0,1,1,9.316947,80.0,2,3,3,2.0,1,14.096363,3,3,9,63.1,3,6,682,677,96920.432222,90550.971692,0.023305,0.444444


In [None]:
joint_scale_df.to_csv('/content/drive/MyDrive/Colab Notebooks/Joint_clean_ML.csv',index = False)