## 5.1. Before cleaning dataset

In [57]:
print('Original shape: ', df1.shape)

## target
target = ['loan_status']
print("\nTarget var: ", target)

## cat vars
cat_vars = [var for var in df1.columns if df1[var].dtypes == 'O' if var not in target]
print("\nCategorical vars: ", cat_vars)
print('There are {} categorical variables'.format(len(cat_vars)))

num_vars = [var for var in df1.columns if df1[var].dtypes != 'O' if var not in target]
print("\nNumerical vars: ", num_vars)
print('There are {} numerical variables'.format(len(num_vars)))

dis_vars = [var for var in num_vars if len(df1[var].unique()) < 30]
print("\nDiscrete vars: ", dis_vars)
print('There are {} numerical variables'.format(len(dis_vars)))

cont_vars = [var for var in num_vars if var not in dis_vars]
print("\nContinous vars: ", cont_vars)
print('There are {} numerical variables'.format(len(cont_vars)))

Original shape:  (2260668, 145)

Target var:  ['loan_status']

Categorical vars:  ['term', 'grade', 'sub_grade', 'emp_title', 'emp_length', 'home_ownership', 'verification_status', 'issue_d', 'pymnt_plan', 'desc', 'purpose', 'title', 'zip_code', 'addr_state', 'earliest_cr_line', 'initial_list_status', 'last_pymnt_d', 'next_pymnt_d', 'last_credit_pull_d', 'application_type', 'verification_status_joint', 'sec_app_earliest_cr_line', 'hardship_flag', 'hardship_type', 'hardship_reason', 'hardship_status', 'hardship_start_date', 'hardship_end_date', 'payment_plan_start_date', 'hardship_loan_status', 'disbursement_method', 'debt_settlement_flag', 'debt_settlement_flag_date', 'settlement_status', 'settlement_date']
There are 35 categorical variables

Numerical vars:  ['id', 'member_id', 'loan_amnt', 'funded_amnt', 'funded_amnt_inv', 'int_rate', 'installment', 'annual_inc', 'url', 'dti', 'delinq_2yrs', 'inq_last_6mths', 'mths_since_last_delinq', 'mths_since_last_record', 'open_acc', 'pub_rec', 

## 5.2. Cleaning data

### 5.2.1. Define target

In [46]:
df1['loan_status'].value_counts()

Fully Paid                                             1041952
Current                                                 919695
Charged Off                                             261655
Late (31-120 days)                                       21897
In Grace Period                                           8952
Late (16-30 days)                                         3737
Does not meet the credit policy. Status:Fully Paid        1988
Does not meet the credit policy. Status:Charged Off        761
Default                                                     31
Name: loan_status, dtype: int64

In [58]:
df1['loan_repaid'] = df1['loan_status'].map({'Fully Paid':0, 'Charged Off':1})
df1 = df1.drop('loan_status',axis=1)
df1.shape

(2260668, 145)

### 5.2.2. Remove redundant rows

In [59]:
df1 = df1[df1.loan_repaid.isna() == False]
df1 = df1.reset_index(drop = True)
print('New shape: ', df1.shape)

New shape:  (1303607, 145)


### 5.2.3. Remove unneccessary cols

In [60]:
removed_vars = ['id', 'member_id', 'url', 'policy_code']
seleveted_vars = [var for var in df1.columns if var not in removed_vars]
df1 = df1[seleveted_vars]
print(df1.shape)

(1303607, 141)


### 5.2.4. Convert wrong format to right format

#### Convert ordinal vars to number

In [61]:
print(df1['term'].value_counts())
df1['term'] = df1['term'].apply(lambda term: int(term[:3]))
df1['term'].value_counts()

 36 months    988754
 60 months    314853
Name: term, dtype: int64


36    988754
60    314853
Name: term, dtype: int64

In [62]:
print(df1['emp_length'].unique())
df1['num_emp_length'] = df1['emp_length'].map({'10+ years':10, 
                                         '9 years':9, 
                                         '8 years':8,
                                         '7 years':7,
                                         '6 years':6,
                                         '5 years':5,
                                         '4 years':4,
                                         '3 years':3,
                                         '2 years':2,
                                         '1 year':1,
                                         '< 1 year':0.5})
df1 = df1.drop('emp_length',axis=1)
df1['num_emp_length'].value_counts()

['5 years' '< 1 year' '10+ years' '3 years' '4 years' '1 year' '8 years'
 nan '2 years' '6 years' '9 years' '7 years']


10.0000    428547
2.0000     117820
0.5000     104550
3.0000     104200
1.0000      85677
5.0000      81623
4.0000      78029
6.0000      60933
8.0000      59125
7.0000      58145
9.0000      49504
Name: num_emp_length, dtype: int64

#### Convert nominal vars to categorical vars

#### Convert vars to datetime

In [63]:
df1[['issue_d', 'last_pymnt_d','earliest_cr_line', 'next_pymnt_d', 'last_credit_pull_d']].head()

Unnamed: 0,issue_d,last_pymnt_d,earliest_cr_line,next_pymnt_d,last_credit_pull_d
0,Dec-2018,Jan-2019,Jan-2012,,Jan-2019
1,Dec-2018,Feb-2019,Jun-2009,,Feb-2019
2,Dec-2018,Feb-2019,Feb-1999,,Feb-2019
3,Dec-2018,Feb-2019,Dec-2003,,Feb-2019
4,Dec-2018,Feb-2019,Oct-1997,,Feb-2019


In [64]:
df1['issue_d'] = pd.to_datetime(df1.issue_d)
df1['last_pymnt_d'] = pd.to_datetime(df1.last_pymnt_d)
df1['earliest_cr_line'] = pd.to_datetime(df1.earliest_cr_line)
df1['next_pymnt_d'] = pd.to_datetime(df1.next_pymnt_d)
df1['last_credit_pull_d'] = pd.to_datetime(df1.last_credit_pull_d)

df1[['issue_d', 'last_pymnt_d','earliest_cr_line', 'next_pymnt_d', 'last_credit_pull_d']].head()

Unnamed: 0,issue_d,last_pymnt_d,earliest_cr_line,next_pymnt_d,last_credit_pull_d
0,2018-12-01,2019-01-01,2012-01-01,NaT,2019-01-01
1,2018-12-01,2019-02-01,2009-06-01,NaT,2019-02-01
2,2018-12-01,2019-02-01,1999-02-01,NaT,2019-02-01
3,2018-12-01,2019-02-01,2003-12-01,NaT,2019-02-01
4,2018-12-01,2019-02-01,1997-10-01,NaT,2019-02-01


### 5.2.5. Remove duplicated features 

### 5.2.6. Remove independent features with high correlation

### 5.2.7. Reset index

In [None]:
df1 = df1.reset_index(drop = True)

## 5.3. After cleaning

In [67]:
## target
target = ['Class']
print("\nTarget var: ", target)

text_vars = ['desc']

mixed_vars = ['Ticket','Cabin']

## cat vars
cat_vars = [var for var in df1.columns if df1[var].dtypes == 'O' if var not in target]
print("\n========================================================================")
print("\nCategorical vars: ", cat_vars)
print('There are {} categorical variables'.format(len(cat_vars)))

one_level = [var for var in cat_vars if len(df1[var].unique()) == 1 if var not in mixed_vars+text_vars]
print("\nOne level vars: ", one_level)
print('There are {} One level variables'.format(len(one_level)))

low_levels = [var for var in cat_vars if len(df1[var].unique()) <= 30 if var not in mixed_vars+text_vars]
print("\nLow cardinality vars: ", low_levels)
print('There are {} Low cardinality variables'.format(len(low_levels)))

high_levels = [var for var in cat_vars if len(df1[var].unique()) > 30 if var not in mixed_vars+text_vars]
print("\nHigh cardinality vars: ", high_levels)
print('There are {} High cardinality variables'.format(len(high_levels)))

num_vars = [var for var in df1.columns if df1[var].dtypes != 'O' if var not in target]
print("\n========================================================================")
print("\nNumerical vars: ", num_vars)
print('\nThere are {} Numerical variables'.format(len(num_vars)))

dis_vars = [var for var in num_vars if len(df1[var].unique()) < 20]
print("\nDiscrete vars: ", dis_vars)
print('There are {} Discrete variables'.format(len(dis_vars)))

cont_vars = [var for var in num_vars if var not in dis_vars]
print("\nContinous vars: ", cont_vars)
print('There are {} Continous variables'.format(len(cont_vars)))

datetime_vars = [var for var in df1.columns if df1[var].dtypes == '<M8[ns]']
print("\n========================================================================")
print("\nDateTime vars: ", datetime_vars)
print('There are {} DateTime variables'.format(len(datetime_vars)))

print("\n========================================================================")
print("\nMixed type vars: ", mixed_vars)
print('There are {} Mixed type  variables'.format(len(mixed_vars)))

print("\n========================================================================")
print("\nText vars: ", text_vars)
print('There are {} Text variables'.format(len(text_vars)))


Target var:  ['Class']


Categorical vars:  ['grade', 'sub_grade', 'emp_title', 'home_ownership', 'verification_status', 'pymnt_plan', 'desc', 'purpose', 'title', 'zip_code', 'addr_state', 'initial_list_status', 'application_type', 'verification_status_joint', 'sec_app_earliest_cr_line', 'hardship_flag', 'hardship_type', 'hardship_reason', 'hardship_status', 'hardship_start_date', 'hardship_end_date', 'payment_plan_start_date', 'hardship_loan_status', 'disbursement_method', 'debt_settlement_flag', 'debt_settlement_flag_date', 'settlement_status', 'settlement_date']
There are 28 categorical variables

One level vars:  ['pymnt_plan']
There are 1 One level variables

Low cardinality vars:  ['grade', 'home_ownership', 'verification_status', 'pymnt_plan', 'purpose', 'initial_list_status', 'application_type', 'verification_status_joint', 'hardship_flag', 'hardship_type', 'hardship_reason', 'hardship_status', 'hardship_start_date', 'hardship_end_date', 'payment_plan_start_date', 'hardship_lo

-----------------------------------------------------------------------------------------------------------------------

### 4.5. Reset index

In [69]:
df1.head(5)

Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,home_ownership,annual_inc,verification_status,issue_d,pymnt_plan,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,mths_since_last_delinq,mths_since_last_record,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,out_prncp_inv,total_pymnt,total_pymnt_inv,total_rec_prncp,total_rec_int,total_rec_late_fee,recoveries,collection_recovery_fee,last_pymnt_d,last_pymnt_amnt,next_pymnt_d,last_credit_pull_d,collections_12_mths_ex_med,mths_since_last_major_derog,application_type,annual_inc_joint,dti_joint,verification_status_joint,acc_now_delinq,tot_coll_amt,tot_cur_bal,open_acc_6m,open_act_il,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,total_rev_hi_lim,inq_fi,total_cu_tl,inq_last_12m,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,bc_util,chargeoff_within_12_mths,delinq_amnt,mo_sin_old_il_acct,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_bc_dlq,mths_since_recent_inq,mths_since_recent_revol_delinq,num_accts_ever_120_pd,num_actv_bc_tl,num_actv_rev_tl,num_bc_sats,num_bc_tl,num_il_tl,num_op_rev_tl,num_rev_accts,num_rev_tl_bal_gt_0,num_sats,num_tl_120dpd_2m,num_tl_30dpd,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit,revol_bal_joint,sec_app_earliest_cr_line,sec_app_inq_last_6mths,sec_app_mort_acc,sec_app_open_acc,sec_app_revol_util,sec_app_open_act_il,sec_app_num_rev_accts,sec_app_chargeoff_within_12_mths,sec_app_collections_12_mths_ex_med,sec_app_mths_since_last_major_derog,hardship_flag,hardship_type,hardship_reason,hardship_status,deferral_term,hardship_amount,hardship_start_date,hardship_end_date,payment_plan_start_date,hardship_length,hardship_dpd,hardship_loan_status,orig_projected_additional_accrued_interest,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term,loan_repaid,num_emp_length
0,30000,30000,30000.0,36,22.35,1151.16,D,D5,Supervisor,MORTGAGE,100000.0,Source Verified,2018-12-01,n,,debt_consolidation,Debt consolidation,917xx,CA,30.46,0.0,2012-01-01,0.0,51.0,84.0,11.0,1.0,15603,37.0,19.0,w,0.0,0.0,30026.4425,30026.44,30000.0,26.44,0.0,0.0,0.0,2019-01-01,30082.32,NaT,2019-01-01,0.0,,Joint App,150000.0,23.38,Source Verified,0.0,0.0,472330.0,1.0,3.0,2.0,2.0,2.0,82850.0,75.0,0.0,1.0,9713.0,60.0,42200.0,1.0,1.0,3.0,4.0,42939.0,15181.0,46.9,0.0,0.0,83.0,73.0,23.0,2.0,1.0,23.0,,8.0,,0.0,3.0,4.0,3.0,5.0,10.0,6.0,8.0,4.0,11.0,0.0,0.0,0.0,2.0,89.5,33.3,1.0,0.0,527120.0,98453.0,28600.0,101984.0,52417.0,Jul-2006,0.0,1.0,16.0,25.2,2.0,15.0,0.0,0.0,70.0,N,,,,,,,,,,,,,,,Cash,N,,,,,,,0.0,5.0
1,40000,40000,40000.0,60,16.14,975.71,C,C4,Assistant to the Treasurer (Payroll),MORTGAGE,45000.0,Verified,2018-12-01,n,,credit_card,Credit card refinancing,456xx,OH,50.53,0.0,2009-06-01,0.0,,,18.0,0.0,34971,64.5,37.0,w,0.0,0.0,40856.679,40856.68,40000.0,856.68,0.0,0.0,0.0,2019-02-01,40910.48,NaT,2019-02-01,0.0,,Joint App,92000.0,35.66,Verified,0.0,0.0,271068.0,2.0,8.0,3.0,4.0,5.0,126749.0,87.0,1.0,1.0,5874.0,75.0,54200.0,4.0,2.0,4.0,5.0,15059.0,14930.0,67.3,0.0,0.0,114.0,70.0,2.0,2.0,1.0,2.0,,9.0,,0.0,7.0,9.0,7.0,7.0,26.0,9.0,10.0,9.0,18.0,0.0,0.0,0.0,4.0,100.0,42.9,0.0,0.0,344802.0,161720.0,45700.0,167965.0,47188.0,Apr-1990,0.0,1.0,32.0,61.1,16.0,19.0,0.0,0.0,,N,,,,,,,,,,,,,,,Cash,N,,,,,,,0.0,0.5
2,20000,20000,20000.0,36,7.56,622.68,A,A3,Teacher,MORTGAGE,100000.0,Not Verified,2018-12-01,n,,credit_card,Credit card refinancing,982xx,WA,18.92,0.0,1999-02-01,0.0,48.0,,9.0,0.0,25416,29.9,19.0,w,0.0,0.0,20215.7924,20215.79,20000.0,215.79,0.0,0.0,0.0,2019-02-01,20228.39,NaT,2019-02-01,0.0,,Joint App,190000.0,11.75,Not Verified,0.0,0.0,515779.0,1.0,2.0,0.0,1.0,13.0,46153.0,71.0,1.0,2.0,9759.0,39.0,85100.0,2.0,2.0,0.0,5.0,57309.0,59684.0,29.9,0.0,0.0,171.0,238.0,1.0,1.0,5.0,1.0,,13.0,48.0,0.0,5.0,5.0,5.0,6.0,5.0,5.0,9.0,5.0,9.0,0.0,0.0,0.0,1.0,94.7,20.0,0.0,0.0,622183.0,71569.0,85100.0,74833.0,43287.0,Aug-1998,0.0,3.0,10.0,29.7,2.0,7.0,0.0,0.0,,N,,,,,,,,,,,,,,,Cash,N,,,,,,,0.0,10.0
3,4500,4500,4500.0,36,11.31,147.99,B,B3,Accounts Examiner III,RENT,38500.0,Not Verified,2018-12-01,n,,credit_card,Credit card refinancing,786xx,TX,4.64,0.0,2003-12-01,0.0,25.0,,12.0,0.0,4472,15.3,25.0,w,0.0,0.0,4549.2171,4549.22,4500.0,49.22,0.0,0.0,0.0,2019-02-01,4553.46,NaT,2019-02-01,0.0,25.0,Individual,,,,0.0,306.0,29137.0,2.0,1.0,1.0,1.0,3.0,24665.0,100.0,3.0,7.0,3612.0,54.0,29200.0,0.0,3.0,3.0,8.0,2428.0,10735.0,28.9,0.0,0.0,180.0,69.0,2.0,2.0,0.0,2.0,,6.0,,1.0,3.0,5.0,5.0,6.0,10.0,11.0,15.0,5.0,12.0,0.0,0.0,0.0,4.0,91.7,0.0,0.0,0.0,53795.0,29137.0,15100.0,24595.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,,0.0,10.0
4,8425,8425,8425.0,36,27.27,345.18,E,E5,Senior Director Risk Management,MORTGAGE,450000.0,Verified,2018-12-01,n,,credit_card,Credit card refinancing,020xx,MA,12.37,0.0,1997-10-01,0.0,,,21.0,0.0,36812,65.7,37.0,w,0.0,0.0,8735.149,8735.15,8425.0,310.15,0.0,0.0,0.0,2019-02-01,8754.3,NaT,2019-02-01,0.0,,Joint App,515000.0,16.47,Not Verified,0.0,0.0,690320.0,0.0,6.0,1.0,6.0,12.0,152382.0,70.0,1.0,4.0,9744.0,68.0,56000.0,7.0,0.0,5.0,10.0,34516.0,11632.0,74.6,0.0,0.0,254.0,89.0,12.0,12.0,4.0,18.0,,4.0,,0.0,6.0,8.0,8.0,10.0,16.0,13.0,17.0,8.0,21.0,0.0,0.0,0.0,2.0,100.0,50.0,0.0,0.0,768304.0,189194.0,45800.0,189054.0,141905.0,Jun-2013,0.0,1.0,4.0,77.4,1.0,4.0,0.0,0.0,,N,,,,,,,,,,,,,,,Cash,N,,,,,,,0.0,3.0


In [70]:
df1 = df1.reset_index(drop = True)
df1.head(5)

Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,home_ownership,annual_inc,verification_status,issue_d,pymnt_plan,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,mths_since_last_delinq,mths_since_last_record,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,out_prncp_inv,total_pymnt,total_pymnt_inv,total_rec_prncp,total_rec_int,total_rec_late_fee,recoveries,collection_recovery_fee,last_pymnt_d,last_pymnt_amnt,next_pymnt_d,last_credit_pull_d,collections_12_mths_ex_med,mths_since_last_major_derog,application_type,annual_inc_joint,dti_joint,verification_status_joint,acc_now_delinq,tot_coll_amt,tot_cur_bal,open_acc_6m,open_act_il,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,total_rev_hi_lim,inq_fi,total_cu_tl,inq_last_12m,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,bc_util,chargeoff_within_12_mths,delinq_amnt,mo_sin_old_il_acct,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_bc_dlq,mths_since_recent_inq,mths_since_recent_revol_delinq,num_accts_ever_120_pd,num_actv_bc_tl,num_actv_rev_tl,num_bc_sats,num_bc_tl,num_il_tl,num_op_rev_tl,num_rev_accts,num_rev_tl_bal_gt_0,num_sats,num_tl_120dpd_2m,num_tl_30dpd,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit,revol_bal_joint,sec_app_earliest_cr_line,sec_app_inq_last_6mths,sec_app_mort_acc,sec_app_open_acc,sec_app_revol_util,sec_app_open_act_il,sec_app_num_rev_accts,sec_app_chargeoff_within_12_mths,sec_app_collections_12_mths_ex_med,sec_app_mths_since_last_major_derog,hardship_flag,hardship_type,hardship_reason,hardship_status,deferral_term,hardship_amount,hardship_start_date,hardship_end_date,payment_plan_start_date,hardship_length,hardship_dpd,hardship_loan_status,orig_projected_additional_accrued_interest,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term,loan_repaid,num_emp_length
0,30000,30000,30000.0,36,22.35,1151.16,D,D5,Supervisor,MORTGAGE,100000.0,Source Verified,2018-12-01,n,,debt_consolidation,Debt consolidation,917xx,CA,30.46,0.0,2012-01-01,0.0,51.0,84.0,11.0,1.0,15603,37.0,19.0,w,0.0,0.0,30026.4425,30026.44,30000.0,26.44,0.0,0.0,0.0,2019-01-01,30082.32,NaT,2019-01-01,0.0,,Joint App,150000.0,23.38,Source Verified,0.0,0.0,472330.0,1.0,3.0,2.0,2.0,2.0,82850.0,75.0,0.0,1.0,9713.0,60.0,42200.0,1.0,1.0,3.0,4.0,42939.0,15181.0,46.9,0.0,0.0,83.0,73.0,23.0,2.0,1.0,23.0,,8.0,,0.0,3.0,4.0,3.0,5.0,10.0,6.0,8.0,4.0,11.0,0.0,0.0,0.0,2.0,89.5,33.3,1.0,0.0,527120.0,98453.0,28600.0,101984.0,52417.0,Jul-2006,0.0,1.0,16.0,25.2,2.0,15.0,0.0,0.0,70.0,N,,,,,,,,,,,,,,,Cash,N,,,,,,,0.0,5.0
1,40000,40000,40000.0,60,16.14,975.71,C,C4,Assistant to the Treasurer (Payroll),MORTGAGE,45000.0,Verified,2018-12-01,n,,credit_card,Credit card refinancing,456xx,OH,50.53,0.0,2009-06-01,0.0,,,18.0,0.0,34971,64.5,37.0,w,0.0,0.0,40856.679,40856.68,40000.0,856.68,0.0,0.0,0.0,2019-02-01,40910.48,NaT,2019-02-01,0.0,,Joint App,92000.0,35.66,Verified,0.0,0.0,271068.0,2.0,8.0,3.0,4.0,5.0,126749.0,87.0,1.0,1.0,5874.0,75.0,54200.0,4.0,2.0,4.0,5.0,15059.0,14930.0,67.3,0.0,0.0,114.0,70.0,2.0,2.0,1.0,2.0,,9.0,,0.0,7.0,9.0,7.0,7.0,26.0,9.0,10.0,9.0,18.0,0.0,0.0,0.0,4.0,100.0,42.9,0.0,0.0,344802.0,161720.0,45700.0,167965.0,47188.0,Apr-1990,0.0,1.0,32.0,61.1,16.0,19.0,0.0,0.0,,N,,,,,,,,,,,,,,,Cash,N,,,,,,,0.0,0.5
2,20000,20000,20000.0,36,7.56,622.68,A,A3,Teacher,MORTGAGE,100000.0,Not Verified,2018-12-01,n,,credit_card,Credit card refinancing,982xx,WA,18.92,0.0,1999-02-01,0.0,48.0,,9.0,0.0,25416,29.9,19.0,w,0.0,0.0,20215.7924,20215.79,20000.0,215.79,0.0,0.0,0.0,2019-02-01,20228.39,NaT,2019-02-01,0.0,,Joint App,190000.0,11.75,Not Verified,0.0,0.0,515779.0,1.0,2.0,0.0,1.0,13.0,46153.0,71.0,1.0,2.0,9759.0,39.0,85100.0,2.0,2.0,0.0,5.0,57309.0,59684.0,29.9,0.0,0.0,171.0,238.0,1.0,1.0,5.0,1.0,,13.0,48.0,0.0,5.0,5.0,5.0,6.0,5.0,5.0,9.0,5.0,9.0,0.0,0.0,0.0,1.0,94.7,20.0,0.0,0.0,622183.0,71569.0,85100.0,74833.0,43287.0,Aug-1998,0.0,3.0,10.0,29.7,2.0,7.0,0.0,0.0,,N,,,,,,,,,,,,,,,Cash,N,,,,,,,0.0,10.0
3,4500,4500,4500.0,36,11.31,147.99,B,B3,Accounts Examiner III,RENT,38500.0,Not Verified,2018-12-01,n,,credit_card,Credit card refinancing,786xx,TX,4.64,0.0,2003-12-01,0.0,25.0,,12.0,0.0,4472,15.3,25.0,w,0.0,0.0,4549.2171,4549.22,4500.0,49.22,0.0,0.0,0.0,2019-02-01,4553.46,NaT,2019-02-01,0.0,25.0,Individual,,,,0.0,306.0,29137.0,2.0,1.0,1.0,1.0,3.0,24665.0,100.0,3.0,7.0,3612.0,54.0,29200.0,0.0,3.0,3.0,8.0,2428.0,10735.0,28.9,0.0,0.0,180.0,69.0,2.0,2.0,0.0,2.0,,6.0,,1.0,3.0,5.0,5.0,6.0,10.0,11.0,15.0,5.0,12.0,0.0,0.0,0.0,4.0,91.7,0.0,0.0,0.0,53795.0,29137.0,15100.0,24595.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,,0.0,10.0
4,8425,8425,8425.0,36,27.27,345.18,E,E5,Senior Director Risk Management,MORTGAGE,450000.0,Verified,2018-12-01,n,,credit_card,Credit card refinancing,020xx,MA,12.37,0.0,1997-10-01,0.0,,,21.0,0.0,36812,65.7,37.0,w,0.0,0.0,8735.149,8735.15,8425.0,310.15,0.0,0.0,0.0,2019-02-01,8754.3,NaT,2019-02-01,0.0,,Joint App,515000.0,16.47,Not Verified,0.0,0.0,690320.0,0.0,6.0,1.0,6.0,12.0,152382.0,70.0,1.0,4.0,9744.0,68.0,56000.0,7.0,0.0,5.0,10.0,34516.0,11632.0,74.6,0.0,0.0,254.0,89.0,12.0,12.0,4.0,18.0,,4.0,,0.0,6.0,8.0,8.0,10.0,16.0,13.0,17.0,8.0,21.0,0.0,0.0,0.0,2.0,100.0,50.0,0.0,0.0,768304.0,189194.0,45800.0,189054.0,141905.0,Jun-2013,0.0,1.0,4.0,77.4,1.0,4.0,0.0,0.0,,N,,,,,,,,,,,,,,,Cash,N,,,,,,,0.0,3.0


-------------------------------------------------------------------------