In [131]:
# Data and Stats packages
import numpy as np
import pandas as pd
import re
import statsmodels.api as sm
from statsmodels.api import OLS
from sklearn import metrics, datasets
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.preprocessing import PolynomialFeatures
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler



# Visualization packages
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
matplotlib.rcParams['figure.figsize'] = (13.0, 6.0)

# Other
import itertools

# Aesthetic settings
from IPython.display import display
pd.set_option('display.max_columns', 999)
pd.set_option('display.width', 500)
sns.set_style('whitegrid')
sns.set_context('talk')

#Suppress warnings
import warnings
warnings.filterwarnings('ignore')


In [132]:
#import data
df_07to11 = pd.read_csv('./data/LoanStats3a.csv',header=1)
df_2016Q1 = pd.read_csv('./data/LoanStats_2016Q1.csv',header=1)
df_2016Q2 = pd.read_csv('./data/LoanStats_2016Q2.csv',header=1)
df_2016Q3 = pd.read_csv('./data/LoanStats_2016Q3.csv',header=1)
df_2016Q4 = pd.read_csv('./data/LoanStats_2016Q4.csv',header=1)

In [133]:
#concat quarterly dfs for 2016
df_16 = pd.concat([df_2016Q1,df_2016Q2,df_2016Q3,df_2016Q4])

In [134]:
#figure out index when 2007 data begins
not07 = True
i = 0
while(not07):
    val = df_07to11['issue_d'].values[i][-2:]  
    if val == '07':
        print(i)
        not07 = False
    i+=1

39533


In [136]:
#drop all data points before this (2008-2011 data)
df_07 = df_07to11.drop(np.arange(39533))

In [137]:
df_07.shape

(3005, 145)

In [139]:
df_07.head()

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,url,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,mths_since_last_delinq,mths_since_last_record,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,out_prncp_inv,total_pymnt,total_pymnt_inv,total_rec_prncp,total_rec_int,total_rec_late_fee,recoveries,collection_recovery_fee,last_pymnt_d,last_pymnt_amnt,next_pymnt_d,last_credit_pull_d,collections_12_mths_ex_med,mths_since_last_major_derog,policy_code,application_type,annual_inc_joint,dti_joint,verification_status_joint,acc_now_delinq,tot_coll_amt,tot_cur_bal,open_acc_6m,open_act_il,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,total_rev_hi_lim,inq_fi,total_cu_tl,inq_last_12m,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,bc_util,chargeoff_within_12_mths,delinq_amnt,mo_sin_old_il_acct,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_bc_dlq,mths_since_recent_inq,mths_since_recent_revol_delinq,num_accts_ever_120_pd,num_actv_bc_tl,num_actv_rev_tl,num_bc_sats,num_bc_tl,num_il_tl,num_op_rev_tl,num_rev_accts,num_rev_tl_bal_gt_0,num_sats,num_tl_120dpd_2m,num_tl_30dpd,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit,revol_bal_joint,sec_app_earliest_cr_line,sec_app_inq_last_6mths,sec_app_mort_acc,sec_app_open_acc,sec_app_revol_util,sec_app_open_act_il,sec_app_num_rev_accts,sec_app_chargeoff_within_12_mths,sec_app_collections_12_mths_ex_med,sec_app_mths_since_last_major_derog,hardship_flag,hardship_type,hardship_reason,hardship_status,deferral_term,hardship_amount,hardship_start_date,hardship_end_date,payment_plan_start_date,hardship_length,hardship_dpd,hardship_loan_status,orig_projected_additional_accrued_interest,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
39533,,,20000.0,20000.0,225.0,36 months,11.22%,656.86,C,C4,US Air Force,10+ years,RENT,38556.0,Not Verified,Dec-2007,Fully Paid,n,,Loan to consolidate debt,debt_consolidation,Debt Consolidation Loan,871xx,NM,14.04,0.0,Jan-2003,3.0,0.0,0.0,8.0,0.0,21777.0,53.7%,14.0,f,0.0,0.0,22583.266767,254.07,20000.0,2583.27,0.0,0.0,0.0,Jul-2009,9417.16,,Mar-2010,0.0,,1.0,Individual,,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
39534,,,20000.0,20000.0,2055.5,36 months,14.38%,687.25,E,E4,Groovemasters,2 years,RENT,42500.0,Not Verified,Jan-2008,Fully Paid,n,,"I would like to consolidate 10,000 of credit c...",debt_consolidation,Loan,902xx,CA,15.7,0.0,Nov-1997,0.0,43.0,0.0,5.0,0.0,9150.0,96.3%,22.0,f,0.0,0.0,24740.976693,2541.7,19999.99,4740.98,0.0,0.0,0.0,Jan-2011,697.12,,Jul-2014,0.0,,1.0,Individual,,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
39535,,,7500.0,7500.0,1167.14,36 months,10.59%,244.09,C,C2,SNHU,< 1 year,RENT,39975.0,Not Verified,Dec-2007,Fully Paid,n,,I am looking to pay off a $7500 credit card de...,credit_card,Getting rid of credit card debt for good,031xx,NH,12.1,2.0,Dec-1996,3.0,19.0,0.0,9.0,0.0,7914.0,30.8%,11.0,f,0.0,0.0,8802.18,1341.61,7500.0,1302.18,0.0,0.0,0.0,Jan-2011,236.1,,Dec-2010,0.0,,1.0,Individual,,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
39536,,,25000.0,25000.0,2550.009713,36 months,13.75%,851.41,E,E2,"Treasure Coast Pool Service,LLC",5 years,RENT,75000.0,Not Verified,Jan-2008,Charged Off,n,,To remodel kitchens and bathrooms.,home_improvement,Home Remodel,329xx,FL,19.78,0.0,May-2001,2.0,0.0,0.0,8.0,0.0,12308.0,43.6%,12.0,f,0.0,0.0,7662.69,781.1,5323.38,2338.5,0.0,0.81,0.0,Oct-2008,851.41,,Oct-2018,0.0,,1.0,Individual,,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
39537,,,2000.0,2000.0,1861.08,36 months,7.75%,62.45,A,A3,Pamlico Home Builders and Supplies,10+ years,RENT,24000.0,Not Verified,Dec-2007,Charged Off,n,,This loan will be used to help me as a single ...,moving,Main Street,285xx,NC,2.4,0.0,Sep-2001,1.0,0.0,0.0,5.0,0.0,949.0,55.8%,8.0,f,0.0,0.0,1111.73,1041.62,886.3,174.5,0.0,50.93,0.63,Jun-2009,62.45,,May-2017,0.0,,1.0,Individual,,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,


In [140]:
df_16.shape

(434415, 145)

In [141]:
df_16.head()

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,url,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,mths_since_last_delinq,mths_since_last_record,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,out_prncp_inv,total_pymnt,total_pymnt_inv,total_rec_prncp,total_rec_int,total_rec_late_fee,recoveries,collection_recovery_fee,last_pymnt_d,last_pymnt_amnt,next_pymnt_d,last_credit_pull_d,collections_12_mths_ex_med,mths_since_last_major_derog,policy_code,application_type,annual_inc_joint,dti_joint,verification_status_joint,acc_now_delinq,tot_coll_amt,tot_cur_bal,open_acc_6m,open_act_il,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,total_rev_hi_lim,inq_fi,total_cu_tl,inq_last_12m,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,bc_util,chargeoff_within_12_mths,delinq_amnt,mo_sin_old_il_acct,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_bc_dlq,mths_since_recent_inq,mths_since_recent_revol_delinq,num_accts_ever_120_pd,num_actv_bc_tl,num_actv_rev_tl,num_bc_sats,num_bc_tl,num_il_tl,num_op_rev_tl,num_rev_accts,num_rev_tl_bal_gt_0,num_sats,num_tl_120dpd_2m,num_tl_30dpd,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit,revol_bal_joint,sec_app_earliest_cr_line,sec_app_inq_last_6mths,sec_app_mort_acc,sec_app_open_acc,sec_app_revol_util,sec_app_open_act_il,sec_app_num_rev_accts,sec_app_chargeoff_within_12_mths,sec_app_collections_12_mths_ex_med,sec_app_mths_since_last_major_derog,hardship_flag,hardship_type,hardship_reason,hardship_status,deferral_term,hardship_amount,hardship_start_date,hardship_end_date,payment_plan_start_date,hardship_length,hardship_dpd,hardship_loan_status,orig_projected_additional_accrued_interest,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
0,,,8400.0,8400.0,8400.0,36 months,9.75%,270.06,B,B3,RV Tech,2 years,MORTGAGE,66000.0,Not Verified,Mar-2016,Fully Paid,n,,,debt_consolidation,Debt consolidation,301xx,GA,13.84,0.0,Jun-2005,1.0,60.0,,7.0,0.0,11059.0,78.4%,22.0,w,0.0,0.0,9449.400972,9449.4,8400.0,1049.4,0.0,0.0,0.0,Dec-2017,4327.36,,Dec-2017,1.0,60.0,1.0,Individual,,,,0.0,0.0,157051.0,1.0,1.0,1.0,1.0,10.0,19231.0,91.0,2.0,2.0,3019.0,86.0,14100.0,1.0,1.0,3.0,4.0,22436.0,585.0,90.6,0.0,0.0,129.0,129.0,2.0,2.0,2.0,89.0,,2.0,,2.0,2.0,4.0,2.0,7.0,2.0,5.0,12.0,4.0,7.0,0.0,0.0,0.0,3.0,86.4,100.0,0.0,0.0,166260.0,30290.0,6200.0,21079.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
1,,,12000.0,12000.0,12000.0,36 months,7.89%,375.43,A,A5,facility manager,3 years,OWN,45000.0,Source Verified,Mar-2016,Current,n,,,credit_card,Credit card refinancing,454xx,OH,15.65,0.0,Aug-1994,0.0,89.0,,9.0,0.0,23972.0,39.9%,16.0,w,2201.55,2201.55,11252.38,11252.38,9798.45,1453.93,0.0,0.0,0.0,Oct-2018,375.43,Oct-2018,Oct-2018,0.0,89.0,1.0,Individual,,,,0.0,0.0,23972.0,0.0,0.0,0.0,0.0,165.0,0.0,,1.0,5.0,11141.0,40.0,60100.0,0.0,0.0,2.0,5.0,2664.0,33197.0,40.4,0.0,0.0,165.0,259.0,9.0,9.0,0.0,9.0,89.0,6.0,89.0,1.0,5.0,6.0,6.0,8.0,1.0,9.0,15.0,6.0,9.0,0.0,0.0,0.0,1.0,93.7,66.7,0.0,0.0,60100.0,23972.0,55700.0,0.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
2,,,28000.0,28000.0,28000.0,36 months,7.39%,869.57,A,A4,graphic designer ii,1 year,RENT,60000.0,Verified,Mar-2016,Current,n,,,debt_consolidation,Debt consolidation,750xx,TX,21.48,0.0,Aug-1994,0.0,,,6.0,0.0,17197.0,34.4%,16.0,w,5106.41,5106.41,26064.11,26064.11,22893.59,3170.52,0.0,0.0,0.0,Oct-2018,869.57,Oct-2018,Oct-2018,0.0,,1.0,Individual,,,,0.0,0.0,29077.0,0.0,1.0,0.0,0.0,41.0,11880.0,,0.0,0.0,17022.0,34.0,50000.0,0.0,0.0,0.0,0.0,4846.0,32803.0,34.4,0.0,0.0,128.0,259.0,37.0,37.0,0.0,37.0,,,,0.0,3.0,3.0,5.0,5.0,6.0,5.0,10.0,3.0,6.0,0.0,0.0,0.0,0.0,100.0,20.0,0.0,0.0,87315.0,29077.0,50000.0,37315.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
3,,,10000.0,10000.0,10000.0,36 months,13.67%,340.18,C,C3,Cantor,10+ years,RENT,70000.0,Source Verified,Mar-2016,Current,n,,,other,Other,331xx,FL,14.74,0.0,Mar-1989,1.0,38.0,55.0,4.0,1.0,2057.0,16.1%,9.0,w,1961.93,1961.93,10190.21,10190.21,8038.07,2152.14,0.0,0.0,0.0,Oct-2018,340.18,Oct-2018,Oct-2018,0.0,,1.0,Individual,,,,0.0,0.0,27057.0,1.0,2.0,0.0,0.0,35.0,25000.0,58.0,1.0,1.0,1607.0,50.0,11400.0,1.0,0.0,2.0,1.0,6764.0,9343.0,16.1,0.0,0.0,123.0,324.0,2.0,2.0,0.0,2.0,,2.0,,0.0,2.0,2.0,2.0,4.0,4.0,2.0,5.0,2.0,4.0,0.0,0.0,0.0,1.0,75.0,0.0,1.0,0.0,54713.0,27057.0,11400.0,43313.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
4,,,20000.0,20000.0,20000.0,36 months,11.99%,664.2,C,C1,rn case manager,10+ years,MORTGAGE,49000.0,Source Verified,Mar-2016,Fully Paid,n,,,debt_consolidation,Debt consolidation,321xx,FL,15.58,1.0,Jun-2001,0.0,15.0,,8.0,0.0,19014.0,89.7%,28.0,w,0.0,0.0,23693.642605,23693.64,20000.0,3693.64,0.0,0.0,0.0,Aug-2018,5786.88,,Aug-2018,0.0,35.0,1.0,Individual,,,,0.0,0.0,125922.0,0.0,1.0,0.0,0.0,34.0,10623.0,87.0,0.0,0.0,7414.0,89.0,21200.0,0.0,5.0,0.0,0.0,15740.0,481.0,96.3,0.0,0.0,122.0,112.0,55.0,34.0,5.0,111.0,,,,4.0,2.0,6.0,2.0,4.0,11.0,6.0,12.0,6.0,8.0,0.0,0.0,0.0,0.0,82.1,100.0,0.0,0.0,140308.0,29637.0,13100.0,12193.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,


In [142]:
#function to clean datasets
def clean(df_07):
    preds = ['loan_status'
         ,'loan_amnt'
         ,'funded_amnt'
         ,'term'
         ,'int_rate'
         ,'installment'
         ,'emp_length'
         ,'home_ownership'
         ,'annual_inc'
         ,'issue_d'
         ,'zip_code'
         ,'addr_state'
         ,'dti'
         ,'delinq_2yrs'
         ,'earliest_cr_line'
         ,'inq_last_6mths'
         ,'mths_since_last_delinq'
         ,'open_acc'
         ,'pub_rec'
         ,'revol_bal'
         ,'revol_util'
         ,'total_acc'
        ]

    states = df_07['addr_state'].unique().tolist()
    
    df_07 = df_07[preds]

    df_07['term'] = df_07['term'].str.rstrip(' months').astype('float')
    df_07['int_rate'] = df_07['int_rate'].str.rstrip('%').astype('float') / 100.0
    df_07['emp_length'] = df_07['emp_length'].str.strip('< + years').astype('float')
    df_07['issue_d'] = pd.to_datetime(df_07['issue_d'])
    df_07['zip_code'] = df_07['zip_code'].str.replace('xx','00').astype('float')
    df_07['earliest_cr_line'] = pd.to_datetime(df_07['earliest_cr_line'])
    
    df_07['length_credit_history'] = (df_07['issue_d']-df_07['earliest_cr_line']).astype('timedelta64[D]')/30.44
    df_07['mths_since_last_delinq'] = np.where(df_07['mths_since_last_delinq'].isna() , df_07['length_credit_history'], df_07['mths_since_last_delinq'])
    df_07['revol_util'] = df_07['revol_util'].str.rstrip('%').astype('float') / 100.0
    df_07 = pd.get_dummies(df_07, columns = ['home_ownership'] )
    df_07 = pd.get_dummies(df_07, columns = ['addr_state'] )

    df_07['loan_status'] = np.where(df_07['loan_status'] == 'Fully Paid' , 'paid', df_07['loan_status'])
    df_07['loan_status'] = np.where(df_07['loan_status'] == 'Charged Off' , 'default', df_07['loan_status'])
    df_07['loan_status'] = np.where(df_07['loan_status'] == 'Does not meet the credit policy. Status:Fully Paid' , 'paid', df_07['loan_status'])
    df_07['loan_status'] = np.where(df_07['loan_status'] == 'Does not meet the credit policy. Status:Charged Off' , 'default', df_07['loan_status'])
    
    preds.append('length_credit_history')

    df_07 = df_07.dropna()
    return df_07

In [143]:
#clean the dfs
df_07 = clean(df_07to11)
df_16 = clean(df_16)

In [144]:
df_07.head()

Unnamed: 0,loan_status,loan_amnt,funded_amnt,term,int_rate,installment,emp_length,annual_inc,issue_d,zip_code,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,mths_since_last_delinq,open_acc,pub_rec,revol_bal,revol_util,total_acc,length_credit_history,home_ownership_MORTGAGE,home_ownership_NONE,home_ownership_OTHER,home_ownership_OWN,home_ownership_RENT,addr_state_AK,addr_state_AL,addr_state_AR,addr_state_AZ,addr_state_CA,addr_state_CO,addr_state_CT,addr_state_DC,addr_state_DE,addr_state_FL,addr_state_GA,addr_state_HI,addr_state_IA,addr_state_ID,addr_state_IL,addr_state_IN,addr_state_KS,addr_state_KY,addr_state_LA,addr_state_MA,addr_state_MD,addr_state_ME,addr_state_MI,addr_state_MN,addr_state_MO,addr_state_MS,addr_state_MT,addr_state_NC,addr_state_NE,addr_state_NH,addr_state_NJ,addr_state_NM,addr_state_NV,addr_state_NY,addr_state_OH,addr_state_OK,addr_state_OR,addr_state_PA,addr_state_RI,addr_state_SC,addr_state_SD,addr_state_TN,addr_state_TX,addr_state_UT,addr_state_VA,addr_state_VT,addr_state_WA,addr_state_WI,addr_state_WV,addr_state_WY
0,paid,5000.0,5000.0,36.0,0.1065,162.87,10.0,24000.0,2011-12-01,86000.0,27.65,0.0,1985-01-01,1.0,322.930355,3.0,0.0,13648.0,0.837,9.0,322.930355,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,default,2500.0,2500.0,60.0,0.1527,59.83,1.0,30000.0,2011-12-01,30900.0,1.0,0.0,1999-04-01,5.0,152.003942,3.0,0.0,1687.0,0.094,4.0,152.003942,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,paid,2400.0,2400.0,36.0,0.1596,84.33,10.0,12252.0,2011-12-01,60600.0,8.72,0.0,2001-11-01,2.0,120.959264,2.0,0.0,2956.0,0.985,10.0,120.959264,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,paid,10000.0,10000.0,36.0,0.1349,339.31,10.0,49200.0,2011-12-01,91700.0,20.0,0.0,1996-02-01,1.0,35.0,10.0,0.0,5598.0,0.21,37.0,189.947438,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,paid,3000.0,3000.0,60.0,0.1269,67.79,1.0,80000.0,2011-12-01,97200.0,17.94,0.0,1996-01-01,0.0,38.0,15.0,0.0,27783.0,0.539,38.0,190.965834,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [145]:
df_16.head()

Unnamed: 0,loan_status,loan_amnt,funded_amnt,term,int_rate,installment,emp_length,annual_inc,issue_d,zip_code,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,mths_since_last_delinq,open_acc,pub_rec,revol_bal,revol_util,total_acc,length_credit_history,home_ownership_ANY,home_ownership_MORTGAGE,home_ownership_OWN,home_ownership_RENT,addr_state_AK,addr_state_AL,addr_state_AR,addr_state_AZ,addr_state_CA,addr_state_CO,addr_state_CT,addr_state_DC,addr_state_DE,addr_state_FL,addr_state_GA,addr_state_HI,addr_state_ID,addr_state_IL,addr_state_IN,addr_state_KS,addr_state_KY,addr_state_LA,addr_state_MA,addr_state_MD,addr_state_ME,addr_state_MI,addr_state_MN,addr_state_MO,addr_state_MS,addr_state_MT,addr_state_NC,addr_state_ND,addr_state_NE,addr_state_NH,addr_state_NJ,addr_state_NM,addr_state_NV,addr_state_NY,addr_state_OH,addr_state_OK,addr_state_OR,addr_state_PA,addr_state_RI,addr_state_SC,addr_state_SD,addr_state_TN,addr_state_TX,addr_state_UT,addr_state_VA,addr_state_VT,addr_state_WA,addr_state_WI,addr_state_WV,addr_state_WY
0,paid,8400.0,8400.0,36.0,0.0975,270.06,2.0,66000.0,2016-03-01,30100.0,13.84,0.0,2005-06-01,1.0,60.0,7.0,0.0,11059.0,0.784,22.0,128.975033,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Current,12000.0,12000.0,36.0,0.0789,375.43,3.0,45000.0,2016-03-01,45400.0,15.65,0.0,1994-08-01,0.0,89.0,9.0,0.0,23972.0,0.399,16.0,258.968463,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Current,28000.0,28000.0,36.0,0.0739,869.57,1.0,60000.0,2016-03-01,75000.0,21.48,0.0,1994-08-01,0.0,258.968463,6.0,0.0,17197.0,0.344,16.0,258.968463,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
3,Current,10000.0,10000.0,36.0,0.1367,340.18,10.0,70000.0,2016-03-01,33100.0,14.74,0.0,1989-03-01,1.0,38.0,4.0,1.0,2057.0,0.161,9.0,323.981603,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,paid,20000.0,20000.0,36.0,0.1199,664.2,10.0,49000.0,2016-03-01,32100.0,15.58,1.0,2001-06-01,0.0,15.0,8.0,0.0,19014.0,0.897,28.0,176.971091,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
