# Pandas
> - 데이터 과학자를 위해 **테이블형태**로 데이터를 다룰 수 있게 해주는 패키지(python용 엑셀)
- 기존 데이터처리 라이브러리인 numpy 대신 주로 사용
- 일반인이 데이터분석을 접하기 쉽게 만들어준 결정적인 라이브러리
- pandas만으로도 충분히 데이터 분석이 가능할 정도로 고수준의 함수들을 내장
- 앞으로 진행하는 데이터분석 과정에서 주로 사용하게 될 데이터구조

# pandas 설치
    
> 콘솔창에서 실행 시  
**`pip install pandas`**  
**`conda install pandas`**
    
> 주피터 노트북으로 실행 시  
**`!pip install pandas`**
    
아나콘다 환경으로 python 환경설정 시 기본적으로 설치가 되어있음

In [None]:
# pandas 설치
# !pip install pandas

In [1]:
# pandas import
import pandas as pd
# pd라는 닉네임은 많은 파이썬 유저들이 사용하고 있는 닉네임, 분석을 위한 필수는 아니지만 되도록이면 위와 같이 사용을 해줍시다.

# pd.options.display.max_columns = 200
# 불러들이는 데이터에 맞춰 모든 컬럼을 확인 가능하도록 옵션값을 주었습니다.
# pd.options.display.max_info_columns =200
# 그냥 실행 시키시고 지금 이해 못하셔도 좋습니다.

# numpy import
import numpy as np

## DataFrame
> - 엑셀에 익숙한 사용자를 위해 제작 된 **테이블형태의 데이터 구조**  
- 다양한 형태의 데이터를 받아 사용할 수 있으며 다양한 **통계, 시각화 함수를 제공**한다.  

실제 데이터를 불러들이고 값을 확인 해 보며 기본적인 pandas 사용법을 익혀보도록 하겠습니다.

### 데이터 불러오기
pandas는 다양한 데이터 파일 형태를 지원하며 주로 csv, xlsx, sql, json을 사용합니다.
    
> **`read_csv()`**  
**`read_excel()`**  
**`read_sql()`**  
**`read_json()`**  
**`json_normalize()`**

In [2]:
import pandas as pd

In [3]:
%%time
# DataFrame 의 약자로서 형식적으로 df 변수명을 사용한다.
# pandas패키지의 read_csv() 함수를 사용하여 loan.csv 파일을 불러들여 데이터프레임을 만들고 df 이름의 변수로 저장
df = pd.read_csv('./data/loan1.csv')
df.head()

CPU times: user 316 ms, sys: 55.5 ms, total: 372 ms
Wall time: 375 ms


In [3]:
%%time
# 만약 모듈을 찾을 수 없는 오류가 발생한다면 추가 모듈 설치
# !pip install xlrd
df = pd.read_excel('./data/loan1.xlsx')

CPU times: user 56.4 s, sys: 284 ms, total: 56.7 s
Wall time: 56.7 s


In [6]:
# 데이터프레임 확인
df

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,...,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
0,,,10000,10000,10000.0,36 months,9.44,320.05,B,B1,...,,,Cash,N,,,,,,
1,,,3500,3500,3500.0,36 months,10.42,113.63,B,B3,...,,,Cash,N,,,,,,
2,,,5000,5000,5000.0,36 months,13.59,169.90,C,C2,...,,,Cash,N,,,,,,
3,,,14000,14000,14000.0,36 months,10.91,457.75,B,B4,...,,,Cash,N,,,,,,
4,,,5000,5000,5000.0,36 months,13.59,169.90,C,C2,...,,,Cash,N,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,,,13000,13000,13000.0,60 months,18.06,330.54,D,D2,...,,,Cash,N,,,,,,
49996,,,12000,12000,12000.0,60 months,13.59,276.68,C,C2,...,,,Cash,N,,,,,,
49997,,,20000,20000,20000.0,60 months,15.05,476.33,C,C4,...,,,Cash,N,,,,,,
49998,,,7000,7000,7000.0,36 months,7.35,217.27,A,A4,...,,,Cash,N,,,,,,


In [None]:
# 참고! 실습은 하지 않습니다만 쿼리를 사용하여 데이터베이스로부터 데이터프레임을 만드는 것도 가능합니다.
# 데이터베이스로 부터 자료 읽기

# 필요한 모듈 추가 설치 - 각 데이터베이스 별로 다릅니다.
# !pip install pymysql

# sql 모듈 로드하기
# import pymysql
# mysql, mariadb, sqlite, postgresql, ms-sql, oracle, mongodb

# 접속하기
# 접속방법 또한 DB 종류에 따라 다릅니다.
# con = pymysql.connect(host='db서버주소', port=3306, user='id', passwd='pwd', db='dbname')

# query 만들기
# query = 'select * from samples'

# 자료 불러오기
# data = pd.read_sql(query, con=con)

### 데이터 저장하기
불러들인 혹은 작업을 마친 데이터프레임을 다양한 파일형태로 저장이 가능합니다.
    
> **`to_csv()`**  
**`to_excel()`**  
**`to_sql()`**

In [None]:
%%time
df.to_csv('loan1_test.csv', index=False)

In [None]:
%%time
df.to_excel('loan1_test.xlsx')

### 사용 데이터 간략 설명
> 미국 핀테크 회사인 lending club의 대출 데이터베이스  
클라우드펀딩과 대출을 결합한 핀테크의 시초라고 부를 수 있는 회사  
방대한 양의 대출정보를 공개하면서 금융정보분석에도 기여한 공이 큰 데이터  
2007 ~ 2015 년 대출정보 및 개인정보를 담고 있음  
226만건, 145항목 정보를 담고있음  
실습데이터는 이 중 4만건을 추출한 데이터를 사용합니다.  

데이터출처: https://www.kaggle.com/wordsforthewise/lending-club

### 데이터 살펴보기

In [7]:
# 데이터를 불러들인 후 가장 처음 하는 작업
# 데이터의 구조, 형태 파악하기
# 데이터의 첫 5개 샘플 확인하기
df.head()
# 10개를 확인하려면?

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,...,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
0,,,10000,10000,10000.0,36 months,9.44,320.05,B,B1,...,,,Cash,N,,,,,,
1,,,3500,3500,3500.0,36 months,10.42,113.63,B,B3,...,,,Cash,N,,,,,,
2,,,5000,5000,5000.0,36 months,13.59,169.9,C,C2,...,,,Cash,N,,,,,,
3,,,14000,14000,14000.0,36 months,10.91,457.75,B,B4,...,,,Cash,N,,,,,,
4,,,5000,5000,5000.0,36 months,13.59,169.9,C,C2,...,,,Cash,N,,,,,,


In [8]:
# 데이터의 마지막 5개 샘플 확인하기
# 데이터가 잘 가져왔는지 확인 할 때 보통 씁니다.
df.tail()

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,...,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
49995,,,13000,13000,13000.0,60 months,18.06,330.54,D,D2,...,,,Cash,N,,,,,,
49996,,,12000,12000,12000.0,60 months,13.59,276.68,C,C2,...,,,Cash,N,,,,,,
49997,,,20000,20000,20000.0,60 months,15.05,476.33,C,C4,...,,,Cash,N,,,,,,
49998,,,7000,7000,7000.0,36 months,7.35,217.27,A,A4,...,,,Cash,N,,,,,,
49999,,,6000,6000,6000.0,36 months,13.59,203.88,C,C2,...,,,Cash,N,,,,,,


In [9]:
# 데이터의 갯수를 살펴봅니다
len(df)

50000

In [12]:
# 데이터의 전반적인 정보를 확인합니다.
df.info(verbose=True)
# dtype 정보에서는 각 컬럼별 데이터 타입을 확인 할 수 있습니다.
# object == str 이라고 생각하셔도 무방합니다.
# verbose, null_counts

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 145 columns):
 #   Column                                      Dtype  
---  ------                                      -----  
 0   id                                          float64
 1   member_id                                   float64
 2   loan_amnt                                   int64  
 3   funded_amnt                                 int64  
 4   funded_amnt_inv                             float64
 5   term                                        object 
 6   int_rate                                    float64
 7   installment                                 float64
 8   grade                                       object 
 9   sub_grade                                   object 
 10  emp_title                                   object 
 11  emp_length                                  object 
 12  home_ownership                              object 
 13  annual_inc                    

In [13]:
# 데이터의 기초통계량을 확인합니다.
df.describe()

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,int_rate,installment,annual_inc,url,desc,...,deferral_term,hardship_amount,hardship_length,hardship_dpd,orig_projected_additional_accrued_interest,hardship_payoff_balance_amount,hardship_last_payment_amount,settlement_amount,settlement_percentage,settlement_term
count,0.0,0.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,0.0,0.0,...,158.0,158.0,158.0,158.0,132.0,158.0,158.0,175.0,175.0,175.0
mean,,,15315.2945,15315.2945,15309.765269,12.680199,445.767806,77929.47,,,...,3.0,205.303544,3.0,14.848101,569.305,15359.178608,226.187342,7037.168571,53.648914,17.777143
std,,,9919.945835,9919.945835,9918.352697,4.952463,281.535755,79916.56,,,...,0.0,152.0081,0.0,8.608378,411.479679,8892.329078,215.081852,5175.967267,9.880582,6.229476
min,,,1000.0,1000.0,1000.0,5.32,7.61,0.0,,,...,3.0,10.54,3.0,0.0,31.62,1012.11,0.24,500.0,35.64,1.0
25%,,,8000.0,8000.0,8000.0,9.44,239.6175,45000.0,,,...,3.0,84.7975,3.0,7.5,248.865,7612.24,49.7525,3204.92,45.0,14.0
50%,,,12200.0,12200.0,12200.0,11.99,368.06,65000.0,,,...,3.0,169.85,3.0,15.0,446.595,13649.6,156.69,5212.01,50.01,18.0
75%,,,20000.0,20000.0,20000.0,15.05,602.3,94000.0,,,...,3.0,287.275,3.0,22.0,821.775,21798.91,345.5475,9899.0,60.0,24.0
max,,,40000.0,40000.0,40000.0,30.99,1618.03,8000000.0,,,...,3.0,833.67,3.0,30.0,2335.47,35840.94,1026.41,24554.0,98.57,24.0


In [14]:
# numpy 함수로 데이터 shape 확인
np.shape(df)

(50000, 145)

In [15]:
# 인덱스
df.index

RangeIndex(start=0, stop=50000, step=1)

In [16]:
# 컬럼
df.columns

Index(['id', 'member_id', 'loan_amnt', 'funded_amnt', 'funded_amnt_inv',
       'term', 'int_rate', 'installment', 'grade', 'sub_grade',
       ...
       'hardship_payoff_balance_amount', 'hardship_last_payment_amount',
       'disbursement_method', 'debt_settlement_flag',
       'debt_settlement_flag_date', 'settlement_status', 'settlement_date',
       'settlement_amount', 'settlement_percentage', 'settlement_term'],
      dtype='object', length=145)

데이터셋을 살펴 본 결과 정체를 알 수 없는 많은 컬럼이 있는 걸 확인했고, 

50000개의 샘플이 불러들여진 것을 확인 할 수 있었습니다.

추가로 데이터 중간 중간 비어있는 값도 있는 것을 확인했습니다.

### 데이터접근 (인덱싱, 슬라이싱, 샘플링)

In [17]:
df.head()

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,...,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
0,,,10000,10000,10000.0,36 months,9.44,320.05,B,B1,...,,,Cash,N,,,,,,
1,,,3500,3500,3500.0,36 months,10.42,113.63,B,B3,...,,,Cash,N,,,,,,
2,,,5000,5000,5000.0,36 months,13.59,169.9,C,C2,...,,,Cash,N,,,,,,
3,,,14000,14000,14000.0,36 months,10.91,457.75,B,B4,...,,,Cash,N,,,,,,
4,,,5000,5000,5000.0,36 months,13.59,169.9,C,C2,...,,,Cash,N,,,,,,


In [40]:
# 첫 샘플 혹은 레코드(대출건)에 대한 데이터를 살펴보겠습니다.
# 인덱스넘버로 데이터에 접근하는 .iloc[색인]
df.iloc[0]

id                                                                NaN
member_id                                                         NaN
loan_amnt                                                       10000
funded_amnt                                                     10000
funded_amnt_inv                                                 10000
term                                                        36 months
int_rate                                                         9.44
installment                                                    320.05
grade                                                               B
sub_grade                                                          B1
emp_title                                                    mechanic
emp_length                                                    6 years
home_ownership                                               MORTGAGE
annual_inc                                                      80000
verification_status 

In [41]:
# 10번 인덱스 부터 20번 인덱스 샘플 접근
df.iloc[10:20]

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,url,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,mths_since_last_delinq,mths_since_last_record,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,out_prncp_inv,total_pymnt,total_pymnt_inv,total_rec_prncp,total_rec_int,total_rec_late_fee,recoveries,collection_recovery_fee,last_pymnt_d,last_pymnt_amnt,next_pymnt_d,last_credit_pull_d,collections_12_mths_ex_med,mths_since_last_major_derog,policy_code,application_type,annual_inc_joint,dti_joint,verification_status_joint,acc_now_delinq,tot_coll_amt,tot_cur_bal,open_acc_6m,open_act_il,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,total_rev_hi_lim,inq_fi,total_cu_tl,inq_last_12m,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,bc_util,chargeoff_within_12_mths,delinq_amnt,mo_sin_old_il_acct,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_bc_dlq,mths_since_recent_inq,mths_since_recent_revol_delinq,num_accts_ever_120_pd,num_actv_bc_tl,num_actv_rev_tl,num_bc_sats,num_bc_tl,num_il_tl,num_op_rev_tl,num_rev_accts,num_rev_tl_bal_gt_0,num_sats,num_tl_120dpd_2m,num_tl_30dpd,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit,revol_bal_joint,sec_app_earliest_cr_line,sec_app_inq_last_6mths,sec_app_mort_acc,sec_app_open_acc,sec_app_revol_util,sec_app_open_act_il,sec_app_num_rev_accts,sec_app_chargeoff_within_12_mths,sec_app_collections_12_mths_ex_med,sec_app_mths_since_last_major_derog,hardship_flag,hardship_type,hardship_reason,hardship_status,deferral_term,hardship_amount,hardship_start_date,hardship_end_date,payment_plan_start_date,hardship_length,hardship_dpd,hardship_loan_status,orig_projected_additional_accrued_interest,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
10,,,40000,40000,40000.0,60 months,16.02,973.15,C,C5,IT Manager - Business Process,10+ years,OWN,140000.0,Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,292xx,SC,31.79,1.0,Aug-1999,0.0,3.0,,11.0,0.0,34632,59.5,52.0,w,33288.83,33288.83,13602.03,13602.03,6711.17,6890.86,0.0,0.0,0.0,Feb-2019,973.15,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,59.0,81346.0,0.0,3.0,0.0,0.0,29.0,46714.0,50.0,0.0,1.0,19100.0,53.0,104500.0,0.0,0.0,1.0,1.0,8135.0,63703.0,57.9,0.0,0.0,220.0,213.0,13.0,13.0,0.0,13.0,,9.0,3.0,0.0,2.0,6.0,2.0,17.0,22.0,8.0,30.0,6.0,11.0,0.0,0.0,0.0,0.0,98.0,0.0,0.0,0.0,197500.0,81346.0,88000.0,93000.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
11,,,18000,18000,18000.0,60 months,13.59,415.02,C,C2,Strategic Account Manager,1 year,MORTGAGE,325000.0,Source Verified,Dec-2017,In Grace Period,n,,,debt_consolidation,Debt consolidation,027xx,MA,14.12,0.0,Nov-2003,0.0,67.0,,16.0,0.0,28674,81.7,38.0,w,15060.28,15060.28,5368.08,5368.08,2939.72,2428.36,0.0,0.0,0.0,Jan-2019,415.02,Mar-2019,Feb-2019,0.0,67.0,1,Individual,,,,0.0,0.0,709276.0,2.0,12.0,3.0,7.0,3.0,265051.0,88.0,0.0,0.0,17184.0,88.0,35100.0,2.0,0.0,7.0,7.0,44330.0,6426.0,81.7,0.0,0.0,118.0,169.0,44.0,3.0,5.0,44.0,67.0,1.0,67.0,1.0,2.0,2.0,2.0,9.0,18.0,2.0,14.0,2.0,16.0,0.0,0.0,0.0,3.0,97.3,100.0,0.0,0.0,755076.0,293725.0,35100.0,276976.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
12,,,6025,6025,6025.0,36 months,17.09,215.08,D,D1,Account Manager,5 years,RENT,62000.0,Not Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,010xx,MA,14.65,0.0,Aug-2004,1.0,,,9.0,0.0,6363,37.4,15.0,w,750.33,750.33,6002.55,6002.55,5274.67,727.88,0.0,0.0,0.0,Feb-2019,215.08,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,578.0,21201.0,0.0,3.0,1.0,2.0,9.0,14838.0,57.0,1.0,3.0,1997.0,49.0,17000.0,2.0,0.0,3.0,5.0,2356.0,10637.0,37.4,0.0,0.0,157.0,160.0,8.0,8.0,0.0,8.0,,2.0,,0.0,5.0,5.0,5.0,5.0,8.0,5.0,7.0,5.0,9.0,0.0,0.0,0.0,2.0,100.0,40.0,0.0,0.0,42956.0,21201.0,17000.0,25956.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
13,,,18000,18000,18000.0,36 months,9.44,576.09,B,B1,sales associate,2 years,OWN,22000.0,Source Verified,Dec-2017,Fully Paid,n,,,debt_consolidation,Debt consolidation,439xx,OH,31.21,0.0,Feb-2008,3.0,,,15.0,0.0,1799,4.6,20.0,w,0.0,0.0,19243.700856,19243.7,18000.0,1243.7,0.0,0.0,0.0,Oct-2018,14077.77,,Feb-2019,0.0,,1,Joint App,70000.0,22.15,Source Verified,0.0,0.0,15737.0,3.0,2.0,1.0,1.0,5.0,13938.0,73.0,5.0,9.0,1499.0,27.0,39300.0,1.0,0.0,5.0,10.0,1049.0,24001.0,5.9,0.0,0.0,53.0,118.0,4.0,4.0,0.0,10.0,,4.0,,0.0,1.0,2.0,5.0,5.0,4.0,13.0,16.0,2.0,15.0,0.0,0.0,0.0,6.0,100.0,0.0,0.0,0.0,58296.0,15737.0,25500.0,18996.0,15710.0,Nov-2015,4.0,0.0,12.0,79.7,1.0,11.0,0.0,0.0,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
14,,,10200,10200,10200.0,36 months,11.99,338.74,B,B5,Machine operator,4 years,RENT,35000.0,Source Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,635xx,MO,15.26,0.0,Dec-2008,1.0,,,6.0,0.0,4238,26.2,19.0,w,6662.23,6662.23,4745.71,4745.71,3537.77,1191.0,16.94,0.0,0.0,Feb-2019,338.74,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,0.0,21882.0,1.0,1.0,1.0,1.0,11.0,17644.0,90.0,1.0,4.0,2193.0,61.0,16200.0,0.0,0.0,5.0,5.0,3647.0,5493.0,31.3,0.0,0.0,108.0,26.0,4.0,4.0,0.0,14.0,,4.0,,0.0,2.0,3.0,3.0,3.0,14.0,5.0,5.0,3.0,6.0,0.0,0.0,0.0,2.0,100.0,33.3,0.0,0.0,35828.0,21882.0,8000.0,19628.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
15,,,28000,28000,28000.0,36 months,12.62,938.32,C,C1,Vice President,3 years,RENT,270000.0,Source Verified,Dec-2017,Fully Paid,n,,,credit_card,Credit card refinancing,945xx,CA,8.25,0.0,Mar-1996,0.0,65.0,,5.0,0.0,16087,96.9,14.0,w,0.0,0.0,29679.376794,29679.38,28000.0,1679.38,0.0,0.0,0.0,May-2018,25867.2,,Dec-2018,0.0,65.0,1,Individual,,,,0.0,0.0,79735.0,1.0,2.0,1.0,2.0,3.0,63648.0,94.0,0.0,1.0,6758.0,96.0,16600.0,1.0,3.0,1.0,3.0,15947.0,513.0,96.9,0.0,0.0,143.0,261.0,16.0,3.0,1.0,16.0,,3.0,,1.0,3.0,3.0,3.0,4.0,7.0,3.0,6.0,3.0,5.0,0.0,0.0,0.0,1.0,92.9,100.0,0.0,0.0,92465.0,79735.0,16600.0,75865.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
16,,,23000,23000,23000.0,60 months,19.03,597.02,D,D3,Installation Manager,10+ years,MORTGAGE,75000.0,Source Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,585xx,ND,27.64,0.0,Dec-2002,1.0,43.0,,14.0,0.0,21795,56.6,22.0,w,19390.68,19390.68,8309.65,8309.65,3609.32,4700.33,0.0,0.0,0.0,Feb-2019,597.02,Mar-2019,Feb-2019,0.0,50.0,1,Individual,,,,0.0,0.0,165604.0,0.0,4.0,0.0,2.0,13.0,35227.0,56.0,1.0,2.0,7087.0,56.0,38500.0,2.0,3.0,1.0,4.0,12739.0,16705.0,56.6,0.0,0.0,179.0,157.0,11.0,11.0,4.0,11.0,,0.0,50.0,1.0,6.0,6.0,8.0,8.0,8.0,9.0,10.0,6.0,14.0,0.0,0.0,0.0,1.0,90.9,12.5,0.0,0.0,224502.0,57022.0,38500.0,63302.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
17,,,10000,10000,10000.0,60 months,21.45,273.08,D,D5,Senior Agent,2 years,RENT,26685.0,Verified,Dec-2017,Late (31-120 days),n,,,moving,Moving and relocation,331xx,FL,23.12,0.0,Sep-2011,0.0,,,4.0,0.0,3083,28.0,5.0,w,8787.42,8787.42,3369.63,3369.63,1212.58,2112.05,45.0,0.0,0.0,Jan-2019,1103.48,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,0.0,17794.0,0.0,1.0,0.0,1.0,22.0,14711.0,74.0,0.0,0.0,2074.0,58.0,11000.0,0.0,0.0,0.0,1.0,4449.0,7917.0,28.0,0.0,0.0,22.0,75.0,31.0,22.0,0.0,54.0,,,,0.0,2.0,2.0,3.0,3.0,1.0,3.0,4.0,2.0,4.0,0.0,0.0,0.0,0.0,100.0,66.7,0.0,0.0,30845.0,17794.0,11000.0,19845.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
18,,,12000,12000,12000.0,60 months,12.62,270.71,C,C1,commercial sales,10+ years,OWN,40000.0,Source Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,394xx,MS,7.74,0.0,Nov-1986,0.0,,,3.0,0.0,5742,13.9,10.0,w,9832.57,9832.57,3773.11,3773.11,2167.43,1605.68,0.0,0.0,0.0,Feb-2019,270.71,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,383.0,5742.0,0.0,0.0,0.0,1.0,24.0,0.0,,0.0,0.0,5581.0,14.0,41300.0,0.0,0.0,0.0,1.0,1914.0,35558.0,13.9,0.0,0.0,137.0,372.0,52.0,24.0,0.0,52.0,,,,0.0,2.0,2.0,3.0,7.0,2.0,3.0,8.0,2.0,3.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,41300.0,5742.0,41300.0,0.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
19,,,1500,1500,1500.0,36 months,18.06,54.28,D,D2,sales,10+ years,RENT,75000.0,Source Verified,Dec-2017,Current,n,,,credit_card,Credit card refinancing,197xx,DE,9.5,0.0,Mar-2004,2.0,,85.0,7.0,1.0,8265,62.6,12.0,w,1009.98,1009.98,767.45,767.45,490.02,277.43,0.0,0.0,0.0,Feb-2019,54.28,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,0.0,22912.0,2.0,1.0,1.0,1.0,4.0,14647.0,98.0,1.0,2.0,5966.0,81.0,13200.0,1.0,0.0,2.0,3.0,3819.0,34.0,99.4,0.0,0.0,155.0,165.0,1.0,1.0,0.0,49.0,,1.0,,0.0,1.0,3.0,1.0,2.0,5.0,6.0,7.0,3.0,7.0,0.0,0.0,0.0,2.0,100.0,100.0,1.0,0.0,28200.0,22912.0,6000.0,15000.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,


In [42]:
# 첫번째 0, 10, 20 인덱스 샘플 접근
df.iloc[[0, 10, 20]]

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,url,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,mths_since_last_delinq,mths_since_last_record,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,out_prncp_inv,total_pymnt,total_pymnt_inv,total_rec_prncp,total_rec_int,total_rec_late_fee,recoveries,collection_recovery_fee,last_pymnt_d,last_pymnt_amnt,next_pymnt_d,last_credit_pull_d,collections_12_mths_ex_med,mths_since_last_major_derog,policy_code,application_type,annual_inc_joint,dti_joint,verification_status_joint,acc_now_delinq,tot_coll_amt,tot_cur_bal,open_acc_6m,open_act_il,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,total_rev_hi_lim,inq_fi,total_cu_tl,inq_last_12m,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,bc_util,chargeoff_within_12_mths,delinq_amnt,mo_sin_old_il_acct,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_bc_dlq,mths_since_recent_inq,mths_since_recent_revol_delinq,num_accts_ever_120_pd,num_actv_bc_tl,num_actv_rev_tl,num_bc_sats,num_bc_tl,num_il_tl,num_op_rev_tl,num_rev_accts,num_rev_tl_bal_gt_0,num_sats,num_tl_120dpd_2m,num_tl_30dpd,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit,revol_bal_joint,sec_app_earliest_cr_line,sec_app_inq_last_6mths,sec_app_mort_acc,sec_app_open_acc,sec_app_revol_util,sec_app_open_act_il,sec_app_num_rev_accts,sec_app_chargeoff_within_12_mths,sec_app_collections_12_mths_ex_med,sec_app_mths_since_last_major_derog,hardship_flag,hardship_type,hardship_reason,hardship_status,deferral_term,hardship_amount,hardship_start_date,hardship_end_date,payment_plan_start_date,hardship_length,hardship_dpd,hardship_loan_status,orig_projected_additional_accrued_interest,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
0,,,10000,10000,10000.0,36 months,9.44,320.05,B,B1,mechanic,6 years,MORTGAGE,80000.0,Not Verified,Dec-2017,Current,n,,,credit_card,Credit card refinancing,762xx,TX,14.82,0.0,Jul-2007,0.0,34.0,,8.0,0.0,5225,73.6,30.0,w,6442.28,6442.28,4493.81,4493.81,3557.72,936.09,0.0,0.0,0.0,Feb-2019,320.05,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,0.0,173110.0,0.0,2.0,0.0,2.0,23.0,12496.0,39.0,0.0,0.0,3949.0,45.0,7100.0,1.0,0.0,0.0,2.0,21639.0,1875.0,73.6,0.0,0.0,125.0,78.0,26.0,23.0,3.0,26.0,,21.0,,0.0,2.0,2.0,4.0,4.0,21.0,4.0,5.0,2.0,8.0,0.0,0.0,0.0,0.0,96.4,25.0,0.0,0.0,196130.0,17756.0,7100.0,31992.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
10,,,40000,40000,40000.0,60 months,16.02,973.15,C,C5,IT Manager - Business Process,10+ years,OWN,140000.0,Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,292xx,SC,31.79,1.0,Aug-1999,0.0,3.0,,11.0,0.0,34632,59.5,52.0,w,33288.83,33288.83,13602.03,13602.03,6711.17,6890.86,0.0,0.0,0.0,Feb-2019,973.15,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,59.0,81346.0,0.0,3.0,0.0,0.0,29.0,46714.0,50.0,0.0,1.0,19100.0,53.0,104500.0,0.0,0.0,1.0,1.0,8135.0,63703.0,57.9,0.0,0.0,220.0,213.0,13.0,13.0,0.0,13.0,,9.0,3.0,0.0,2.0,6.0,2.0,17.0,22.0,8.0,30.0,6.0,11.0,0.0,0.0,0.0,0.0,98.0,0.0,0.0,0.0,197500.0,81346.0,88000.0,93000.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
20,,,14400,14400,14375.0,36 months,20.0,535.16,D,D4,Assistant General Manager,4 years,RENT,38500.0,Verified,Dec-2017,Charged Off,n,,,debt_consolidation,Debt consolidation,799xx,TX,30.94,0.0,Apr-2007,0.0,53.0,,13.0,0.0,11305,68.9,18.0,f,0.0,0.0,3810.12,3803.51,2172.34,1637.78,0.0,0.0,0.0,Aug-2018,535.16,,Jan-2019,0.0,,1,Individual,,,,0.0,93.0,84577.0,1.0,7.0,1.0,1.0,6.0,73272.0,122.0,1.0,2.0,7118.0,110.0,16400.0,1.0,2.0,2.0,3.0,6506.0,150.0,98.2,0.0,0.0,112.0,128.0,9.0,6.0,0.0,9.0,53.0,6.0,53.0,0.0,2.0,3.0,2.0,3.0,12.0,4.0,5.0,3.0,13.0,0.0,0.0,0.0,2.0,94.4,100.0,0.0,0.0,76584.0,84577.0,8200.0,60184.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,


In [47]:
# 컬럼 단위 샘플 접근
df['emp_title']
# df[텍스트형태의 컬럼명]

# 인덱싱이나 슬라이싱으로 데이터에 접근을 할 때 큰 단위를 선택하고 그 결과에서 인덱싱 혹은 슬라이싱을 하면
# 조금 더 편하게, 쉽게 데이터 접근이 가능하다.

0                                     mechanic
1                                          NaN
2                                 Truck driver
3                       Confidential Secretary
4                              General Manager
5                                      NDT III
6                  Supervisory program analyst
7                  Commercial Property Manager
8                         Equipment technician
9                                          NaN
10               IT Manager - Business Process
11                   Strategic Account Manager
12                             Account Manager
13                             sales associate
14                            Machine operator
15                              Vice President
16                        Installation Manager
17                               Senior Agent 
18                           commercial sales 
19                                       sales
20                  Assistant General Manager 
21           

In [48]:
# 여러 컬럼 동시 접근
df[['emp_title', 'grade']]

Unnamed: 0,emp_title,grade
0,mechanic,B
1,,B
2,Truck driver,C
3,Confidential Secretary,B
4,General Manager,C
5,NDT III,C
6,Supervisory program analyst,B
7,Commercial Property Manager,B
8,Equipment technician,C
9,,A


In [49]:
# row와 columns을 동시에 슬라이싱 하는 속성
# df.loc[인덱스, 컬럼명]
df.loc[10:20, ['emp_title', 'grade']]

Unnamed: 0,emp_title,grade
10,IT Manager - Business Process,C
11,Strategic Account Manager,C
12,Account Manager,D
13,sales associate,B
14,Machine operator,B
15,Vice President,C
16,Installation Manager,D
17,Senior Agent,D
18,commercial sales,C
19,sales,D


### 팬시인덱싱
> **`bool`** 형태의 array를 조건을 전달하여 다차원 배열을 인덱싱하는 방법.  
조건식을 사용하여 분석에 필요한 데이터샘플을 추출하기 용이합니다.

In [56]:
# 신용등급이 A인 샘플의 emp_title 확인
df[df['grade'] == "A"]['emp_title'].unique()

array([nan, 'Control System Designer', 'Policy Analyst', ...,
       'Physical Therapy Aide', 'Title searcher', 'Hair Stylist'],
      dtype=object)

In [62]:
# 대출금액평균
df['loan_amnt'].mean()

15315.2945

In [52]:
# 조건식 샘플링
df[df['emp_title'] == 'ceo']

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,url,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,mths_since_last_delinq,mths_since_last_record,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,out_prncp_inv,total_pymnt,total_pymnt_inv,total_rec_prncp,total_rec_int,total_rec_late_fee,recoveries,collection_recovery_fee,last_pymnt_d,last_pymnt_amnt,next_pymnt_d,last_credit_pull_d,collections_12_mths_ex_med,mths_since_last_major_derog,policy_code,application_type,annual_inc_joint,dti_joint,verification_status_joint,acc_now_delinq,tot_coll_amt,tot_cur_bal,open_acc_6m,open_act_il,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,total_rev_hi_lim,inq_fi,total_cu_tl,inq_last_12m,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,bc_util,chargeoff_within_12_mths,delinq_amnt,mo_sin_old_il_acct,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_bc_dlq,mths_since_recent_inq,mths_since_recent_revol_delinq,num_accts_ever_120_pd,num_actv_bc_tl,num_actv_rev_tl,num_bc_sats,num_bc_tl,num_il_tl,num_op_rev_tl,num_rev_accts,num_rev_tl_bal_gt_0,num_sats,num_tl_120dpd_2m,num_tl_30dpd,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit,revol_bal_joint,sec_app_earliest_cr_line,sec_app_inq_last_6mths,sec_app_mort_acc,sec_app_open_acc,sec_app_revol_util,sec_app_open_act_il,sec_app_num_rev_accts,sec_app_chargeoff_within_12_mths,sec_app_collections_12_mths_ex_med,sec_app_mths_since_last_major_derog,hardship_flag,hardship_type,hardship_reason,hardship_status,deferral_term,hardship_amount,hardship_start_date,hardship_end_date,payment_plan_start_date,hardship_length,hardship_dpd,hardship_loan_status,orig_projected_additional_accrued_interest,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
16020,,,40000,40000,40000.0,36 months,9.93,1289.38,B,B2,ceo,10+ years,RENT,400000.0,Verified,Dec-2017,Fully Paid,n,,,debt_consolidation,Debt consolidation,787xx,TX,9.2,0.0,Jul-2013,0.0,35.0,,5.0,0.0,31860,56.9,8.0,f,0.0,0.0,42981.087101,42981.09,40000.0,2981.09,0.0,0.0,0.0,Oct-2018,31398.74,,Oct-2018,0.0,,1,Individual,,,,0.0,0.0,62205.0,1.0,2.0,0.0,1.0,17.0,29720.0,3.0,1.0,1.0,19040.0,38.0,56000.0,0.0,0.0,0.0,2.0,12441.0,24140.0,56.9,0.0,0.0,43.0,52.0,6.0,6.0,1.0,6.0,,17.0,,0.0,2.0,2.0,2.0,2.0,3.0,2.0,3.0,2.0,5.0,0.0,0.0,0.0,1.0,85.7,0.0,0.0,0.0,136843.0,62205.0,56000.0,78588.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,DirectPay,N,,,,,,
16322,,,10000,10000,10000.0,36 months,6.72,307.5,A,A3,ceo,2 years,MORTGAGE,130000.0,Not Verified,Dec-2017,Current,n,,,major_purchase,Major purchase,900xx,CA,13.07,0.0,Nov-1992,2.0,,,12.0,0.0,8100,10.6,28.0,w,6347.92,6347.92,4301.27,4301.27,3652.08,649.19,0.0,0.0,0.0,Feb-2019,307.5,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,0.0,21910.0,1.0,1.0,1.0,3.0,12.0,13810.0,70.0,3.0,5.0,1797.0,23.0,76300.0,2.0,0.0,13.0,8.0,1992.0,57718.0,6.1,0.0,0.0,142.0,300.0,2.0,2.0,2.0,10.0,,1.0,,0.0,3.0,5.0,8.0,17.0,5.0,10.0,21.0,5.0,12.0,0.0,0.0,0.0,4.0,100.0,0.0,0.0,0.0,96111.0,21910.0,61500.0,19811.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
28988,,,40000,40000,40000.0,36 months,10.42,1298.59,B,B3,ceo,10+ years,OWN,60000.0,Verified,Nov-2017,Current,n,,,credit_card,Credit card refinancing,916xx,CA,8.64,0.0,Apr-2004,2.0,,57.0,9.0,1.0,882,5.2,16.0,w,24843.28,24843.28,19520.62,19520.62,15156.72,4298.97,64.93,0.0,0.0,Feb-2019,2662.11,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,0.0,18376.0,1.0,1.0,0.0,1.0,22.0,17494.0,74.0,2.0,5.0,882.0,45.0,16900.0,2.0,1.0,5.0,6.0,2297.0,15018.0,5.5,0.0,0.0,131.0,163.0,5.0,5.0,0.0,9.0,,3.0,,0.0,1.0,1.0,6.0,8.0,5.0,8.0,11.0,1.0,9.0,0.0,0.0,0.0,2.0,100.0,0.0,1.0,0.0,40612.0,18376.0,15900.0,23712.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
36815,,,35000,35000,35000.0,60 months,20.0,927.29,D,D4,ceo,10+ years,MORTGAGE,145000.0,Source Verified,Nov-2017,Current,n,,,major_purchase,Major purchase,956xx,CA,17.99,0.0,Sep-1994,0.0,,93.0,12.0,1.0,95844,91.5,19.0,w,29193.0,29193.0,13851.02,13851.02,5807.0,8044.02,0.0,0.0,0.0,Feb-2019,927.29,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,0.0,610663.0,0.0,3.0,0.0,0.0,38.0,15192.0,36.0,3.0,6.0,18801.0,75.0,104800.0,0.0,0.0,1.0,8.0,50889.0,8232.0,83.2,0.0,0.0,225.0,277.0,7.0,7.0,4.0,13.0,,0.0,,0.0,4.0,6.0,4.0,4.0,8.0,7.0,7.0,6.0,12.0,0.0,0.0,0.0,3.0,100.0,75.0,1.0,0.0,657146.0,111036.0,49000.0,42746.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,


In [64]:
# 신용등급 A와 B인 샘플접근
df[(df['grade'] == 'A') | (df['grade'] == 'B')]
# 조건식을 여러개 써야 한다면 조건마다 ()로 감싸주시는 것이 좋습니다.

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,url,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,mths_since_last_delinq,mths_since_last_record,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,out_prncp_inv,total_pymnt,total_pymnt_inv,total_rec_prncp,total_rec_int,total_rec_late_fee,recoveries,collection_recovery_fee,last_pymnt_d,last_pymnt_amnt,next_pymnt_d,last_credit_pull_d,collections_12_mths_ex_med,mths_since_last_major_derog,policy_code,application_type,annual_inc_joint,dti_joint,verification_status_joint,acc_now_delinq,tot_coll_amt,tot_cur_bal,open_acc_6m,open_act_il,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,total_rev_hi_lim,inq_fi,total_cu_tl,inq_last_12m,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,bc_util,chargeoff_within_12_mths,delinq_amnt,mo_sin_old_il_acct,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_bc_dlq,mths_since_recent_inq,mths_since_recent_revol_delinq,num_accts_ever_120_pd,num_actv_bc_tl,num_actv_rev_tl,num_bc_sats,num_bc_tl,num_il_tl,num_op_rev_tl,num_rev_accts,num_rev_tl_bal_gt_0,num_sats,num_tl_120dpd_2m,num_tl_30dpd,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit,revol_bal_joint,sec_app_earliest_cr_line,sec_app_inq_last_6mths,sec_app_mort_acc,sec_app_open_acc,sec_app_revol_util,sec_app_open_act_il,sec_app_num_rev_accts,sec_app_chargeoff_within_12_mths,sec_app_collections_12_mths_ex_med,sec_app_mths_since_last_major_derog,hardship_flag,hardship_type,hardship_reason,hardship_status,deferral_term,hardship_amount,hardship_start_date,hardship_end_date,payment_plan_start_date,hardship_length,hardship_dpd,hardship_loan_status,orig_projected_additional_accrued_interest,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
0,,,10000,10000,10000.0,36 months,9.44,320.05,B,B1,mechanic,6 years,MORTGAGE,80000.0,Not Verified,Dec-2017,Current,n,,,credit_card,Credit card refinancing,762xx,TX,14.82,0.0,Jul-2007,0.0,34.0,,8.0,0.0,5225,73.6,30.0,w,6442.28,6442.28,4493.810000,4493.81,3557.72,936.09,0.00,0.0,0.0,Feb-2019,320.05,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,0.0,173110.0,0.0,2.0,0.0,2.0,23.0,12496.0,39.0,0.0,0.0,3949.0,45.0,7100.0,1.0,0.0,0.0,2.0,21639.0,1875.0,73.6,0.0,0.0,125.0,78.0,26.0,23.0,3.0,26.0,,21.0,,0.0,2.0,2.0,4.0,4.0,21.0,4.0,5.0,2.0,8.0,0.0,0.0,0.0,0.0,96.4,25.0,0.0,0.0,196130.0,17756.0,7100.0,31992.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
1,,,3500,3500,3500.0,36 months,10.42,113.63,B,B3,,,OWN,90000.0,Not Verified,Dec-2017,Current,n,,,other,Other,295xx,SC,28.51,0.0,Jun-2002,0.0,39.0,28.0,12.0,3.0,6953,51.9,38.0,w,2266.55,2266.55,1586.770000,1586.77,1233.45,353.32,0.00,0.0,0.0,Feb-2019,113.63,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,0.0,339028.0,0.0,4.0,0.0,3.0,22.0,76501.0,69.0,1.0,2.0,1628.0,65.0,13400.0,1.0,5.0,1.0,5.0,28252.0,808.0,82.4,0.0,0.0,164.0,186.0,7.0,7.0,2.0,7.0,39.0,7.0,39.0,0.0,4.0,7.0,4.0,10.0,19.0,7.0,17.0,7.0,12.0,0.0,0.0,0.0,1.0,97.3,75.0,0.0,3.0,416685.0,83454.0,4600.0,110595.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
3,,,14000,14000,14000.0,36 months,10.91,457.75,B,B4,Confidential Secretary,2 years,RENT,39000.0,Source Verified,Dec-2017,Current,n,,,credit_card,Credit card refinancing,125xx,NY,22.88,0.0,Apr-2003,0.0,74.0,,8.0,0.0,12918,59.5,16.0,w,9090.87,9090.87,6391.530000,6391.53,4909.13,1482.40,0.00,0.0,0.0,Feb-2019,457.75,Mar-2019,Feb-2019,0.0,74.0,1,Individual,,,,0.0,457.0,29103.0,1.0,1.0,1.0,2.0,4.0,16185.0,95.0,1.0,5.0,10153.0,75.0,21700.0,2.0,6.0,0.0,7.0,3638.0,7265.0,61.6,0.0,0.0,36.0,176.0,7.0,4.0,0.0,7.0,,16.0,74.0,1.0,2.0,4.0,3.0,4.0,4.0,7.0,12.0,4.0,8.0,0.0,0.0,0.0,2.0,93.8,33.3,0.0,0.0,38704.0,29103.0,18900.0,17004.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
6,,,20000,20000,20000.0,36 months,9.93,644.69,B,B2,Supervisory program analyst,10+ years,MORTGAGE,140000.0,Not Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,750xx,TX,7.76,2.0,May-1988,2.0,,,9.0,0.0,21374,62.0,24.0,w,12933.83,12933.83,9035.820000,9035.82,7066.17,1937.42,32.23,0.0,0.0,Feb-2019,644.69,Mar-2019,Feb-2019,0.0,17.0,1,Individual,,,,0.0,0.0,159398.0,2.0,1.0,0.0,1.0,16.0,19645.0,86.0,4.0,6.0,18596.0,62.0,34700.0,0.0,0.0,4.0,8.0,17710.0,10933.0,63.0,0.0,0.0,136.0,355.0,3.0,3.0,1.0,3.0,,3.0,,1.0,3.0,7.0,5.0,7.0,11.0,7.0,12.0,4.0,9.0,,0.0,2.0,5.0,71.0,33.3,0.0,0.0,179419.0,41019.0,32800.0,22835.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
7,,,3200,3200,3200.0,36 months,10.91,104.63,B,B4,Commercial Property Manager,< 1 year,MORTGAGE,48000.0,Not Verified,Dec-2017,Current,n,,,other,Other,936xx,CA,16.66,0.0,Mar-2006,0.0,,,9.0,0.0,12748,91.7,11.0,w,2077.66,2077.66,1472.580000,1472.58,1122.34,350.24,0.00,0.0,0.0,Feb-2019,104.63,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,188.0,190238.0,0.0,1.0,0.0,1.0,15.0,13772.0,87.0,1.0,2.0,3987.0,89.0,13900.0,0.0,0.0,0.0,3.0,21138.0,1084.0,91.0,0.0,0.0,15.0,141.0,7.0,7.0,2.0,7.0,,,,0.0,6.0,7.0,6.0,7.0,1.0,7.0,8.0,7.0,9.0,0.0,0.0,0.0,1.0,100.0,83.3,0.0,0.0,211525.0,26520.0,12000.0,15908.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
9,,,35000,35000,35000.0,36 months,6.08,1066.04,A,A2,,,OWN,76000.0,Verified,Dec-2017,Current,n,,,credit_card,Credit card refinancing,388xx,MS,34.28,0.0,Aug-1994,1.0,,,23.0,0.0,47145,48.0,42.0,w,22139.93,22139.93,14900.920000,14900.92,12860.07,2040.85,0.00,0.0,0.0,Feb-2019,1066.04,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,0.0,170361.0,1.0,2.0,0.0,0.0,26.0,28292.0,64.0,2.0,2.0,11128.0,53.0,98200.0,0.0,4.0,1.0,2.0,7744.0,27309.0,63.2,0.0,0.0,148.0,280.0,6.0,6.0,4.0,6.0,,6.0,,0.0,13.0,14.0,14.0,16.0,9.0,20.0,29.0,14.0,23.0,0.0,0.0,0.0,2.0,100.0,35.7,0.0,0.0,252224.0,75437.0,74300.0,44553.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,DirectPay,N,,,,,,
13,,,18000,18000,18000.0,36 months,9.44,576.09,B,B1,sales associate,2 years,OWN,22000.0,Source Verified,Dec-2017,Fully Paid,n,,,debt_consolidation,Debt consolidation,439xx,OH,31.21,0.0,Feb-2008,3.0,,,15.0,0.0,1799,4.6,20.0,w,0.00,0.00,19243.700856,19243.70,18000.00,1243.70,0.00,0.0,0.0,Oct-2018,14077.77,,Feb-2019,0.0,,1,Joint App,70000.0,22.15,Source Verified,0.0,0.0,15737.0,3.0,2.0,1.0,1.0,5.0,13938.0,73.0,5.0,9.0,1499.0,27.0,39300.0,1.0,0.0,5.0,10.0,1049.0,24001.0,5.9,0.0,0.0,53.0,118.0,4.0,4.0,0.0,10.0,,4.0,,0.0,1.0,2.0,5.0,5.0,4.0,13.0,16.0,2.0,15.0,0.0,0.0,0.0,6.0,100.0,0.0,0.0,0.0,58296.0,15737.0,25500.0,18996.0,15710.0,Nov-2015,4.0,0.0,12.0,79.7,1.0,11.0,0.0,0.0,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
14,,,10200,10200,10200.0,36 months,11.99,338.74,B,B5,Machine operator,4 years,RENT,35000.0,Source Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,635xx,MO,15.26,0.0,Dec-2008,1.0,,,6.0,0.0,4238,26.2,19.0,w,6662.23,6662.23,4745.710000,4745.71,3537.77,1191.00,16.94,0.0,0.0,Feb-2019,338.74,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,0.0,21882.0,1.0,1.0,1.0,1.0,11.0,17644.0,90.0,1.0,4.0,2193.0,61.0,16200.0,0.0,0.0,5.0,5.0,3647.0,5493.0,31.3,0.0,0.0,108.0,26.0,4.0,4.0,0.0,14.0,,4.0,,0.0,2.0,3.0,3.0,3.0,14.0,5.0,5.0,3.0,6.0,0.0,0.0,0.0,2.0,100.0,33.3,0.0,0.0,35828.0,21882.0,8000.0,19628.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
21,,,20000,20000,20000.0,36 months,6.08,609.17,A,A2,Control System Designer,2 years,OWN,149000.0,Verified,Dec-2017,Current,n,,,other,Other,770xx,TX,13.31,0.0,Jul-1999,0.0,,,16.0,0.0,5049,5.3,27.0,w,12651.33,12651.33,8470.960000,8470.96,7348.67,1122.29,0.00,0.0,0.0,Feb-2019,609.17,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,0.0,399842.0,1.0,3.0,2.0,2.0,5.0,92121.0,74.0,3.0,8.0,4983.0,26.0,94800.0,0.0,1.0,1.0,11.0,24990.0,50951.0,9.0,0.0,0.0,130.0,221.0,8.0,5.0,1.0,8.0,,12.0,,0.0,2.0,2.0,9.0,15.0,7.0,12.0,19.0,2.0,16.0,0.0,0.0,0.0,5.0,100.0,0.0,0.0,0.0,527931.0,97170.0,56000.0,110531.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
22,,,40000,40000,40000.0,36 months,10.42,1298.59,B,B3,Teacher,10+ years,MORTGAGE,54000.0,Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,630xx,MO,47.44,0.0,Nov-1990,1.0,,98.0,11.0,1.0,15078,30.2,32.0,w,25903.99,25903.99,18258.280000,18258.28,14096.01,4162.27,0.00,0.0,0.0,Feb-2019,1298.59,Mar-2019,Feb-2019,0.0,,1,Joint App,148500.0,23.74,Verified,0.0,0.0,492950.0,2.0,5.0,2.0,3.0,2.0,63872.0,81.0,0.0,2.0,8576.0,61.0,49900.0,2.0,9.0,8.0,6.0,44814.0,31467.0,32.3,0.0,0.0,175.0,324.0,20.0,2.0,5.0,24.0,,2.0,,0.0,3.0,4.0,3.0,6.0,16.0,5.0,11.0,4.0,11.0,0.0,0.0,0.0,3.0,100.0,33.3,1.0,0.0,583050.0,78950.0,46500.0,119150.0,24210.0,Oct-2005,1.0,7.0,21.0,66.6,14.0,17.0,0.0,0.0,,N,,,,,,,,,,,,,,,Cash,N,,,,,,


In [66]:
# df loan_amnt 컬럼값이 10000이상인 채권샘플의 grade
df[df['loan_amnt'] > 10000]['grade']

3        B
5        C
6        B
8        C
9        A
10       C
11       C
13       B
14       B
15       C
16       D
18       C
20       D
21       A
22       B
23       C
24       D
25       C
27       D
29       A
30       C
31       B
35       C
38       C
42       D
46       A
49       A
51       B
52       B
55       D
        ..
49960    B
49961    D
49962    B
49964    C
49965    B
49966    C
49968    B
49969    E
49970    A
49971    E
49972    B
49974    D
49975    A
49977    D
49978    A
49979    C
49980    C
49981    C
49982    A
49984    B
49985    D
49987    B
49988    C
49989    A
49991    B
49992    D
49993    C
49995    D
49996    C
49997    C
Name: grade, Length: 28989, dtype: object

In [71]:
# df grade C 와 D 인 채권샘플 emp_title, annual_inc
# 옵션 annual_inc 최대값인 인덱스 빼오기
df[(df['grade'] == 'C') | (df['grade'] == 'D')][['emp_title', 'annual_inc']]['annual_inc'].argmax()

The current behaviour of 'Series.argmax' is deprecated, use 'idxmax'
instead.
The behavior of 'argmax' will be corrected to return the positional
maximum in the future. For now, use 'series.values.argmax' or
'np.argmax(np.array(values))' to get the position of the maximum
row.
  This is separate from the ipykernel package so we can avoid doing imports until


27994

## 데이터프레임 병합
> 실제 분석업무를 진행하다보면 데이터가 여기저기 분산되어 있을 경우가 더 많습니다.  
조각난 데이터를 분석에 필요한 데이터셋으로 만들기 위해 데이터프레임 병합을 많이 사용합니다.  
한개 이상의 데이터프레임을 병합 할 때 주로 사용하는 함수 2가지를 알아보겠습니다.    

### 데이터 병합에 사용가능한 key(병합할 기준이 되는 행 or 열)값이 있는경우
**`pd.merge`**(베이스데이터프레임, 병합할데이터프레임)  
> 사용 가능 한 파라메터
- `how` : 'left', 'right', 'inner', 'outer'
- `left_on` : key값이 다를 경우 베이스데이터프레임의 key 설정
- `right_on` : key값이 다를 경우 병합데이터프레임의 key 설정
    
### 단순 데이터 연결
**`pd.concat`**([베이스데이터프레임, 병합할데이터프레임], axis=0 or 1)
> 사용 가능 한 파라메터  
- `axis` : 축 방향 설정

### merge 예시

In [77]:
merge_df1 = pd.DataFrame({
    '이름': ['원영', '사쿠라', '유리', '예나', '유진', '나코', '은비', '혜원', '히토미', '채원', '민주', '째욘'],
    '국어': [100, 70, 70, 70, 60, 90, 90, 70, 70, 80, 100, 100],
    '영어': [100, 90, 80, 50, 70, 100, 70, 90, 100, 100, 80, 100]
    }, columns=['이름', '국어', '영어']) 

merge_df2 = pd.DataFrame({
    '일어': [80, 100, 100, 90, 70, 50, 100],
    '수학': [90, 70, 100, 80, 70, 80, 90],
    '이름': ['원영', '사쿠라', '나코', '히토미', '예나', '은비', '째욘'],
    }, columns=['일어', '수학', '이름'])

In [78]:
merge_df1

Unnamed: 0,이름,국어,영어
0,원영,100,100
1,사쿠라,70,90
2,유리,70,80
3,예나,70,50
4,유진,60,70
5,나코,90,100
6,은비,90,70
7,혜원,70,90
8,히토미,70,100
9,채원,80,100


In [80]:
merge_df2

Unnamed: 0,일어,수학,이름
0,80,90,원영
1,100,70,사쿠라
2,100,100,나코
3,90,80,히토미
4,70,70,예나
5,50,80,은비
6,100,90,째욘


In [85]:
pd.merge(merge_df1, merge_df2, how='outer')

Unnamed: 0,이름,국어,영어,일어,수학
0,원영,100,100,80.0,90.0
1,사쿠라,70,90,100.0,70.0
2,유리,70,80,,
3,예나,70,50,70.0,70.0
4,유진,60,70,,
5,나코,90,100,100.0,100.0
6,은비,90,70,50.0,80.0
7,혜원,70,90,,
8,히토미,70,100,90.0,80.0
9,채원,80,100,,


### concat 예시
현재 df에 저장되어있는 데이터에 추가로 2만개의 데이터를 이어붙여보겠습니다. df1이라는 변수에 이어붙일 데이터를 불러들여 병합을 진행해보겠습니다.

In [86]:
# df1 변수에 loan2.csv 파일을 읽어들입니다.
df1 = pd.read_csv('loan2.csv')

In [89]:
# 데이터프레임 확인
df1.tail()

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,url,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,mths_since_last_delinq,mths_since_last_record,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,out_prncp_inv,total_pymnt,total_pymnt_inv,total_rec_prncp,total_rec_int,total_rec_late_fee,recoveries,collection_recovery_fee,last_pymnt_d,last_pymnt_amnt,next_pymnt_d,last_credit_pull_d,collections_12_mths_ex_med,mths_since_last_major_derog,policy_code,application_type,annual_inc_joint,dti_joint,verification_status_joint,acc_now_delinq,tot_coll_amt,tot_cur_bal,open_acc_6m,open_act_il,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,total_rev_hi_lim,inq_fi,total_cu_tl,inq_last_12m,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,bc_util,chargeoff_within_12_mths,delinq_amnt,mo_sin_old_il_acct,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_bc_dlq,mths_since_recent_inq,mths_since_recent_revol_delinq,num_accts_ever_120_pd,num_actv_bc_tl,num_actv_rev_tl,num_bc_sats,num_bc_tl,num_il_tl,num_op_rev_tl,num_rev_accts,num_rev_tl_bal_gt_0,num_sats,num_tl_120dpd_2m,num_tl_30dpd,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit,revol_bal_joint,sec_app_earliest_cr_line,sec_app_inq_last_6mths,sec_app_mort_acc,sec_app_open_acc,sec_app_revol_util,sec_app_open_act_il,sec_app_num_rev_accts,sec_app_chargeoff_within_12_mths,sec_app_collections_12_mths_ex_med,sec_app_mths_since_last_major_derog,hardship_flag,hardship_type,hardship_reason,hardship_status,deferral_term,hardship_amount,hardship_start_date,hardship_end_date,payment_plan_start_date,hardship_length,hardship_dpd,hardship_loan_status,orig_projected_additional_accrued_interest,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
49994,,,12000,12000,12000.0,60 months,14.08,279.72,C,C3,house keeper,10+ years,MORTGAGE,58000.0,Not Verified,Oct-2017,Current,n,,,debt_consolidation,Debt consolidation,054xx,VT,20.88,0.0,Jan-2004,0.0,,,12.0,0.0,9592,39.2,34.0,w,8687.2,8687.2,5360.96,5360.96,3312.8,2048.16,0.0,0.0,0.0,Feb-2019,279.72,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,7921.0,45212.0,2.0,2.0,0.0,1.0,24.0,35620.0,36.0,3.0,4.0,4052.0,38.0,24500.0,2.0,2.0,1.0,5.0,4110.0,1807.0,70.9,0.0,0.0,157.0,164.0,1.0,1.0,2.0,9.0,,9.0,,0.0,2.0,4.0,3.0,9.0,11.0,10.0,21.0,4.0,12.0,0.0,0.0,0.0,3.0,100.0,66.7,0.0,0.0,73948.0,45212.0,6200.0,49448.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
49995,,,12000,12000,12000.0,60 months,25.82,358.01,E,E4,Skilled Labor,< 1 year,MORTGAGE,30000.0,Not Verified,Oct-2017,Fully Paid,n,,,debt_consolidation,Debt consolidation,971xx,OR,19.28,3.0,Mar-2003,1.0,13.0,,10.0,0.0,3497,20.2,26.0,w,0.0,0.0,14499.802172,14499.8,12000.0,2499.8,0.0,0.0,0.0,Aug-2018,182.74,,Sep-2017,0.0,46.0,1,Joint App,135000.0,7.36,Not Verified,0.0,550.0,346404.0,0.0,1.0,1.0,1.0,8.0,9431.0,78.0,0.0,3.0,2238.0,44.0,17300.0,5.0,1.0,7.0,4.0,34640.0,9162.0,19.6,0.0,0.0,163.0,174.0,16.0,8.0,4.0,141.0,46.0,0.0,17.0,2.0,1.0,4.0,2.0,5.0,4.0,7.0,18.0,4.0,10.0,0.0,0.0,0.0,1.0,76.9,50.0,0.0,0.0,382275.0,12928.0,11400.0,12125.0,13710.0,Apr-2003,1.0,4.0,11.0,71.6,1.0,17.0,0.0,0.0,7.0,N,,,,,,,,,,,,,,,Cash,N,,,,,,
49996,,,10000,10000,10000.0,36 months,11.99,332.1,B,B5,Teacher,10+ years,OWN,64000.0,Source Verified,Oct-2017,Current,n,,,debt_consolidation,Debt consolidation,603xx,IL,12.96,0.0,Feb-1995,0.0,,,6.0,0.0,7094,70.9,13.0,w,5993.27,5993.27,5306.94,5306.94,4006.73,1300.21,0.0,0.0,0.0,Feb-2019,332.1,Mar-2019,Feb-2019,1.0,,1,Individual,,,,0.0,77.0,189735.0,1.0,2.0,1.0,2.0,3.0,29354.0,100.0,0.0,1.0,1864.0,93.0,10000.0,0.0,0.0,3.0,3.0,31623.0,136.0,93.2,0.0,0.0,134.0,271.0,15.0,3.0,3.0,111.0,,3.0,,0.0,1.0,3.0,1.0,2.0,4.0,3.0,6.0,3.0,6.0,0.0,0.0,0.0,1.0,100.0,100.0,0.0,0.0,203417.0,36448.0,2000.0,29317.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
49997,,,12000,12000,12000.0,60 months,21.45,327.69,D,D5,,,RENT,60000.0,Not Verified,Oct-2017,Current,n,,,debt_consolidation,Debt consolidation,996xx,AK,30.82,2.0,Jul-2003,1.0,7.0,,8.0,0.0,12927,34.9,17.0,w,9924.69,9924.69,5207.29,5207.29,2075.31,3131.98,0.0,0.0,0.0,Feb-2019,327.69,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,535.0,65238.0,2.0,4.0,2.0,3.0,4.0,52311.0,64.0,1.0,2.0,2406.0,55.0,37000.0,0.0,12.0,4.0,5.0,8155.0,18789.0,14.6,0.0,0.0,170.0,67.0,4.0,4.0,0.0,16.0,9.0,5.0,7.0,0.0,2.0,3.0,2.0,3.0,12.0,4.0,5.0,3.0,8.0,0.0,0.0,0.0,3.0,88.2,0.0,0.0,0.0,118975.0,65238.0,22000.0,81975.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
49998,,,16550,16550,16550.0,60 months,21.45,451.94,D,D5,BABYSITTER,3 years,RENT,60000.0,Not Verified,Oct-2017,Current,n,,,credit_card,Credit card refinancing,112xx,NY,18.4,0.0,Apr-2014,1.0,,,13.0,0.0,25760,50.8,14.0,w,13541.01,13541.01,7355.5,7355.5,3008.99,4346.51,0.0,0.0,0.0,Feb-2019,500.0,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,0.0,25760.0,2.0,0.0,0.0,0.0,,0.0,,3.0,8.0,6158.0,51.0,50700.0,0.0,0.0,2.0,8.0,1982.0,9816.0,49.7,0.0,0.0,,41.0,3.0,3.0,0.0,10.0,,0.0,,0.0,5.0,12.0,5.0,5.0,0.0,13.0,14.0,12.0,13.0,0.0,0.0,0.0,3.0,100.0,20.0,0.0,0.0,50700.0,25760.0,19500.0,0.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,


In [90]:
# df 와 df1 shape 확인
df.shape, df1.shape

((50000, 145), (49999, 145))

In [91]:
# 데이터프레임 행단위 병합
df_test = pd.concat([df, df1])

In [95]:
# 데이터인덱스 확인
df_test.tail()

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,url,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,mths_since_last_delinq,mths_since_last_record,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,out_prncp_inv,total_pymnt,total_pymnt_inv,total_rec_prncp,total_rec_int,total_rec_late_fee,recoveries,collection_recovery_fee,last_pymnt_d,last_pymnt_amnt,next_pymnt_d,last_credit_pull_d,collections_12_mths_ex_med,mths_since_last_major_derog,policy_code,application_type,annual_inc_joint,dti_joint,verification_status_joint,acc_now_delinq,tot_coll_amt,tot_cur_bal,open_acc_6m,open_act_il,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,total_rev_hi_lim,inq_fi,total_cu_tl,inq_last_12m,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,bc_util,chargeoff_within_12_mths,delinq_amnt,mo_sin_old_il_acct,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_bc_dlq,mths_since_recent_inq,mths_since_recent_revol_delinq,num_accts_ever_120_pd,num_actv_bc_tl,num_actv_rev_tl,num_bc_sats,num_bc_tl,num_il_tl,num_op_rev_tl,num_rev_accts,num_rev_tl_bal_gt_0,num_sats,num_tl_120dpd_2m,num_tl_30dpd,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit,revol_bal_joint,sec_app_earliest_cr_line,sec_app_inq_last_6mths,sec_app_mort_acc,sec_app_open_acc,sec_app_revol_util,sec_app_open_act_il,sec_app_num_rev_accts,sec_app_chargeoff_within_12_mths,sec_app_collections_12_mths_ex_med,sec_app_mths_since_last_major_derog,hardship_flag,hardship_type,hardship_reason,hardship_status,deferral_term,hardship_amount,hardship_start_date,hardship_end_date,payment_plan_start_date,hardship_length,hardship_dpd,hardship_loan_status,orig_projected_additional_accrued_interest,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
49994,,,12000,12000,12000.0,60 months,14.08,279.72,C,C3,house keeper,10+ years,MORTGAGE,58000.0,Not Verified,Oct-2017,Current,n,,,debt_consolidation,Debt consolidation,054xx,VT,20.88,0.0,Jan-2004,0.0,,,12.0,0.0,9592,39.2,34.0,w,8687.2,8687.2,5360.96,5360.96,3312.8,2048.16,0.0,0.0,0.0,Feb-2019,279.72,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,7921.0,45212.0,2.0,2.0,0.0,1.0,24.0,35620.0,36.0,3.0,4.0,4052.0,38.0,24500.0,2.0,2.0,1.0,5.0,4110.0,1807.0,70.9,0.0,0.0,157.0,164.0,1.0,1.0,2.0,9.0,,9.0,,0.0,2.0,4.0,3.0,9.0,11.0,10.0,21.0,4.0,12.0,0.0,0.0,0.0,3.0,100.0,66.7,0.0,0.0,73948.0,45212.0,6200.0,49448.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
49995,,,12000,12000,12000.0,60 months,25.82,358.01,E,E4,Skilled Labor,< 1 year,MORTGAGE,30000.0,Not Verified,Oct-2017,Fully Paid,n,,,debt_consolidation,Debt consolidation,971xx,OR,19.28,3.0,Mar-2003,1.0,13.0,,10.0,0.0,3497,20.2,26.0,w,0.0,0.0,14499.802172,14499.8,12000.0,2499.8,0.0,0.0,0.0,Aug-2018,182.74,,Sep-2017,0.0,46.0,1,Joint App,135000.0,7.36,Not Verified,0.0,550.0,346404.0,0.0,1.0,1.0,1.0,8.0,9431.0,78.0,0.0,3.0,2238.0,44.0,17300.0,5.0,1.0,7.0,4.0,34640.0,9162.0,19.6,0.0,0.0,163.0,174.0,16.0,8.0,4.0,141.0,46.0,0.0,17.0,2.0,1.0,4.0,2.0,5.0,4.0,7.0,18.0,4.0,10.0,0.0,0.0,0.0,1.0,76.9,50.0,0.0,0.0,382275.0,12928.0,11400.0,12125.0,13710.0,Apr-2003,1.0,4.0,11.0,71.6,1.0,17.0,0.0,0.0,7.0,N,,,,,,,,,,,,,,,Cash,N,,,,,,
49996,,,10000,10000,10000.0,36 months,11.99,332.1,B,B5,Teacher,10+ years,OWN,64000.0,Source Verified,Oct-2017,Current,n,,,debt_consolidation,Debt consolidation,603xx,IL,12.96,0.0,Feb-1995,0.0,,,6.0,0.0,7094,70.9,13.0,w,5993.27,5993.27,5306.94,5306.94,4006.73,1300.21,0.0,0.0,0.0,Feb-2019,332.1,Mar-2019,Feb-2019,1.0,,1,Individual,,,,0.0,77.0,189735.0,1.0,2.0,1.0,2.0,3.0,29354.0,100.0,0.0,1.0,1864.0,93.0,10000.0,0.0,0.0,3.0,3.0,31623.0,136.0,93.2,0.0,0.0,134.0,271.0,15.0,3.0,3.0,111.0,,3.0,,0.0,1.0,3.0,1.0,2.0,4.0,3.0,6.0,3.0,6.0,0.0,0.0,0.0,1.0,100.0,100.0,0.0,0.0,203417.0,36448.0,2000.0,29317.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
49997,,,12000,12000,12000.0,60 months,21.45,327.69,D,D5,,,RENT,60000.0,Not Verified,Oct-2017,Current,n,,,debt_consolidation,Debt consolidation,996xx,AK,30.82,2.0,Jul-2003,1.0,7.0,,8.0,0.0,12927,34.9,17.0,w,9924.69,9924.69,5207.29,5207.29,2075.31,3131.98,0.0,0.0,0.0,Feb-2019,327.69,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,535.0,65238.0,2.0,4.0,2.0,3.0,4.0,52311.0,64.0,1.0,2.0,2406.0,55.0,37000.0,0.0,12.0,4.0,5.0,8155.0,18789.0,14.6,0.0,0.0,170.0,67.0,4.0,4.0,0.0,16.0,9.0,5.0,7.0,0.0,2.0,3.0,2.0,3.0,12.0,4.0,5.0,3.0,8.0,0.0,0.0,0.0,3.0,88.2,0.0,0.0,0.0,118975.0,65238.0,22000.0,81975.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
49998,,,16550,16550,16550.0,60 months,21.45,451.94,D,D5,BABYSITTER,3 years,RENT,60000.0,Not Verified,Oct-2017,Current,n,,,credit_card,Credit card refinancing,112xx,NY,18.4,0.0,Apr-2014,1.0,,,13.0,0.0,25760,50.8,14.0,w,13541.01,13541.01,7355.5,7355.5,3008.99,4346.51,0.0,0.0,0.0,Feb-2019,500.0,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,0.0,25760.0,2.0,0.0,0.0,0.0,,0.0,,3.0,8.0,6158.0,51.0,50700.0,0.0,0.0,2.0,8.0,1982.0,9816.0,49.7,0.0,0.0,,41.0,3.0,3.0,0.0,10.0,,0.0,,0.0,5.0,12.0,5.0,5.0,0.0,13.0,14.0,12.0,13.0,0.0,0.0,0.0,3.0,100.0,20.0,0.0,0.0,50700.0,25760.0,19500.0,0.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,


In [None]:
# 병합 데이터프레임 shape 확인
df.shape

## 인덱스 편집
방금 전 concat으로 병합한 데이터프레임의 이상한 점을 찾으셨나요?  
데이터 자체는 잘 붙였지만 인덱스가 꼬여있습니다. 인덱스 편집은 데이터분석을 위해 필요한 인덱스를 설정하기 위해 필요합니다.

In [102]:
# 인덱스리셋
df_test = df_test.reset_index(drop=True)
# drop - 현재 인덱스의 원본값을 버림
# inplace - 원본값 변경

In [103]:
df_test.tail()

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,url,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,mths_since_last_delinq,mths_since_last_record,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,out_prncp_inv,total_pymnt,total_pymnt_inv,total_rec_prncp,total_rec_int,total_rec_late_fee,recoveries,collection_recovery_fee,last_pymnt_d,last_pymnt_amnt,next_pymnt_d,last_credit_pull_d,collections_12_mths_ex_med,mths_since_last_major_derog,policy_code,application_type,annual_inc_joint,dti_joint,verification_status_joint,acc_now_delinq,tot_coll_amt,tot_cur_bal,open_acc_6m,open_act_il,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,total_rev_hi_lim,inq_fi,total_cu_tl,inq_last_12m,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,bc_util,chargeoff_within_12_mths,delinq_amnt,mo_sin_old_il_acct,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_bc_dlq,mths_since_recent_inq,mths_since_recent_revol_delinq,num_accts_ever_120_pd,num_actv_bc_tl,num_actv_rev_tl,num_bc_sats,num_bc_tl,num_il_tl,num_op_rev_tl,num_rev_accts,num_rev_tl_bal_gt_0,num_sats,num_tl_120dpd_2m,num_tl_30dpd,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit,revol_bal_joint,sec_app_earliest_cr_line,sec_app_inq_last_6mths,sec_app_mort_acc,sec_app_open_acc,sec_app_revol_util,sec_app_open_act_il,sec_app_num_rev_accts,sec_app_chargeoff_within_12_mths,sec_app_collections_12_mths_ex_med,sec_app_mths_since_last_major_derog,hardship_flag,hardship_type,hardship_reason,hardship_status,deferral_term,hardship_amount,hardship_start_date,hardship_end_date,payment_plan_start_date,hardship_length,hardship_dpd,hardship_loan_status,orig_projected_additional_accrued_interest,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
99994,,,12000,12000,12000.0,60 months,14.08,279.72,C,C3,house keeper,10+ years,MORTGAGE,58000.0,Not Verified,Oct-2017,Current,n,,,debt_consolidation,Debt consolidation,054xx,VT,20.88,0.0,Jan-2004,0.0,,,12.0,0.0,9592,39.2,34.0,w,8687.2,8687.2,5360.96,5360.96,3312.8,2048.16,0.0,0.0,0.0,Feb-2019,279.72,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,7921.0,45212.0,2.0,2.0,0.0,1.0,24.0,35620.0,36.0,3.0,4.0,4052.0,38.0,24500.0,2.0,2.0,1.0,5.0,4110.0,1807.0,70.9,0.0,0.0,157.0,164.0,1.0,1.0,2.0,9.0,,9.0,,0.0,2.0,4.0,3.0,9.0,11.0,10.0,21.0,4.0,12.0,0.0,0.0,0.0,3.0,100.0,66.7,0.0,0.0,73948.0,45212.0,6200.0,49448.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
99995,,,12000,12000,12000.0,60 months,25.82,358.01,E,E4,Skilled Labor,< 1 year,MORTGAGE,30000.0,Not Verified,Oct-2017,Fully Paid,n,,,debt_consolidation,Debt consolidation,971xx,OR,19.28,3.0,Mar-2003,1.0,13.0,,10.0,0.0,3497,20.2,26.0,w,0.0,0.0,14499.802172,14499.8,12000.0,2499.8,0.0,0.0,0.0,Aug-2018,182.74,,Sep-2017,0.0,46.0,1,Joint App,135000.0,7.36,Not Verified,0.0,550.0,346404.0,0.0,1.0,1.0,1.0,8.0,9431.0,78.0,0.0,3.0,2238.0,44.0,17300.0,5.0,1.0,7.0,4.0,34640.0,9162.0,19.6,0.0,0.0,163.0,174.0,16.0,8.0,4.0,141.0,46.0,0.0,17.0,2.0,1.0,4.0,2.0,5.0,4.0,7.0,18.0,4.0,10.0,0.0,0.0,0.0,1.0,76.9,50.0,0.0,0.0,382275.0,12928.0,11400.0,12125.0,13710.0,Apr-2003,1.0,4.0,11.0,71.6,1.0,17.0,0.0,0.0,7.0,N,,,,,,,,,,,,,,,Cash,N,,,,,,
99996,,,10000,10000,10000.0,36 months,11.99,332.1,B,B5,Teacher,10+ years,OWN,64000.0,Source Verified,Oct-2017,Current,n,,,debt_consolidation,Debt consolidation,603xx,IL,12.96,0.0,Feb-1995,0.0,,,6.0,0.0,7094,70.9,13.0,w,5993.27,5993.27,5306.94,5306.94,4006.73,1300.21,0.0,0.0,0.0,Feb-2019,332.1,Mar-2019,Feb-2019,1.0,,1,Individual,,,,0.0,77.0,189735.0,1.0,2.0,1.0,2.0,3.0,29354.0,100.0,0.0,1.0,1864.0,93.0,10000.0,0.0,0.0,3.0,3.0,31623.0,136.0,93.2,0.0,0.0,134.0,271.0,15.0,3.0,3.0,111.0,,3.0,,0.0,1.0,3.0,1.0,2.0,4.0,3.0,6.0,3.0,6.0,0.0,0.0,0.0,1.0,100.0,100.0,0.0,0.0,203417.0,36448.0,2000.0,29317.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
99997,,,12000,12000,12000.0,60 months,21.45,327.69,D,D5,,,RENT,60000.0,Not Verified,Oct-2017,Current,n,,,debt_consolidation,Debt consolidation,996xx,AK,30.82,2.0,Jul-2003,1.0,7.0,,8.0,0.0,12927,34.9,17.0,w,9924.69,9924.69,5207.29,5207.29,2075.31,3131.98,0.0,0.0,0.0,Feb-2019,327.69,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,535.0,65238.0,2.0,4.0,2.0,3.0,4.0,52311.0,64.0,1.0,2.0,2406.0,55.0,37000.0,0.0,12.0,4.0,5.0,8155.0,18789.0,14.6,0.0,0.0,170.0,67.0,4.0,4.0,0.0,16.0,9.0,5.0,7.0,0.0,2.0,3.0,2.0,3.0,12.0,4.0,5.0,3.0,8.0,0.0,0.0,0.0,3.0,88.2,0.0,0.0,0.0,118975.0,65238.0,22000.0,81975.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
99998,,,16550,16550,16550.0,60 months,21.45,451.94,D,D5,BABYSITTER,3 years,RENT,60000.0,Not Verified,Oct-2017,Current,n,,,credit_card,Credit card refinancing,112xx,NY,18.4,0.0,Apr-2014,1.0,,,13.0,0.0,25760,50.8,14.0,w,13541.01,13541.01,7355.5,7355.5,3008.99,4346.51,0.0,0.0,0.0,Feb-2019,500.0,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,0.0,25760.0,2.0,0.0,0.0,0.0,,0.0,,3.0,8.0,6158.0,51.0,50700.0,0.0,0.0,2.0,8.0,1982.0,9816.0,49.7,0.0,0.0,,41.0,3.0,3.0,0.0,10.0,,0.0,,0.0,5.0,12.0,5.0,5.0,0.0,13.0,14.0,12.0,13.0,0.0,0.0,0.0,3.0,100.0,20.0,0.0,0.0,50700.0,25760.0,19500.0,0.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,


In [106]:
# 인덱스 초기화
df_test = df_test.reset_index()

In [109]:
# 기존 컬럼값을 취해 index로 사용
df_test.set_index('index', inplace=True)

In [110]:
df_test

Unnamed: 0_level_0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,url,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,mths_since_last_delinq,mths_since_last_record,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,out_prncp_inv,total_pymnt,total_pymnt_inv,total_rec_prncp,total_rec_int,total_rec_late_fee,recoveries,collection_recovery_fee,last_pymnt_d,last_pymnt_amnt,next_pymnt_d,last_credit_pull_d,collections_12_mths_ex_med,mths_since_last_major_derog,policy_code,application_type,annual_inc_joint,dti_joint,verification_status_joint,acc_now_delinq,tot_coll_amt,tot_cur_bal,open_acc_6m,open_act_il,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,total_rev_hi_lim,inq_fi,total_cu_tl,inq_last_12m,acc_open_past_24mths,avg_cur_bal,bc_open_to_buy,bc_util,chargeoff_within_12_mths,delinq_amnt,mo_sin_old_il_acct,mo_sin_old_rev_tl_op,mo_sin_rcnt_rev_tl_op,mo_sin_rcnt_tl,mort_acc,mths_since_recent_bc,mths_since_recent_bc_dlq,mths_since_recent_inq,mths_since_recent_revol_delinq,num_accts_ever_120_pd,num_actv_bc_tl,num_actv_rev_tl,num_bc_sats,num_bc_tl,num_il_tl,num_op_rev_tl,num_rev_accts,num_rev_tl_bal_gt_0,num_sats,num_tl_120dpd_2m,num_tl_30dpd,num_tl_90g_dpd_24m,num_tl_op_past_12m,pct_tl_nvr_dlq,percent_bc_gt_75,pub_rec_bankruptcies,tax_liens,tot_hi_cred_lim,total_bal_ex_mort,total_bc_limit,total_il_high_credit_limit,revol_bal_joint,sec_app_earliest_cr_line,sec_app_inq_last_6mths,sec_app_mort_acc,sec_app_open_acc,sec_app_revol_util,sec_app_open_act_il,sec_app_num_rev_accts,sec_app_chargeoff_within_12_mths,sec_app_collections_12_mths_ex_med,sec_app_mths_since_last_major_derog,hardship_flag,hardship_type,hardship_reason,hardship_status,deferral_term,hardship_amount,hardship_start_date,hardship_end_date,payment_plan_start_date,hardship_length,hardship_dpd,hardship_loan_status,orig_projected_additional_accrued_interest,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1
0,,,10000,10000,10000.0,36 months,9.44,320.05,B,B1,mechanic,6 years,MORTGAGE,80000.0,Not Verified,Dec-2017,Current,n,,,credit_card,Credit card refinancing,762xx,TX,14.82,0.0,Jul-2007,0.0,34.0,,8.0,0.0,5225,73.6,30.0,w,6442.28,6442.28,4493.810000,4493.81,3557.72,936.09,0.00,0.00,0.0000,Feb-2019,320.05,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,0.0,173110.0,0.0,2.0,0.0,2.0,23.0,12496.0,39.0,0.0,0.0,3949.0,45.0,7100.0,1.0,0.0,0.0,2.0,21639.0,1875.0,73.6,0.0,0.0,125.0,78.0,26.0,23.0,3.0,26.0,,21.0,,0.0,2.0,2.0,4.0,4.0,21.0,4.0,5.0,2.0,8.0,0.0,0.0,0.0,0.0,96.4,25.0,0.0,0.0,196130.0,17756.0,7100.0,31992.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
1,,,3500,3500,3500.0,36 months,10.42,113.63,B,B3,,,OWN,90000.0,Not Verified,Dec-2017,Current,n,,,other,Other,295xx,SC,28.51,0.0,Jun-2002,0.0,39.0,28.0,12.0,3.0,6953,51.9,38.0,w,2266.55,2266.55,1586.770000,1586.77,1233.45,353.32,0.00,0.00,0.0000,Feb-2019,113.63,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,0.0,339028.0,0.0,4.0,0.0,3.0,22.0,76501.0,69.0,1.0,2.0,1628.0,65.0,13400.0,1.0,5.0,1.0,5.0,28252.0,808.0,82.4,0.0,0.0,164.0,186.0,7.0,7.0,2.0,7.0,39.0,7.0,39.0,0.0,4.0,7.0,4.0,10.0,19.0,7.0,17.0,7.0,12.0,0.0,0.0,0.0,1.0,97.3,75.0,0.0,3.0,416685.0,83454.0,4600.0,110595.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
2,,,5000,5000,5000.0,36 months,13.59,169.90,C,C2,Truck driver,10+ years,OWN,168000.0,Not Verified,Dec-2017,Current,n,,,other,Other,788xx,TX,11.62,0.0,Aug-2002,0.0,44.0,,4.0,0.0,3401,97.2,12.0,w,3291.95,3291.95,2371.050000,2371.05,1708.05,663.00,0.00,0.00,0.0000,Feb-2019,169.90,Mar-2019,Feb-2019,0.0,44.0,1,Individual,,,,0.0,0.0,51673.0,1.0,3.0,1.0,1.0,5.0,48272.0,48.0,0.0,0.0,3401.0,53.0,3500.0,2.0,0.0,3.0,1.0,12918.0,99.0,97.2,0.0,0.0,135.0,184.0,54.0,5.0,0.0,54.0,44.0,5.0,44.0,2.0,1.0,1.0,1.0,4.0,6.0,1.0,6.0,1.0,4.0,0.0,0.0,0.0,1.0,83.3,100.0,0.0,0.0,82176.0,51673.0,3500.0,78676.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
3,,,14000,14000,14000.0,36 months,10.91,457.75,B,B4,Confidential Secretary,2 years,RENT,39000.0,Source Verified,Dec-2017,Current,n,,,credit_card,Credit card refinancing,125xx,NY,22.88,0.0,Apr-2003,0.0,74.0,,8.0,0.0,12918,59.5,16.0,w,9090.87,9090.87,6391.530000,6391.53,4909.13,1482.40,0.00,0.00,0.0000,Feb-2019,457.75,Mar-2019,Feb-2019,0.0,74.0,1,Individual,,,,0.0,457.0,29103.0,1.0,1.0,1.0,2.0,4.0,16185.0,95.0,1.0,5.0,10153.0,75.0,21700.0,2.0,6.0,0.0,7.0,3638.0,7265.0,61.6,0.0,0.0,36.0,176.0,7.0,4.0,0.0,7.0,,16.0,74.0,1.0,2.0,4.0,3.0,4.0,4.0,7.0,12.0,4.0,8.0,0.0,0.0,0.0,2.0,93.8,33.3,0.0,0.0,38704.0,29103.0,18900.0,17004.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
4,,,5000,5000,5000.0,36 months,13.59,169.90,C,C2,General Manager,< 1 year,RENT,55000.0,Not Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,672xx,KS,12.18,0.0,Jun-1999,0.0,,,5.0,0.0,4497,91.8,6.0,w,3291.95,3291.95,2371.050000,2371.05,1708.05,663.00,0.00,0.00,0.0000,Feb-2019,169.90,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,471.0,7202.0,0.0,1.0,0.0,0.0,71.0,2705.0,12.0,0.0,0.0,1483.0,27.0,4900.0,0.0,1.0,0.0,0.0,1440.0,403.0,91.8,0.0,0.0,149.0,222.0,43.0,43.0,0.0,43.0,,,,0.0,4.0,4.0,4.0,4.0,2.0,4.0,4.0,4.0,5.0,0.0,0.0,0.0,0.0,100.0,100.0,0.0,0.0,26841.0,7202.0,4900.0,21941.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
5,,,36000,36000,36000.0,60 months,14.08,839.16,C,C3,NDT III,10+ years,RENT,74000.0,Not Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,920xx,CA,21.46,0.0,Apr-2001,1.0,39.0,,5.0,0.0,22328,83.3,19.0,w,29698.80,29698.80,11691.920000,11691.92,6301.20,5390.72,0.00,0.00,0.0000,Feb-2019,839.16,Mar-2019,Feb-2019,0.0,58.0,1,Individual,,,,0.0,0.0,38082.0,0.0,2.0,0.0,0.0,29.0,15754.0,46.0,0.0,0.0,18310.0,62.0,26800.0,0.0,3.0,2.0,0.0,7616.0,1672.0,93.0,0.0,0.0,158.0,200.0,65.0,29.0,0.0,70.0,63.0,4.0,58.0,1.0,2.0,2.0,2.0,6.0,9.0,3.0,10.0,2.0,5.0,0.0,0.0,0.0,0.0,83.3,100.0,0.0,0.0,61280.0,38082.0,24000.0,34480.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
6,,,20000,20000,20000.0,36 months,9.93,644.69,B,B2,Supervisory program analyst,10+ years,MORTGAGE,140000.0,Not Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,750xx,TX,7.76,2.0,May-1988,2.0,,,9.0,0.0,21374,62.0,24.0,w,12933.83,12933.83,9035.820000,9035.82,7066.17,1937.42,32.23,0.00,0.0000,Feb-2019,644.69,Mar-2019,Feb-2019,0.0,17.0,1,Individual,,,,0.0,0.0,159398.0,2.0,1.0,0.0,1.0,16.0,19645.0,86.0,4.0,6.0,18596.0,62.0,34700.0,0.0,0.0,4.0,8.0,17710.0,10933.0,63.0,0.0,0.0,136.0,355.0,3.0,3.0,1.0,3.0,,3.0,,1.0,3.0,7.0,5.0,7.0,11.0,7.0,12.0,4.0,9.0,,0.0,2.0,5.0,71.0,33.3,0.0,0.0,179419.0,41019.0,32800.0,22835.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
7,,,3200,3200,3200.0,36 months,10.91,104.63,B,B4,Commercial Property Manager,< 1 year,MORTGAGE,48000.0,Not Verified,Dec-2017,Current,n,,,other,Other,936xx,CA,16.66,0.0,Mar-2006,0.0,,,9.0,0.0,12748,91.7,11.0,w,2077.66,2077.66,1472.580000,1472.58,1122.34,350.24,0.00,0.00,0.0000,Feb-2019,104.63,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,188.0,190238.0,0.0,1.0,0.0,1.0,15.0,13772.0,87.0,1.0,2.0,3987.0,89.0,13900.0,0.0,0.0,0.0,3.0,21138.0,1084.0,91.0,0.0,0.0,15.0,141.0,7.0,7.0,2.0,7.0,,,,0.0,6.0,7.0,6.0,7.0,1.0,7.0,8.0,7.0,9.0,0.0,0.0,0.0,1.0,100.0,83.3,0.0,0.0,211525.0,26520.0,12000.0,15908.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
8,,,14500,14500,14500.0,36 months,16.02,509.93,C,C5,Equipment technician,< 1 year,MORTGAGE,38000.0,Not Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,765xx,TX,14.28,0.0,Oct-2008,0.0,,,6.0,0.0,1381,21.6,7.0,w,9613.67,9613.67,7201.930000,7201.93,4886.33,2315.60,0.00,0.00,0.0000,Feb-2019,509.93,Mar-2019,Feb-2019,0.0,,1,Joint App,61000.0,24.20,Not Verified,0.0,0.0,138605.0,1.0,1.0,1.0,1.0,7.0,22122.0,93.0,1.0,2.0,1381.0,78.0,6400.0,2.0,1.0,4.0,4.0,23101.0,1119.0,55.2,0.0,0.0,110.0,61.0,3.0,3.0,1.0,3.0,,7.0,,0.0,1.0,1.0,2.0,2.0,2.0,4.0,4.0,1.0,6.0,0.0,0.0,0.0,2.0,100.0,50.0,0.0,0.0,147563.0,23503.0,2500.0,23691.0,13179.0,Aug-2009,0.0,1.0,8.0,85.4,4.0,4.0,0.0,0.0,,N,,,,,,,,,,,,,,,Cash,N,,,,,,
9,,,35000,35000,35000.0,36 months,6.08,1066.04,A,A2,,,OWN,76000.0,Verified,Dec-2017,Current,n,,,credit_card,Credit card refinancing,388xx,MS,34.28,0.0,Aug-1994,1.0,,,23.0,0.0,47145,48.0,42.0,w,22139.93,22139.93,14900.920000,14900.92,12860.07,2040.85,0.00,0.00,0.0000,Feb-2019,1066.04,Mar-2019,Feb-2019,0.0,,1,Individual,,,,0.0,0.0,170361.0,1.0,2.0,0.0,0.0,26.0,28292.0,64.0,2.0,2.0,11128.0,53.0,98200.0,0.0,4.0,1.0,2.0,7744.0,27309.0,63.2,0.0,0.0,148.0,280.0,6.0,6.0,4.0,6.0,,6.0,,0.0,13.0,14.0,14.0,16.0,9.0,20.0,29.0,14.0,23.0,0.0,0.0,0.0,2.0,100.0,35.7,0.0,0.0,252224.0,75437.0,74300.0,44553.0,,,,,,,,,,,,N,,,,,,,,,,,,,,,DirectPay,N,,,,,,


In [None]:
# 기존 인덱스값을 날리면서 인덱스 초기화
df = df.reset_index(drop=True)

## 컬럼편집
인덱스편집과 마찬가지로 데이터프레임의 컬럼을 변경해야 할 경우도 있습니다. 데이터프레임은 컬럼단위 샘플링 및 인덱싱, 이름변경이 가능합니다.

### 컬럼선택

In [112]:
# df 컬럼명 접근
df.columns

'id'

In [None]:
# columns 속성도 인덱싱 및 슬라이싱이 가능합니다.
df.columns[0:26]

저는 개인정보에 관한 부분에 관심이 생겼습니다. 데이터셋 중 필요한 부분만을 컬럼단위로 추려보겠습니다.

In [114]:
# df의 개인정보에 관한 컬럼만을 색인으로 df를 슬라이싱하고 person_df 변수에 할당
person_df = df[df.columns[0:26]]
person_df

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,url,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs
0,,,10000,10000,10000.0,36 months,9.44,320.05,B,B1,mechanic,6 years,MORTGAGE,80000.0,Not Verified,Dec-2017,Current,n,,,credit_card,Credit card refinancing,762xx,TX,14.82,0.0
1,,,3500,3500,3500.0,36 months,10.42,113.63,B,B3,,,OWN,90000.0,Not Verified,Dec-2017,Current,n,,,other,Other,295xx,SC,28.51,0.0
2,,,5000,5000,5000.0,36 months,13.59,169.90,C,C2,Truck driver,10+ years,OWN,168000.0,Not Verified,Dec-2017,Current,n,,,other,Other,788xx,TX,11.62,0.0
3,,,14000,14000,14000.0,36 months,10.91,457.75,B,B4,Confidential Secretary,2 years,RENT,39000.0,Source Verified,Dec-2017,Current,n,,,credit_card,Credit card refinancing,125xx,NY,22.88,0.0
4,,,5000,5000,5000.0,36 months,13.59,169.90,C,C2,General Manager,< 1 year,RENT,55000.0,Not Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,672xx,KS,12.18,0.0
5,,,36000,36000,36000.0,60 months,14.08,839.16,C,C3,NDT III,10+ years,RENT,74000.0,Not Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,920xx,CA,21.46,0.0
6,,,20000,20000,20000.0,36 months,9.93,644.69,B,B2,Supervisory program analyst,10+ years,MORTGAGE,140000.0,Not Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,750xx,TX,7.76,2.0
7,,,3200,3200,3200.0,36 months,10.91,104.63,B,B4,Commercial Property Manager,< 1 year,MORTGAGE,48000.0,Not Verified,Dec-2017,Current,n,,,other,Other,936xx,CA,16.66,0.0
8,,,14500,14500,14500.0,36 months,16.02,509.93,C,C5,Equipment technician,< 1 year,MORTGAGE,38000.0,Not Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,765xx,TX,14.28,0.0
9,,,35000,35000,35000.0,36 months,6.08,1066.04,A,A2,,,OWN,76000.0,Verified,Dec-2017,Current,n,,,credit_card,Credit card refinancing,388xx,MS,34.28,0.0


### 컬럼삭제
현재 데이터셋에는 개인식별정보가 지워져서 데이터가 존재하지 않습니다. 불필요한 데이터 column을 지우도록 하겠습니다.

In [115]:
# 지울 column의 데이터값이 모두 NaN인지 확인
person_df['id'].sum(), person_df['member_id'].sum(), person_df['url'].sum(), person_df['desc'].sum()

(0.0, 0.0, 0.0, 0.0)

삭제할 컬럼 모두 데이터가 없는 것을 확인했습니다.

In [116]:
# 컬럼삭제 drop('컬럼명', axis=1)
# del (df['컬럼명'])
# 실제로는 컬럼 및 행도 삭제 가능합니다. axis=0(기본값)
# inplace=True 파라메터를 사용해서 원본값을 변경가능합니다.
person_df = person_df.drop('id', axis=1)
# person_df = person_df.drop('id', axis=1)

In [117]:
person_df

Unnamed: 0,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,url,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs
0,,10000,10000,10000.0,36 months,9.44,320.05,B,B1,mechanic,6 years,MORTGAGE,80000.0,Not Verified,Dec-2017,Current,n,,,credit_card,Credit card refinancing,762xx,TX,14.82,0.0
1,,3500,3500,3500.0,36 months,10.42,113.63,B,B3,,,OWN,90000.0,Not Verified,Dec-2017,Current,n,,,other,Other,295xx,SC,28.51,0.0
2,,5000,5000,5000.0,36 months,13.59,169.90,C,C2,Truck driver,10+ years,OWN,168000.0,Not Verified,Dec-2017,Current,n,,,other,Other,788xx,TX,11.62,0.0
3,,14000,14000,14000.0,36 months,10.91,457.75,B,B4,Confidential Secretary,2 years,RENT,39000.0,Source Verified,Dec-2017,Current,n,,,credit_card,Credit card refinancing,125xx,NY,22.88,0.0
4,,5000,5000,5000.0,36 months,13.59,169.90,C,C2,General Manager,< 1 year,RENT,55000.0,Not Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,672xx,KS,12.18,0.0
5,,36000,36000,36000.0,60 months,14.08,839.16,C,C3,NDT III,10+ years,RENT,74000.0,Not Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,920xx,CA,21.46,0.0
6,,20000,20000,20000.0,36 months,9.93,644.69,B,B2,Supervisory program analyst,10+ years,MORTGAGE,140000.0,Not Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,750xx,TX,7.76,2.0
7,,3200,3200,3200.0,36 months,10.91,104.63,B,B4,Commercial Property Manager,< 1 year,MORTGAGE,48000.0,Not Verified,Dec-2017,Current,n,,,other,Other,936xx,CA,16.66,0.0
8,,14500,14500,14500.0,36 months,16.02,509.93,C,C5,Equipment technician,< 1 year,MORTGAGE,38000.0,Not Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,765xx,TX,14.28,0.0
9,,35000,35000,35000.0,36 months,6.08,1066.04,A,A2,,,OWN,76000.0,Verified,Dec-2017,Current,n,,,credit_card,Credit card refinancing,388xx,MS,34.28,0.0


In [118]:
# inplace 파라메터 사용
person_df.drop('member_id', axis=1, inplace=True)

In [119]:
person_df

Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,url,desc,purpose,title,zip_code,addr_state,dti,delinq_2yrs
0,10000,10000,10000.0,36 months,9.44,320.05,B,B1,mechanic,6 years,MORTGAGE,80000.0,Not Verified,Dec-2017,Current,n,,,credit_card,Credit card refinancing,762xx,TX,14.82,0.0
1,3500,3500,3500.0,36 months,10.42,113.63,B,B3,,,OWN,90000.0,Not Verified,Dec-2017,Current,n,,,other,Other,295xx,SC,28.51,0.0
2,5000,5000,5000.0,36 months,13.59,169.90,C,C2,Truck driver,10+ years,OWN,168000.0,Not Verified,Dec-2017,Current,n,,,other,Other,788xx,TX,11.62,0.0
3,14000,14000,14000.0,36 months,10.91,457.75,B,B4,Confidential Secretary,2 years,RENT,39000.0,Source Verified,Dec-2017,Current,n,,,credit_card,Credit card refinancing,125xx,NY,22.88,0.0
4,5000,5000,5000.0,36 months,13.59,169.90,C,C2,General Manager,< 1 year,RENT,55000.0,Not Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,672xx,KS,12.18,0.0
5,36000,36000,36000.0,60 months,14.08,839.16,C,C3,NDT III,10+ years,RENT,74000.0,Not Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,920xx,CA,21.46,0.0
6,20000,20000,20000.0,36 months,9.93,644.69,B,B2,Supervisory program analyst,10+ years,MORTGAGE,140000.0,Not Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,750xx,TX,7.76,2.0
7,3200,3200,3200.0,36 months,10.91,104.63,B,B4,Commercial Property Manager,< 1 year,MORTGAGE,48000.0,Not Verified,Dec-2017,Current,n,,,other,Other,936xx,CA,16.66,0.0
8,14500,14500,14500.0,36 months,16.02,509.93,C,C5,Equipment technician,< 1 year,MORTGAGE,38000.0,Not Verified,Dec-2017,Current,n,,,debt_consolidation,Debt consolidation,765xx,TX,14.28,0.0
9,35000,35000,35000.0,36 months,6.08,1066.04,A,A2,,,OWN,76000.0,Verified,Dec-2017,Current,n,,,credit_card,Credit card refinancing,388xx,MS,34.28,0.0


In [120]:
# del 메모리 삭제 키워드 사용
del person_df['url']
del person_df['desc']

In [121]:
person_df

Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,purpose,title,zip_code,addr_state,dti,delinq_2yrs
0,10000,10000,10000.0,36 months,9.44,320.05,B,B1,mechanic,6 years,MORTGAGE,80000.0,Not Verified,Dec-2017,Current,n,credit_card,Credit card refinancing,762xx,TX,14.82,0.0
1,3500,3500,3500.0,36 months,10.42,113.63,B,B3,,,OWN,90000.0,Not Verified,Dec-2017,Current,n,other,Other,295xx,SC,28.51,0.0
2,5000,5000,5000.0,36 months,13.59,169.90,C,C2,Truck driver,10+ years,OWN,168000.0,Not Verified,Dec-2017,Current,n,other,Other,788xx,TX,11.62,0.0
3,14000,14000,14000.0,36 months,10.91,457.75,B,B4,Confidential Secretary,2 years,RENT,39000.0,Source Verified,Dec-2017,Current,n,credit_card,Credit card refinancing,125xx,NY,22.88,0.0
4,5000,5000,5000.0,36 months,13.59,169.90,C,C2,General Manager,< 1 year,RENT,55000.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,672xx,KS,12.18,0.0
5,36000,36000,36000.0,60 months,14.08,839.16,C,C3,NDT III,10+ years,RENT,74000.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,920xx,CA,21.46,0.0
6,20000,20000,20000.0,36 months,9.93,644.69,B,B2,Supervisory program analyst,10+ years,MORTGAGE,140000.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,750xx,TX,7.76,2.0
7,3200,3200,3200.0,36 months,10.91,104.63,B,B4,Commercial Property Manager,< 1 year,MORTGAGE,48000.0,Not Verified,Dec-2017,Current,n,other,Other,936xx,CA,16.66,0.0
8,14500,14500,14500.0,36 months,16.02,509.93,C,C5,Equipment technician,< 1 year,MORTGAGE,38000.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,765xx,TX,14.28,0.0
9,35000,35000,35000.0,36 months,6.08,1066.04,A,A2,,,OWN,76000.0,Verified,Dec-2017,Current,n,credit_card,Credit card refinancing,388xx,MS,34.28,0.0


### 컬럼명 변경
    경우에 따라서는 데이터셋 제작 중 컬럼명을 변경해야 할 경우도 있습니다.
    국내 수집 데이터 사용 시 컬럼이 한글일 경우 영어로 변경을 많이 합니다.

In [122]:
# home_ownership을 간략하게 home으로 변경
# 한글도 가능합니다만 권장하지는 않습니다.
person_df.rename(columns={'home_ownership' : 'home'}, inplace=True)

In [123]:
person_df

Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,purpose,title,zip_code,addr_state,dti,delinq_2yrs
0,10000,10000,10000.0,36 months,9.44,320.05,B,B1,mechanic,6 years,MORTGAGE,80000.0,Not Verified,Dec-2017,Current,n,credit_card,Credit card refinancing,762xx,TX,14.82,0.0
1,3500,3500,3500.0,36 months,10.42,113.63,B,B3,,,OWN,90000.0,Not Verified,Dec-2017,Current,n,other,Other,295xx,SC,28.51,0.0
2,5000,5000,5000.0,36 months,13.59,169.90,C,C2,Truck driver,10+ years,OWN,168000.0,Not Verified,Dec-2017,Current,n,other,Other,788xx,TX,11.62,0.0
3,14000,14000,14000.0,36 months,10.91,457.75,B,B4,Confidential Secretary,2 years,RENT,39000.0,Source Verified,Dec-2017,Current,n,credit_card,Credit card refinancing,125xx,NY,22.88,0.0
4,5000,5000,5000.0,36 months,13.59,169.90,C,C2,General Manager,< 1 year,RENT,55000.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,672xx,KS,12.18,0.0
5,36000,36000,36000.0,60 months,14.08,839.16,C,C3,NDT III,10+ years,RENT,74000.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,920xx,CA,21.46,0.0
6,20000,20000,20000.0,36 months,9.93,644.69,B,B2,Supervisory program analyst,10+ years,MORTGAGE,140000.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,750xx,TX,7.76,2.0
7,3200,3200,3200.0,36 months,10.91,104.63,B,B4,Commercial Property Manager,< 1 year,MORTGAGE,48000.0,Not Verified,Dec-2017,Current,n,other,Other,936xx,CA,16.66,0.0
8,14500,14500,14500.0,36 months,16.02,509.93,C,C5,Equipment technician,< 1 year,MORTGAGE,38000.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,765xx,TX,14.28,0.0
9,35000,35000,35000.0,36 months,6.08,1066.04,A,A2,,,OWN,76000.0,Verified,Dec-2017,Current,n,credit_card,Credit card refinancing,388xx,MS,34.28,0.0


## 데이터 샘플링 및 분석
> 데이터병합, 인덱스편집, 컬럼선택만으로도 불필요한 정보를 삭제하고 새롭게 데이터셋을 만들 수 있는것을 확인했습니다.  
위에 학습한 내용도 데이터 샘플링에 속한 내용이지만 지금부터는 데이터셋의 데이터를 살펴보면서 의미있는 데이터를 추려보도록 하겠습니다.  
    
**데이터프레임의 기본적인 인덱싱, 슬라이싱, 조건부 샘플링을 조합하면 데이터의 샘플을 확인 하는 과정만으로도 데이터분석이 가능해집니다.**

In [None]:
# 분석에 필요한 데이터프레임을 만들었으니 원본값을 사용하겠습니다. 기존 df에 person_df 값을 덮어 씌웁니다.
df = person_df

In [125]:
# 분석에 필요한 데이터셋을 생성했다면 파일로도 저장 해둡시다.
df.to_csv('loan_df.csv')

In [None]:
# 저장한 loan_df.csv 파일을 fianl_df 변수에 불러들여서 저장하고 
# 데이터를 확인해보겠습니다.

In [128]:
final_df = pd.read_csv('loan_df.csv')

In [131]:
final_df

Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,purpose,title,zip_code,addr_state,dti,delinq_2yrs
0,10000,10000,10000.0,36 months,9.44,320.05,B,B1,mechanic,6 years,MORTGAGE,80000.0,Not Verified,Dec-2017,Current,n,credit_card,Credit card refinancing,762xx,TX,14.82,0.0
1,3500,3500,3500.0,36 months,10.42,113.63,B,B3,,,OWN,90000.0,Not Verified,Dec-2017,Current,n,other,Other,295xx,SC,28.51,0.0
2,5000,5000,5000.0,36 months,13.59,169.90,C,C2,Truck driver,10+ years,OWN,168000.0,Not Verified,Dec-2017,Current,n,other,Other,788xx,TX,11.62,0.0
3,14000,14000,14000.0,36 months,10.91,457.75,B,B4,Confidential Secretary,2 years,RENT,39000.0,Source Verified,Dec-2017,Current,n,credit_card,Credit card refinancing,125xx,NY,22.88,0.0
4,5000,5000,5000.0,36 months,13.59,169.90,C,C2,General Manager,< 1 year,RENT,55000.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,672xx,KS,12.18,0.0
5,36000,36000,36000.0,60 months,14.08,839.16,C,C3,NDT III,10+ years,RENT,74000.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,920xx,CA,21.46,0.0
6,20000,20000,20000.0,36 months,9.93,644.69,B,B2,Supervisory program analyst,10+ years,MORTGAGE,140000.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,750xx,TX,7.76,2.0
7,3200,3200,3200.0,36 months,10.91,104.63,B,B4,Commercial Property Manager,< 1 year,MORTGAGE,48000.0,Not Verified,Dec-2017,Current,n,other,Other,936xx,CA,16.66,0.0
8,14500,14500,14500.0,36 months,16.02,509.93,C,C5,Equipment technician,< 1 year,MORTGAGE,38000.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,765xx,TX,14.28,0.0
9,35000,35000,35000.0,36 months,6.08,1066.04,A,A2,,,OWN,76000.0,Verified,Dec-2017,Current,n,credit_card,Credit card refinancing,388xx,MS,34.28,0.0


In [None]:
# 불러들인 csv 파일의 unnamed 컬럼 삭제
del final_df['Unnamed: 0']

### 저는 채권자의 개인정보에 관심이 많습니다. 고객의 직업을 살펴보겠습니다.

In [133]:
df = final_df

In [134]:
df['emp_title']

0                                     mechanic
1                                          NaN
2                                 Truck driver
3                       Confidential Secretary
4                              General Manager
5                                      NDT III
6                  Supervisory program analyst
7                  Commercial Property Manager
8                         Equipment technician
9                                          NaN
10               IT Manager - Business Process
11                   Strategic Account Manager
12                             Account Manager
13                             sales associate
14                            Machine operator
15                              Vice President
16                        Installation Manager
17                               Senior Agent 
18                           commercial sales 
19                                       sales
20                  Assistant General Manager 
21           

In [135]:
# 값을 카운트 하는 함수 value_counts()
df['emp_title'].value_counts()

Teacher                              895
Manager                              870
Owner                                841
Driver                               425
Registered Nurse                     362
Sales                                351
Supervisor                           343
RN                                   319
owner                                260
Project Manager                      232
Director                             223
General Manager                      221
President                            212
Office Manager                       197
Engineer                             157
Nurse                                156
manager                              150
Sales Manager                        143
Operations Manager                   129
Administrative Assistant             128
Server                               127
Account Manager                      126
teacher                              121
Truck Driver                         119
Accountant      

### 데이터프레임 형변환

In [139]:
# Owner, owner 같은 직업이지만 대소문자 구분에 따라 다른 값으로 취급되는 문제가 있네요.
# 대소문자 구분을 없애기 위해 모두 소문자로 데이터값을 변경하겠습니다.
# 소문자 변환 전 혹시모를 int, float 데이터가 있을지 모를 상황에 대비해서 모두 문자열로 변경해주겠습니다.
# 형변환 함수 astype(데이터타입)
df['emp_title'] = df['emp_title'].astype(str)

In [137]:
%%time
# 반복문을 사용한 데이터 변경도 가능
# 하지만 파이썬의 강점을 살리지 못한 코드
for index, title in enumerate(df['emp_title']):
    # print(index, title)
    df['emp_title'][index] = title.lower()

0 mechanic
1 nan
2 Truck driver
3 Confidential Secretary
4 General Manager
5 NDT III
6 Supervisory program analyst
7 Commercial Property Manager
8 Equipment technician
9 nan
10 IT Manager - Business Process
11 Strategic Account Manager
12 Account Manager
13 sales associate
14 Machine operator
15 Vice President
16 Installation Manager
17 Senior Agent 
18 commercial sales 
19 sales
20 Assistant General Manager 
21 Control System Designer
22 Teacher
23 Consultant
24 Office manager
25 Probation Officer
26 Showroom Sales
27 Server
28 EMT
29 Policy Analyst
30 DIRECTOR OF NURSING
31 Senior Analyst 
32 Science lab director 
33 Instrumentation Tech
34 Bodyman
35 Private Secretary
36 Bus driver
37 EHS Manager
38 In Process Inspector 
39 Paraprofessional
40 service technician
41 production manager
42 RN
43 Account Executive
44 Nutrition Services
45 Selector
46 Supervisor
47 Executive Assistant
48 Extruder Operator 
49 Nanny
50 warehouseman
51 Sales
52 Owner Operator
53 Guidance Counselor
54 nan
5

1065 Drywall Finisher
1066 Clinical Program Associate 
1067 Senior Accounting Specialist
1068 Sr Operations Administrator
1069 Laundry Manager
1070 District manager
1071 Bus Driver
1072 nan
1073 Clerical Assistant 
1074 RN Case Manager
1075 88m
1076 nan
1077 ASSOCIATE ACCOUNTANT
1078 cna
1079 nan
1080 Legal assistant
1081 Loader
1082 Attorney
1083 Barista
1084 nan
1085 Teacher
1086 CONSULTANT
1087 Research Specialist
1088 Leader, E/I Maintenance
1089 Paramedic
1090 Assembler
1091 Info analyst
1092 Truck driver
1093 Aircraft Cleaner
1094 Office manager
1095 Supply Chain Coordinator 
1096 Nurse Supervisor
1097 Customer Service Rep II
1098 Chief
1099 Quality assurance 
1100 SAP Business Analyst
1101 Clinical Manager
1102 Executive Assistant
1103 Digital Project Manager
1104 nan
1105 Attorney
1106 nan
1107 IT Project Manager
1108 HR Manager
1109 Software Developer
1110 Landman
1111 Demand Planner
1112 Rn
1113 Packing
1114 Quality Assurance Associate
1115 Manager
1116 Sales
1117 Hostess
111

2309 Network Engineer
2310 Associate Director, Alumni Engagment
2311 TEACHER
2312 nan
2313 Digital copier operator 
2314 teacher 
2315 nan
2316 fabricator
2317 Library Associate 2
2318 Bus Operator
2319 nan
2320 Owner
2321 Northland Manager
2322 realtor
2323 ACCOUNT MANAGER
2324 Research Hygienist
2325 Branch manager
2326 Project Manager
2327 Managing Director
2328 Rn
2329 Engineer
2330 Director of IT Operations
2331 bus operations
2332 Controller
2333 SERVICE DIRECTOR
2334 nan
2335 Director of Nursing 
2336 General Manager/Owner
2337 Lpn
2338 pharmacy tehc
2339 Cardiac Sonographer
2340 CEO
2341 Sales Manager
2342 para-professional
2343 nan
2344 Lobby Manager
2345 CHEF
2346 Stylist
2347 Real Estate
2348 Frontend Manager
2349 Truck Driver
2350 Staff Analyst
2351 Technician
2352 Account Executive
2353 Staff Accountant
2354 Cashier
2355 Lab tech
2356 nan
2357 Consultant
2358 EMT
2359 Sales area manager
2360 Director of Business Operations 
2361 Shift Leader
2362 VP
2363 Manager
2364 Direc

3410 Foreman
3411 Nurse
3412 Patient Care Advocate 
3413 Clerk
3414 Business Development Director
3415 Coder
3416 Shop manager
3417 Journeyman electrician
3418 Sales
3419 MALT OPERATOR
3420 President
3421 Lead Counselor
3422 Mechanic
3423 Attorney 
3424 Teacher 
3425 Sr. Operations Associate
3426 nan
3427 Engineer
3428 Senior Sales Manager
3429 Sales
3430 Teacher
3431 Correction Officer
3432 Driver
3433 nan
3434 Dental hygienist 
3435 Security Officer
3436 Caregiver, courier 
3437 Laborer
3438 Respiratory therapist 
3439 operator
3440 diesetter setup operator
3441 Surgery scheduling
3442 Support Manager
3443 Postdoctoral researcher
3444 Owner 
3445 Programmer Analyst
3446 OWNER
3447 Window Cleaner
3448 Senior Producer Videographer
3449 Owner
3450 Assistant Loan Officer 
3451 Installer 
3452 trainer
3453 nan
3454 Aba specialist 
3455 QA Engineer
3456 Teacher
3457 Owner
3458 Locomotive Engineer
3459 Director - AMS
3460 Api 936 Inspector
3461 nan
3462 GM
3463 Federal Probation Officer
346

4614 Product Development Manager
4615 Paramedic
4616 Senior Designer
4617 Hemophilia Manager
4618 Contracts Manager
4619 Paramedic
4620 Foreman
4621 nan
4622 QA
4623 Voucher Examiner
4624 Welder 
4625 Resident
4626 Cook/chef
4627 Salary Material Supervisor
4628 nan
4629 Analysts Associate 
4630 GENERAL MANAGER
4631 Registered Nurse
4632 Director Customer Service
4633 Financial Analyst
4634 Truck driver
4635 President
4636 IT Quality Assurance Lead
4637 Electrician 
4638 Sales manager 
4639 nan
4640 maintenance
4641 VP 
4642 Sr. Project Analyst
4643 corporate executive chef
4644 Realtor
4645 VP Construction
4646 RN
4647 Manager
4648 General Manager
4649 Admin
4650 Teacher
4651 Payroll Manager
4652 Assistant 
4653 Manager
4654 Owner 
4655 Mail carrier
4656 manager
4657 Housekeeping 
4658 Bus driver
4659 Sr Title Consultant
4660 Warehouse Manager
4661 Sales manager 
4662 Warehouse Worker
4663 Associate Professor of Finance
4664 Engineer 
4665 Foreman Electrician
4666 MOW 
4667 Ops tech
46

5666 Associate Systems Administrator 
5667 Accountant
5668 Teacher
5669 Territory Manager
5670 Office Manager
5671 Window Clerk
5672 nan
5673 Salesman
5674 Pipefitter 
5675 Operations support associate III
5676 Teacher
5677 Main Banker
5678 Manager
5679 Software Engineer
5680 city carrier
5681 Office manager
5682 Sales Leader
5683 Facilities Director 
5684 Quality Lead
5685 Service Technician
5686 Driver
5687 Network Architect
5688 Police Officer
5689 Night Stock Manager/ Baker
5690 Senior Software Engineer
5691 Supervisor
5692 Bus Operator
5693 Pension Analyst
5694 Firefighter
5695 Digital Marketing Manager
5696 Owner
5697 Manager 
5698 Technician
5699 nan
5700 Owner
5701 nan
5702 Machanic
5703 Supervisor
5704 nan
5705 Utility Worker
5706 Cook
5707 File Clerk
5708 A/C tech
5709 Deportation Officer
5710 Energy technician
5711 server
5712 nan
5713 Case Manager
5714 Senior Vice President 
5715 Field technician
5716 Theft Claims Adjuster
5717 nan
5718 Machinist
5719 Sr. Food srvc site lea

6801 Analyst
6802 Senior Accounting Manager
6803 Journeyman Carpenter
6804 Manager
6805 tax analyst
6806 Telephony Operations
6807 senior officer specialist
6808 Nurse
6809 nan
6810 Pharmacist 
6811 Cyber Systems Operations
6812 Driver
6813 Registered Nurse
6814 Owner
6815 Administrative Assistant 
6816 Coating operator
6817 Senior Project Manager
6818 Manager
6819 nan
6820 Office Manager
6821 Teacher
6822 operator
6823 Superintendent 
6824 Lead Technician 
6825 Principal secretary
6826 Team Leader
6827 Marketing Manager
6828 Owner
6829 Escaltions Supervisor
6830 Paralegal
6831 Scenic Charge Artist
6832 Malpractice Specialist
6833 Manager
6834 Banker 
6835 Sr Technical Claims Specialist
6836 Owner
6837 Project Manager
6838 Customer Service
6839 Retirement Coubselor
6840 physical therapist
6841 Sales Coach
6842 Contract Manager
6843 Library Aide
6844 nan
6845 nan
6846 Registered Nurse
6847 Financial crimes consultant
6848 Owner
6849 Underwriter
6850 Encore Execurive
6851 Lead CNC operat

7922 Sales
7923 Project Manager 
7924 Production Manager
7925 Outside sales
7926 Owner
7927 Web Consultant
7928 President
7929 CNA
7930 Support manager
7931 Transportation Aide/Superiver
7932 Web Manager
7933 Secretary III
7934 Restaurant manager
7935 National Agronomic Instructor 
7936 Producing Branch Manager
7937 Sales
7938 President
7939 Executive Administrator/Contract Manager
7940 Owner
7941 IT Manager
7942 Active duty
7943 Medical Coder
7944 Sales Account Executive
7945 Community trainers
7946 Account Executive
7947 Executive Admin
7948 Captain
7949 Technology Technician
7950 Electrician
7951 Estate Manager
7952 Sr. Consult 
7953 SALES PERSON
7954 Underwriter 
7955 Tax Manager
7956 INSURANCE AGENT
7957 nan
7958 HVAC Technician Supervisor 
7959 Officer 
7960 Association Manage
7961 Driver/ Shift Manager
7962 Caster
7963 STORE MANAGER
7964 Health Services Supervisor
7965 Administrative Officer
7966 Field Manager
7967 Manager Automation, Robotics
7968 RN
7969 Truck Driver
7970 Cont

8995 Software Engineer
8996 Technical Sales
8997 Office Manager
8998 Creative Director
8999 Security 
9000 Loan Operations Manager
9001 nan
9002 Manager
9003 Real Estate Agent
9004 Content Marketing Specialist
9005 Teacher
9006 Manager 
9007 nan
9008 Director of Medical Review
9009 nan
9010 vice president
9011 nan
9012 nan
9013 System Integration and Test Engineer
9014 Physician
9015 Owner/CPA
9016 cs manager
9017 Cf setup2
9018 Store Manager 
9019 Ramp Supervisor
9020 nan
9021 Nurse Manager
9022 BLS Firefighter/Engineer 
9023 IT 
9024 Test Engineer
9025 Night crew manager
9026 Safety Director
9027 Processing Manager
9028 social case worker
9029 Warehouse
9030 cardi tech
9031 Architectural Intern
9032 Regional Mgr
9033 911 dispatcher
9034 Owner/President
9035 Owner
9036 General Manager
9037 Respiratory Therapist
9038 RN
9039 Machinist
9040 Pre-Licensed Psychologist
9041 Underwriter Specialist
9042 nan
9043 Admin. Asst.
9044 Beverage trainer
9045 QAS GS-11
9046 Operation Manager
9047 na

10327 bartender
10328 nan
10329 Sales
10330 Accounts Payable
10331 delivery driver
10332 Sergeant 
10333 Center Store
10334 Owner
10335 fire watch lead
10336 Owner manager
10337 Firefighter 
10338 Teacher
10339 Systems Engineer
10340 nan
10341 Rn
10342 Truck driver
10343 Sales
10344 nan
10345 owner
10346 soldier
10347 Business Development -Commercial Lending
10348 Teacher
10349 Machinist
10350 General work associate
10351 Instructor
10352 Train attendant
10353 Superintendent
10354 Finisher
10355 assistant manager
10356 Attorney
10357 nan
10358 Construction Foreman
10359 Westcoast Director
10360 nan
10361 Police Officer/ArmyReserve 
10362 Registered Nurse
10363 Accounts Payable Specialist
10364 Registered Nurse
10365 TRANSPORTER(DRIVER)
10366 Coordinator and sales
10367 Senior Attorney
10368 nan
10369 Registered Nurse
10370 Branch Manager
10371 nan
10372 Jail clerk
10373 Director of Operations
10374 aircraft inspector
10375 VP of Operations
10376 Painter 
10377 nan
10378 Teacher
10379 M

11351 Owner
11352 Lead products handler 
11353 Hair dresser
11354 Police officer 
11355 Senor System Administrator
11356 Journeyman Electrician 
11357 Teacher
11358 Director of Finance
11359 Manager
11360 Manager
11361 Designer
11362 Project engineer
11363 Accountant
11364 Teacher/Athletic Director
11365 Private Music Tutor
11366 Paraprofessional health aide
11367 Discounts Coordinator 
11368 Driver
11369 IT
11370 Contracts Manager
11371 Engineering Aide II
11372 Owner
11373 Carman
11374 Teacher
11375 rn
11376 Sr. Internal Auditor
11377 Waiter
11378 Deputy
11379 Maintenance
11380 Garment District Representative
11381 Meat Manager
11382 Applicator
11383 Assist. Dir. Of Academic Counseling 
11384 Counter sales
11385 Supervisor
11386 nan
11387 Regional Director of Purchasing
11388 HR Specialist
11389 nan
11390 nan
11391 Dental Assistant
11392 Professor
11393 Senior Accountant
11394 Ironworker
11395 Store manager 
11396 Postdoctoral Scholar
11397 Trainer
11398 Manager
11399 Sales
11400 Nur

12678 assistant account manager
12679 Production Support Manager
12680 Street maintence worker
12681 direct care specialist
12682 Human Service Specialist
12683 Personal Banker
12684 Environmental Safety Manager
12685 fleet service clerk
12686 Administrative and Fiscal Manager
12687 Operations Manager
12688 Assistant Manager 
12689 Director of External Reporting
12690 Recruiting specialist
12691 Congregational Engagement Associate
12692 Office Manager
12693  Associate Director Portfolio Management
12694 Chief Deputy Director
12695 Physicians Assistant
12696 Customer care agent
12697 Dental Hygienist
12698 Manager
12699 Warehouse Lead
12700 Police Sergeant 
12701 Customer Service 
12702 School Bus Driver
12703 Assistant Supervisor 
12704 nan
12705 Oral Surgery Assistant
12706 Registered Nurse
12707 Packer
12708 nan
12709 Group Coordinator
12710 Process Control Specialist
12711 CDL driver 
12712 Pipefitter 
12713 Applications Service Analyst 
12714 Supervisor
12715 District Manager
12716

13764 Coordinator Cultural Events
13765 Asst Dietary Manager
13766 Administration Manager 
13767 nan
13768 Waitress 
13769 Owner/Director
13770 tech
13771 nan
13772 Driver 
13773 Mason
13774 Senior Strategic Planner
13775 nan
13776 District Manager
13777 Soldier
13778 CHILD CARE
13779 General Manager 
13780 nan
13781 Registered Nurse
13782 Project Coordinator 
13783 nan
13784 Human Resources Assistant
13785 Mortgage Underwriter
13786 Graduate Research Assistant
13787 owner
13788 admin asst
13789 owner
13790 Customer Service Supervisor
13791 Infant teacher
13792 RN
13793 RN
13794 Nys Court officer
13795 Harbor Patrolman 
13796 Finance Manager
13797 Flight Attendant
13798 Maintenance manager
13799 Technical Product Manager
13800 Service Center Team Leader
13801 Sales Associate
13802 Admin Asst
13803 Data Analyst Technician
13804 Comptroller
13805 Claims Director
13806 Quality Analyst
13807 Marketing Supervisor
13808 CEO
13809 Hostess
13810 Tenderloin Boxer
13811 Director
13812 Local depu

15092 Project Facilitator
15093 Athletic events coordinator
15094 Creative Content Director
15095 Carpenter 
15096 Lead Systems Specialist
15097 AUTO MECHANIC
15098 Driver
15099 Yso 
15100 Engineering Specialist
15101 Accounts Payable 
15102 Warehouse
15103 FIELD CONSTRUCTION COORDINATOR
15104 Auto Parts Specialist
15105 global content director
15106 Scheduler
15107 Attorney
15108 Technical Manager II
15109 bottom pour pittman
15110 Accounting Associate
15111 energy technician
15112 Senior Leadership
15113 Pre-need Funeral Director
15114 Purchasing
15115 Provider Services
15116 Patient Access Specialist
15117 Teacher
15118 Production Manager
15119 Lending Manager
15120 Sr Risk Analyst
15121 Primary Nurse - Case Manager
15122 Software Engineer
15123 Pharmacy Technician
15124 Registered Nurse
15125 Manager
15126 Correction officer
15127 Mental Health Therapist
15128 Installer
15129 Supervisor
15130 Lead Aircraft Mechanic
15131 nan
15132 Manager, Web Development
15133 Laborer
15134 Manage

16178 nan
16179 Nurse
16180 Customer service
16181 Poker Dealer
16182 Pipe fitter
16183 Manager
16184 Accountant
16185 Director of Patient Care Services 
16186 natural resource manager
16187 Sales Solutions Associate
16188 Counter Sales
16189 General Manager
16190 Software Engineer
16191 Owner
16192 Owner
16193 nan
16194 Sr. Software Quality Engineer
16195 Program manager
16196 Foreman
16197 MEDICAL ASST
16198 nan
16199 Sr. Engineering Analyst
16200 AVP
16201 Senior Director
16202 Operations Manager
16203 Medical personnel 
16204 Nurse
16205 Staff Accountant
16206 electrical supervisor
16207 Truck driver
16208 Owner
16209 nan
16210 Manager
16211 Flight Crew
16212 Financial Management
16213 Eligibility Specialist 
16214 TATTOO ARTIST
16215 District Manager
16216 Operations Fire Captain / Safety Officer
16217 Underwriter
16218 Electrician 
16219 Owner
16220 Registered Nurse
16221 Business Technical Analyst 
16222 Receiving Foreman
16223 Bartender 
16224 Account Manager
16225 nan
16226 St

17447 Manager
17448 nan
17449 Recruiting Coordinator
17450 Senior Account Manager
17451 nurse
17452 nan
17453 nan
17454 Welder Fabricator 
17455 nan
17456 Department Manager 
17457 Owner
17458 clerk
17459 Tutor
17460 TEACHER 
17461 nan
17462 Superintendent
17463 Teacher
17464 Driver
17465 Work planner
17466 truck driver
17467 branch supervisor
17468 Truck driver
17469 Owner/Editor
17470 nan
17471 Software Consultant
17472 Ticket Sales
17473 nan
17474 Loan Technician
17475 Design Engineer
17476 Senior Counsel
17477 Stacionary system assistem operator
17478 State Police
17479 CSR
17480 Director of operations 
17481 Owner
17482 Machinist
17483 Maintenance
17484 nan
17485 QCG Specialist
17486 Respiratory Care Practitioner
17487 Laser Technician
17488 Mechanic
17489 welder/machinist
17490 Technician
17491 Financial Analyst
17492 Owner/Operator
17493 HONOR BAR ATTENDANT
17494 Physician
17495 Medical assistant
17496 nan
17497 Banker
17498 Head Maintenance
17499 loader
17500 nursing technician

18642 Revenue Agent
18643 Dry Wall Finisher
18644 Asst.Facilities Mngr
18645 Manager
18646 Bookkeeper
18647 Senior Software Engineer
18648 Truck driver
18649 Document control
18650 psc
18651 Electrician
18652 Tech Support Engineer
18653 Principal
18654 Adjunct Professor 
18655 Woodworker
18656 Technician
18657 Hospitality Applications Manager
18658 Supplier Management
18659 Sales
18660 Snr Business Development Mgr.
18661 Operations Manager
18662 Design Assurance Technician 
18663 Social Media Manager
18664 Patient Care Coordinator
18665 Association Manager
18666 nan
18667 Director of Events
18668 Slot floorperson
18669 Director
18670 Assistant Nurse Manager
18671 DRIVER
18672 nan
18673 Financial Aid Advisor
18674 Stylist
18675 Valet parking
18676 Corporate Services
18677 Tech Services Project Supervisor
18678 SUPERVISOR
18679 nan
18680 Supervisor Food Services 
18681 owner
18682 Manager
18683 Business analyst
18684 Area manager
18685 Leasing consultant
18686 owner
18687 351st Medical A

19983 executive admin
19984 Appraiser trainee
19985 Team lead
19986 Lieutenant
19987 E-4
19988 Clerk
19989 Owner
19990 nan
19991 Maintenance Technician 
19992 senior software engineer
19993 Design Specialist
19994 Salesman
19995 Partner
19996 Senior Property Manager
19997 Mechanic engineer
19998 Teacher
19999 Director of Design
20000 Senior Manager
20001 Deposit Specialist
20002 owner
20003 Executive Director
20004 ServiceNow Admin
20005 nan
20006 Investment Analyst
20007 superviser
20008 Inventory Mgmt. Specialist
20009 Admission Coordinator
20010 Sales
20011 Technician
20012 Office Manager/Marketing
20013 teacher
20014 Sales
20015 Production
20016 supervisor
20017 COMMERCIAL BUSSINES DIRECTOR
20018 nan
20019 Environmental Services 
20020 Owner
20021 nan
20022 corporate analyst
20023 Stage Hand
20024 consumer affairs specialist
20025 Front End
20026 owner
20027 Teacher
20028 sales rep
20029 EFDA
20030 SR Certified Pharmacy Technician
20031 Legal Secretary
20032 Psychologist
20033 Lumb

21146 Purchasig
21147 Assistant Manager
21148 Teacher
21149 maint. worker
21150 Owner
21151 nan
21152 Pediatric dentist
21153 Manager Group Sales and Resolutions
21154 Store manager
21155 Clinical Manager
21156 Insurance Appraiser
21157 Service Manager
21158 Registered Nurse
21159 Owner
21160 Clerk
21161 Electrician
21162 Pharmacy technician
21163 nan
21164 driver
21165 Administrative Assistant III
21166 Chief Petty Officer
21167 Sr. Associate Scientist
21168 CNA
21169 Executive Assistant
21170 nan
21171 Produce Manager
21172 group director 
21173 owner
21174 Engineer
21175 Cloud Automation Engineer
21176 nan
21177 Enrollment Specialist
21178 maintenance tech
21179 Industrial Engineer 
21180 Supervisor
21181 Vet Tech
21182 Law Clerk
21183 owner
21184 Secretary
21185 Management 
21186 Clerical support 
21187 Merchandiser 
21188 Investigator 
21189 Security Supervisor
21190 Teacher
21191 IT Manager
21192 Accountant
21193 Project Manager
21194 Shift Manager/Associate
21195 Wastewater plan

22475 Machinist
22476 Teacher
22477 Computers
22478 Repair Specialist
22479 Quality Manager
22480 Lead Ramp Agent 
22481 nan
22482 Client Relationship Manager
22483 Teacher
22484 Assistant Store Manager 
22485 Driver
22486 Supervisor
22487 Publisher
22488 Manager
22489 Water Plant Manager
22490 retail ops manager
22491 Personal trainer 
22492 Beverage server
22493 Family Services Worker
22494 RN
22495 Director of nursing
22496 used car manager
22497 Crew Chief
22498 Professor
22499 nan
22500 nan
22501 maintance 
22502 MMA Fighter
22503 CHECK PROCESSOR
22504 service
22505 Maintenance technician 
22506 nan
22507 Production Manager
22508 nan
22509 Food Service Instructor
22510 Account manager
22511 owner/operator
22512 General manager
22513 nan
22514 nan
22515 Account Manager
22516 Legal Secretary 
22517 RN
22518 Senior Associate
22519 Teacher
22520 Vice President
22521 Sr Contract and Grant Officer
22522 Medical assistant 
22523 Director, Transaction Management
22524 Store Manager
22525 

23637 Truck driver
23638 Program Manager
23639 Foreman
23640 District Executive
23641 Senior Engineer
23642 Software Developer
23643 ReplenishmenSpecialist
23644 Owner
23645 manager
23646 nan
23647 Retail Team Manager
23648 Project Manager
23649 President and CEO
23650 Radio Freequency Engineer 
23651 Accounting
23652 OFFICE MANAGER ACCOUNTANT
23653 Physician
23654 Customer Service Representative 
23655 nan
23656 Cook
23657 Respiratory Therapist
23658 Journeyman Millwright
23659 teacher
23660 INSURANCE AGENT
23661 nan
23662 teacher
23663 nan
23664 Teacher
23665 Loan Processor
23666 Office Manager
23667 President
23668 nan
23669 Production Manager
23670 President 
23671 director of dining
23672 Head of School
23673 Assistant Nurse manager
23674 Painter
23675 Machine opa
23676 Truck Driver
23677 IT Systems Engineer
23678 Owner
23679 Assoicate
23680 Branch Office Administrator 
23681 Agency Manager
23682 owner
23683 Clerk
23684 Certified Nurse Aid 
23685 PROJECT MANAGER
23686 Service Tech

24945 TRUCK DRIVER/STATION ENGINEER
24946 Creative Director
24947 Truck Driver
24948 Corporate Trainer 
24949 nan
24950 Parts Inspector 
24951 Bartender 
24952 Service Manager
24953 Area Sales Manager
24954 Driver/ Installer 
24955 Project Manager
24956 Marketing
24957 Electrician apprentice 
24958 Business Process and Project Lead
24959 Teacher
24960 Coordinator
24961 BDC REP
24962 Language instructor 
24963 Minister
24964 Senior Financial Analyst
24965 nan
24966 Vice President
24967 foreman
24968 Merchandise Coordinator 
24969 LNA NURSE
24970 Information Business Analyst II
24971 Tech associated
24972 nan
24973 Residential sales manager
24974 Teacher
24975 Sales Director
24976 Personal Attendant
24977 Owner
24978 Fitness Instructor 
24979 Admin Assistant 2
24980 supervisor
24981 Registered Nurse 
24982 Pharmacy Manager
24983 nan
24984 Staff Accountant
24985 Database Administrator
24986 Transport Officer 
24987 Sr operator
24988 General Manager
24989 Teacher
24990 Filler Operator 
249

26163 Commissioner 
26164 Owner
26165 nan
26166 nan
26167 IT Specialist 
26168 Machine Shop Owner
26169 Support
26170 Managing Member
26171 Mechanic 
26172 Patient Advocate
26173 Retail Sales Consultant
26174 nan
26175 Sales Manager
26176 BUS OPERATOR
26177 Accounting 
26178 Mailhandler
26179 Payroll Supervisor
26180 nan
26181 Laborer
26182 RN
26183 Investigator
26184 nan
26185 Assistant Administrator 
26186 Financial analysts 
26187 Medical Record Retrieval Specialist
26188 Operations Manager
26189 nan
26190 Teacher
26191 nan
26192 Facility Manager
26193 manager
26194 Guest Relations
26195 Project manager 
26196 Server
26197 Personal Banker
26198 Chef 
26199 Information Processing Specialist
26200 Facilities Mechanic
26201 nan
26202 Hydro-mechanical systems engineer
26203 Equipment processor 
26204 table games dealer 
26205 Foreman
26206 systems administrator
26207 Production and lead
26208 Professional Driver
26209 Case manager 
26210 Mainenance 
26211 Sr. Network Specialist 
26212 V

27219 CNC Machinist
27220 Regional Manager
27221 DATA ANALYST
27222 IT Mgr.
27223 High School Teacher
27224 Senior IT Audit Analyst
27225 Sr Applications Engineer
27226 Maintenance
27227 Executive Assistant
27228 Assistant Vice President
27229 Driver
27230 Biller
27231 Test Lab Tech II
27232 Cook
27233 crew assistant 
27234 Manager
27235 nan
27236 Community Association Manager
27237 News Photographer
27238 Loan Processor
27239 Administrative
27240 Hvac lead mechanic
27241 Store Manager
27242 Instructor
27243 AR Cash App Specialist
27244 Operations manager
27245 Paralegal
27246 Assembly repair 
27247 Facilities engineer
27248 Accountant
27249 owner
27250 Customer Support
27251 Employment Resource Specialist
27252 Die cut operator 
27253 Caseworker 
27254 environmental service aide
27255 Secretary
27256 Maintenance worker
27257 Warehouse Manager
27258 Branch assistant manager 
27259 nan
27260 Mechanical integrity Engineer 
27261 Pharmacist
27262 Manager 
27263 nan
27264 Network Engineer


28430 Asst. Proj. Manager
28431 Administration Manager
28432 Teacher
28433 Store Manager
28434 Heavy rotator operator 
28435 nan
28436 Supervisor/Owner
28437 Performance Coach
28438 Administrative Assistant
28439 Installer
28440 Assistant Director Automotive
28441 Sleep Expert
28442 Owner 
28443 Marketing
28444 Nurse
28445 Dispatcher
28446 Sr. Project Manager
28447 nan
28448 Business Travel Sales Coordinator
28449 Principal 
28450 Floor hand
28451 Merchandiser
28452 Manager
28453 Salesman
28454 Registered Sanitarian
28455 Teacher
28456 Counterman
28457 Police
28458 Registered Respiratory Therapist
28459 Police Dispatch Supervisor
28460 operations supervisor
28461 Group lead
28462 Forman
28463 Industrial Engineer Admin.
28464 Environmental Specialist
28465 OPERATIONAL BRANCH MANAGER
28466 Quality Manager 
28467 Right-of-Way Agent
28468 Postbac research
28469 Sheet metal mechanic
28470 Security 
28471 Senior business control manager
28472 Operator
28473 Auto Claims Adjuster
28474 Sale Co

29646 1st Deputy treasurer
29647 WAREHOUSE MANAGER ASSIST
29648 owner/president
29649 Driver
29650 Customer Service Rep
29651 DIGITAL PRESS OPERATOR
29652 Real Estate Broker
29653 Logistic 
29654 Marketing Manager
29655 Commercial Loan Analyst
29656 Fabricator
29657 Chief Compliance Officer
29658 Administrator
29659 Principal Transportation Planner
29660 Area Advisor
29661 Line operator 
29662 Machine Operator
29663 Truck Driver
29664 Dealer
29665 Driver
29666 nan
29667 project manager 
29668 nan
29669 Sales
29670 Chief Communications Officer
29671 Business Acountant 
29672 owner
29673 Warehouse Lead
29674 Coordinator of Graduate Admissions
29675 Paramedic
29676 plant manager
29677 Manager
29678 Service Manager
29679 Manager of Reporting and Data Quality
29680 Engineer
29681 Branch Manager
29682 Store Manages
29683 Engineer
29684 Sales Consultant 
29685 Receiver
29686 Academic Director
29687 Carrier
29688 HUA
29689 Management Analyst
29690 Management
29691 Nurse Practitioner 
29692 own

30683 RN
30684 Data Analyst
30685 Head of Design
30686 installer
30687 server
30688 installer
30689 Market Revenue Management Leader
30690 Vice President - Retail Operations
30691 assembly
30692 Test lead
30693 Director
30694 CEO
30695 Craovac Opperater
30696 Adjuster
30697 Senior Rabbi 
30698 nan
30699 Wastewater Supervisor
30700 L&D Consultant
30701 Diesel Technician 2
30702 Teacher
30703 nan
30704 Associate Vice President
30705 Professor
30706 vocational counselor
30707 Security officer 
30708 Project Manager
30709 President
30710 Controller
30711 Setup operator
30712 Chief Operating Officer
30713 Administrative Coordinator
30714 ART DIRECTOR
30715 Senior Project Manager
30716 Patient coordinator 2
30717 Client Underwriting Specialist 
30718 CCY / Derivatives Surveillance Analyst 
30719 Installer 
30720 Realtor
30721 Senior Systems Analyst
30722 New vehicle attendant
30723 Golf pro
30724 Office manager
30725 nan
30726 Driver
30727 President
30728 Supervisor 
30729 rural carrier
3073

31981 Retention agent
31982 Driver
31983 Engineering Supervisor
31984 CTE Teacher
31985 Director of Medical Records
31986 Project Engineer
31987 Adult Services Social Worker
31988 Program Manager
31989 Dot/Safety Coordinator 
31990 Investigator
31991 Administrative Assistant
31992 Pharmacy Tech
31993 Aircraft mechanic
31994 Teacher
31995 Manager Trainee
31996 Sr Analyst 
31997 machinist
31998 Product Line Manager
31999 Head Chef
32000 Owner
32001 Refresh Installer Technician
32002 Regional Business Banking Advisor
32003 designer
32004 Senior Practice Manager II
32005 Plant manager 
32006 Sales
32007 Manager
32008 Registered Nurse
32009 Director, Customer Data
32010 nan
32011 Instructional assistant
32012 Education and Culture Coordinator 
32013 Guest Services
32014 nan
32015 nan
32016 server
32017 Program & Donations Coordinator
32018 Production Associate
32019 secretary
32020 owner
32021 Manage
32022 Sales manager
32023 Machine Operator
32024 Appointment Clerk
32025 Project manager
32

33167 nan
33168 Web Developer
33169 VP Strategy & Corporate Development
33170 manager
33171 Truck driver
33172 nan
33173 Account Manager
33174 Sales Manager
33175 manager
33176 Driver
33177 attendant
33178 Sales Engineer
33179 Administrative Assistant
33180 Systems Technician
33181 Costumer service agent
33182 Claims Adjuster 
33183 Site Billing Coordinator 
33184 nan
33185 driver
33186 nan
33187 Pilot
33188 store manager
33189 Imports manager
33190 Assistant Principal 
33191 Sr. Project Manager
33192 Executive Director
33193 Structural Engineer EIT
33194 Server
33195 Senior Maintenance Planner
33196 Manager
33197 Principal
33198 Caregiver 
33199 Order Processor
33200 MDS Coordinator
33201 Parts Manager
33202 Sheriff's Service Specialist
33203 Special Ed Teacher
33204 Assistant Technology Coordinator
33205 Traffic system supervisor 
33206 IE
33207 Technical Recruiter 
33208 vlerk
33209 JDS
33210 Guest Service Attendant
33211 Customer Representative
33212 Recruiter
33213 lead anesthesia

34415 OPS Sr Specialist
34416 nan
34417 nan
34418 Senior Business Analyst
34419 Sales
34420 Admin Office Manager
34421 construction managment
34422 Office Manager
34423 Care Provider
34424 Food Service
34425 painter
34426 Structural  Sales
34427 Operating Engineer
34428 Health Educator II
34429 SystemsAdministrator
34430 Medical Assistant / Front Offie
34431 maint mech 2
34432 Project Manager 2
34433 nan
34434 Office manager
34435 Supervisor 
34436 Eligibility Specialist
34437 ASSITANT SUPERVISER
34438 Dir. Business Development
34439 maintenance 
34440 nan
34441 teacher
34442 Attorney 
34443 Business Analyst
34444 nan
34445 Administrative Manager
34446 VP of Purchasing
34447 nan
34448 Senior Branch Associate
34449 Marketing
34450 CONTROLLER
34451 Csa
34452 Environmental Engineer
34453 Nurses assistant
34454 Scheduling and Planning Manager
34455 Exec. Asst
34456 Security Guard
34457 DRIVER
34458 nan
34459 Manager
34460 nan
34461 RN
34462 Field Service Rep
34463 Sales
34464 Faculty Assoc

35573 Maintenance
35574 DAE On-Highway NC
35575 Forester
35576 Owner operator
35577 activities coordinator
35578 Practice Manager 
35579 Family caregiver
35580 nan
35581 IT
35582 Sales
35583 nan
35584 Judicial Assistant
35585 Facility Technician 
35586 SUPERVISOR
35587 Driver
35588 Compressor specialist 
35589 police sergeant
35590 Foreman
35591 President
35592 Sales
35593 Child Welfare
35594 Marketing Manager
35595 CASE MANAGER
35596 Area VP
35597 nan
35598 Admit assist
35599 Project Attorney
35600 National General Adjuster
35601 Unit Secretary
35602 Certification Manager
35603 Administrator 
35604 Truck driver
35605 Administrative Coordinator
35606 Professor
35607 Sales Engineer
35608 Customer service /cashier
35609 Bcba 
35610 nan
35611 Marketing Operations Engineer
35612 Driver
35613 Teacher
35614 scientific advisor
35615 Top Build Labor Hand
35616 Warehouse worker 
35617 School Psychologist 
35618 school bus driver
35619 Teacher
35620 Forklift 
35621 Machine Operator 
35622 Qualit

36801 Assoc Director
36802 owner
36803 nan
36804 Programming Machinisy
36805 Nurse Practitioner
36806 Co-Owner
36807 Director of Security
36808 nan
36809 officer
36810 Area Manager Florida
36811 Speech language Pathologist
36812 Business Specialist
36813 Lobbyist
36814 Director
36815 ceo
36816 nan
36817 Chef 
36818 Maintenance Supervisor
36819 Paraplanner
36820 Training Officer
36821 Associate Director, Security
36822 Lieutenant 
36823 Software Engineer Asc
36824 Social Worker
36825 sales
36826 Sr. Mortgage Specialist
36827 Asst Auditor
36828 Sr. Global Payroll Manager
36829 teacher
36830 Certified Pharmacy Technician
36831 Sales Manager 
36832 Administrative Assistant
36833 Attorney
36834 Vice President
36835 IT Customer Support
36836 General Manager
36837 Nurse case manager
36838 nan
36839 Associate
36840 Store Manager 
36841 Elemintary Principal
36842 Business Intelligence Designer
36843 regional supervisor
36844 ACCOUNT REPRESENTATIVE
36845 customer service rep
36846 OWNER OPERATOR

38082 Inventory
38083 Director of coaches
38084 Supervisory Forestry Technician
38085 Registered Nurse 
38086 nan
38087 Sales producer 
38088 Vice President, West Coast Operations
38089 Optician
38090 Assistant Golf Course Superintendent
38091 HHA
38092 nan
38093 IT Manager
38094 Caregiver 
38095 Graduate Teaching Assisitant
38096 Server
38097 Sales Associate
38098 nan
38099 client solutions specialist
38100 Assistant Payment Worker
38101 Buyer/Planner
38102 Auto Mechanic
38103 Price integrity
38104 Driver
38105 General Labor
38106 Software analyst
38107 Material Technician
38108 Expo
38109 nan
38110 Site Coordinator 
38111 School Psychologist
38112 Account Manager
38113 Branch Banker
38114 Hygienist
38115 OWNER
38116 Teacher
38117 Auto mechanic
38118 Bookkeeper
38119 Underwriting Sr. Specialist QA
38120 nan
38121 Assistant 
38122 Manager
38123 Nurse technician
38124 Journeyman plumber
38125 engineer
38126 Branch Manage
38127 BUSINESS ANALYST
38128 mechanic
38129 Physical therapist
381

39339 Wood shop supervisor
39340 Licensed Practical Nurse
39341 Division Director
39342 Regulatory compliance senior investigato
39343 Doctor
39344 Training Coordinator
39345  meat supervisor
39346 Accounts Payable Supervisor
39347 Recruiter
39348 Pite Fitter
39349 Teacher
39350 Department manager 
39351 Customer service 
39352 Quality Supervisor
39353 nan
39354 Secretary
39355 Regional Practice Administrator
39356 correcttional officer
39357 Soccer Coach
39358 Manager
39359 Lead Nurse
39360 Property management
39361 Site Coordinator 
39362 Contract Administrator
39363 Medical Coder
39364 Assistant manger
39365 Senior Pastor
39366 Assistant Director
39367 District Manager
39368 Billing Administrator
39369 RN
39370 CMA
39371 Paralegal
39372 Cashier 
39373 parts mnager
39374 Supervisor
39375 nan
39376 Owner
39377 Server
39378 Site Superintendent
39379 President
39380 Planner
39381 Owner
39382 manager
39383 Payroll Coordinator
39384 project controls administrator
39385 Assistant General C

40572 Administrative Supevisor
40573 Driver
40574 Teacher
40575 Personal Trainer
40576 Principal Software Architect
40577 Teacher
40578 Laborer
40579 nan
40580 Underwriter
40581 Environment tech
40582 Nurse
40583 Graphic Artist
40584 Director
40585 Flowback
40586 Marketing Communications
40587 COO
40588 Janitor
40589 Supervisor
40590 IT manager
40591 Fixed Operations Director 
40592 DME technician
40593 Secretary 
40594 General Managere
40595 sales
40596 Accountant
40597 Collection Clerk
40598 nan
40599 Sales Manager
40600 Regional Plant Operations Manager
40601 TTA/Splicer
40602 Asst. Manager
40603 Branch Manager
40604 Wood Finisher
40605 manager
40606 QA Lead
40607 nan
40608 Director
40609 Director of Early Childhood
40610 Counselor
40611 Software Engineer
40612 Director of Personal Training
40613 District manager
40614 Regional manager
40615 admin
40616 teacher
40617 Personal Financial Specialist 
40618 loan officer 
40619 owner
40620 Freelance Photo Retoucher
40621 Deputy CIO
40622

41759 Homicide Detective
41760 parts manager
41761 Doc Mgmt Clerk
41762 grant research specialist
41763 PE Teacher 
41764 Food Clerk
41765 Financial Manager
41766 Eligibility Worker 2
41767 Driver
41768 Resident care manager RN
41769 Network Engineer
41770 Driver
41771 Premier Banker
41772 Public Safety
41773 Social Media Specialist
41774 Clerk
41775 Director of Owned Properties
41776 nan
41777 Register nurse 
41778 Security Officer
41779 Truck Driver
41780 Driver
41781 President
41782 Sales rep
41783 Provider Enrollment
41784 Pharmacy tech
41785 Manager
41786 Market Researcher
41787 Law Enforcement 
41788 Attorney
41789 Account Manager
41790 Manager
41791 Department Head
41792 Psychologist
41793 911 Operator
41794 Operations Associate
41795 Machine Attendant 
41796 Accountant
41797 Recruiter
41798 Teacher
41799 nan
41800 Clinical Research Associate
41801 Sales
41802 Payment center supervisor
41803 Conductor
41804 QA Document Specialist
41805 General Contractor
41806 Sr.Programer Analy

43044 Teacher
43045 court technician
43046 Human Resource Manager
43047 Loan Advisor
43048 Enrollment Recon Specialist
43049 Warehouseman
43050 memberservice
43051 Branch Manager
43052 owner
43053 Regional Manager
43054 Teacher
43055 Manager
43056 Support services coordinator
43057 Executive Secretary
43058 Machine operator 
43059 Chief Technology Officer
43060 ownere
43061 Administration 
43062 Department Manager
43063 Manufacturing Engineer 
43064 Shifter/Air Driver
43065 Dentist 
43066 Technician
43067 nan
43068 Teachet
43069 Teacher
43070 technician
43071 Alarm technician
43072 Claims Associate
43073 LOGISTICS COORIDATE
43074 Analyst
43075 Braille specialist 
43076 Retail Store Manager
43077 Engineer Intern
43078 Electrician
43079 City Carrier
43080 financial Advisor
43081 Owner
43082 Manager
43083 nan
43084 IT Helpdesk 
43085 DevOps Engineer
43086 SHERIFF OFFICER
43087 relationship manager
43088 owner operator
43089 Server
43090 School Counselor 
43091 Caretaker X
43092 Benefit Sp

44205 System Administrator
44206 President
44207 Financial Consultant 
44208 Regional Sales Manager
44209 mailman
44210 Bio-med technician
44211 Food Service Officer
44212 Regional Customer Service Leader
44213 Human Resources/investigator
44214 Assistant Vice President
44215 Rocket builder
44216 nan
44217 Teacher
44218 IT Systems Engineer II
44219 sou chef
44220 TEAM LEAD
44221 Rewindero
44222 Manicurist
44223 CNA
44224 Laborer
44225 Sr. Credit Administator
44226 Pollution & Safety Advisor
44227 X-rays tech
44228 Director
44229 System Administrator
44230 System Engineer
44231 Customer Service SME
44232 Manager
44233 General Warehouse laborer 
44234 Sr. Systems Engineer
44235 Sales
44236 Business Development 
44237 Paraprofessional 
44238 Sr. Director of Quality
44239 Field service technician
44240 Design Engineer 2
44241 Pharmacist
44242 long haul driver
44243 District Sales Manager
44244 Software Engineer
44245 Clinical Application Specialist
44246 Distribution Supervisor
44247 Polic

45584 Owner/CEO
45585 Senior Consultant - Software Engineer
45586 Customer Service Representative
45587 nan
45588 Disclosure specialist
45589 Certified Application Counselor
45590 Owner/Spouse
45591 nan
45592 Sales Representative 
45593 General Manager
45594 Sr Project Leader
45595 Maintenance Electrician
45596 Pharmacist
45597 Manager IV
45598 Truck driver 
45599 nan
45600 Police Officer
45601 Customer service
45602 Licensed practical nurse 
45603 BUS DRIVER
45604 senior policy analyst
45605 security officer
45606 nan
45607 Office Manager
45608 Order Specialist
45609 Pharmacist
45610 Real Estate Assistan
45611 Operations 
45612 Sr. Systems Administrator 
45613 Owner
45614 Accountant
45615 Staff Accountant
45616 General Manager
45617 Manager 
45618 Sales
45619 para leagal
45620 F and P 1 
45621 auto tech
45622 TSA
45623 sales
45624 Carpenter
45625 Computer Programmer Analyst
45626 Motor Vehicle Representative
45627 Security Site Supervisor
45628 Teacher
45629 CEO
45630 LMT
45631 Sr. Ad

46817 engineer
46818 nan
46819 Truck Driver
46820 Sales
46821 Development manager
46822 Admin
46823 Airport Customer Service
46824 Maintenance Technician
46825 LPN
46826 Front end manager
46827 Insurance
46828 nan
46829 Machine builder, tool maker 
46830 Waiver worker
46831 Senior Clinical Supervisor
46832 Diecast Engineer
46833 Pressure sewer Specialist
46834 driver
46835 nan
46836 Biomedical 
46837 Senior Manager, Air Traffic 
46838 Facilities coordinator
46839 Staff Accountant
46840 Asst Manager
46841 Driver
46842 Program manager 
46843 Owner
46844 nan
46845 Director of IT
46846 Service Engineer
46847 Staff Attorney
46848 Teacher
46849 Registered Nurse 
46850 driver
46851 Registered Nurse
46852 Correction Officer
46853 sales rep
46854 General Manager
46855 nan
46856 coodinator
46857 Insurance Producer
46858 Account Executive
46859 Account Executive
46860 Aesthetician
46861 Director of Operations
46862 porter /Janitorial
46863 Senior Producer
46864 Principal
46865 Full Stack Web Deve

47940 Customer Service
47941 Senior Director of IT Internal Audit
47942 Receptionist/Switchboard 
47943 nan
47944 Derrickman 
47945 nan
47946 Driver/service Provider
47947 Applications & Systems Administrator
47948 Senior SBA PROCESSOR
47949 nan
47950 teacher
47951 Sales Manager
47952 RN
47953 Technology Specialist
47954 Executive Vice President
47955 Document Control Specialist
47956 Assistant Chief Technician
47957 Solutions Development Architect
47958 VIDEO EDITOR
47959 supervisor
47960 nan
47961 Driver 
47962 Coordinator
47963 maintaince
47964 Registered Nurse
47965 Patient Access
47966 Endoscopy Support Specialist 
47967 Director Of Security
47968 General Manager
47969 Repceptionist
47970 Project Manager
47971 nan
47972 Purchasing agent
47973 nan
47974 Principal Engineer
47975 Injection Mold Operator
47976 Bookkeeper 
47977 registered nurse
47978 Sales
47979 Administrative Assistant
47980 Patient Service Rep
47981 nan
47982 Saftey Department
47983 Sr Quality Specialist
47984 Assoc

49149 Driver
49150 Speech Therapist
49151 Processor
49152 Controls Engineer
49153 Computer Operator
49154 Manager
49155 Firefighter/EMT
49156 PR Manager
49157 Driver
49158 Field-Technical 
49159 Staff Scientist
49160 Data Management
49161 Insulatir
49162 electrician
49163 Manager
49164 Manager/Supervisor
49165 nan
49166 corrections mfg. specialist
49167 system analyst
49168 nan
49169 SERVICE MANAGER
49170 Operations Manager
49171 Lead Clerk
49172  Director of CR Field Services
49173 Vice Provost 
49174 Shipping
49175 Administrative Assistant
49176 owner/designer
49177 Teacher
49178 Financial Representative
49179 Store Manager
49180 Head Teller
49181 Nurse
49182 Management Program Analyst
49183 Manager
49184 nan
49185 Director 
49186 Warehouse foreman
49187 Teacher
49188 Sales
49189 Bartender
49190 Service Tech IV
49191 nan
49192 Inside Sales 
49193 Owner
49194 Senior assistant manager 
49195 Owner
49196 Direct Support Professional
49197 Dock worker
49198 Reminisance Care Coordinator
49

### 배운사람들의 코드, 고오급 python 스킬
numpy를 학습하면서 브로드캐스팅에 관하여 잠깐 언급했었습니다. 그렇다면 그 파워풀하다던 브로드캐스팅은 어떻게 사용해야할까요?
    
>기타 언어에서는 지원하지 않는 기능이니만큼 파이썬의 특징을 가장 잘 살리는 코드  
**`apply`** 함수를 사용하여 인자로 받는 모든 데이터에 함수를 적용

#### apply 함수로 컬럼에 적용시키는 코드 구조
    df['컬럼명'] = df['컬럼명'].apply(lambda x: func(x) if 조건문)

In [140]:
%%time
# apply() 함수사용 반복이 가능한 데이터구조의 모든 인자에 적용
# lambda 각 인자에 적용할 함수 혹은 연산
df['emp_title'] = df['emp_title'].apply(lambda x: x.lower())

CPU times: user 13.9 ms, sys: 580 µs, total: 14.5 ms
Wall time: 14.1 ms


In [141]:
# 대소문자 구분을 처리한 값 확인
df['emp_title'].value_counts()
# 기존 value_count 값과 차이가 있음을 확인 할 수 있습니다.
# 제공 된 데이터셋이라도 이와 같은 작은 차이가 있을 수 있습니다.
# 데이터를 꼼꼼하게 살펴볼 수록 디테일한 차이를 만들 수 있습니다.

nan                                   4242
owner                                 1149
manager                               1064
teacher                               1028
driver                                 536
registered nurse                       467
sales                                  455
supervisor                             432
rn                                     362
truck driver                           295
general manager                        289
office manager                         267
project manager                        264
president                              245
director                               237
nurse                                  188
sales manager                          186
engineer                               183
operations manager                     162
account manager                        151
server                                 148
administrative assistant               145
police officer                         143
technician 

In [142]:
# owner인 사람들 샘플링
df[df['emp_title'] == 'owner']

Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,purpose,title,zip_code,addr_state,dti,delinq_2yrs
58,6000,6000,5975.0,36 months,15.05,208.14,C,C4,owner,10+ years,OWN,55000.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,372xx,TN,20.07,1.0
176,17525,17525,17525.0,60 months,20.00,464.31,D,D4,owner,4 years,RENT,6000.0,Not Verified,Dec-2017,Fully Paid,n,debt_consolidation,Debt consolidation,320xx,FL,21.40,0.0
327,7000,7000,7000.0,36 months,12.62,234.58,C,C1,owner,10+ years,RENT,72000.0,Source Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,947xx,CA,7.18,0.0
354,8000,8000,8000.0,36 months,6.72,246.00,A,A3,owner,5 years,RENT,42000.0,Not Verified,Dec-2017,Current,n,credit_card,Credit card refinancing,329xx,FL,18.00,0.0
362,15000,15000,15000.0,60 months,7.35,299.51,A,A4,owner,10+ years,MORTGAGE,70000.0,Not Verified,Dec-2017,Current,n,small_business,Business,410xx,KY,14.44,0.0
511,20000,20000,20000.0,60 months,13.59,461.13,C,C2,owner,1 year,MORTGAGE,85000.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,673xx,KS,33.66,0.0
570,24875,24875,24875.0,60 months,11.99,553.21,B,B5,owner,10+ years,MORTGAGE,50000.0,Source Verified,Dec-2017,Current,n,home_improvement,Home improvement,395xx,MS,26.19,0.0
614,15000,15000,15000.0,60 months,20.00,397.41,D,D4,owner,< 1 year,RENT,25000.0,Not Verified,Dec-2017,Current,n,other,Other,770xx,TX,48.97,0.0
640,13675,13675,13675.0,36 months,7.97,428.34,A,A5,owner,10+ years,MORTGAGE,113000.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,940xx,CA,19.47,1.0
895,12000,12000,12000.0,36 months,6.72,369.00,A,A3,owner,4 years,MORTGAGE,110000.0,Source Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,300xx,GA,2.44,0.0


In [145]:
# 샘플링 된 데이터프레임의 단일 컬럼 접근
df[df['emp_title'] == 'owner']['annual_inc']

58        55000.0
176        6000.0
327       72000.0
354       42000.0
362       70000.0
511       85000.0
570       50000.0
614       25000.0
640      113000.0
895      110000.0
897       85000.0
1007     220000.0
1059      88000.0
1140     120000.0
1204      75000.0
1285      85000.0
1294     200000.0
1299     100000.0
1328     164000.0
1337      89000.0
1424     110000.0
1511     200000.0
1516      68000.0
1573     400000.0
1575      80000.0
1649     400000.0
1721      65000.0
1726     125000.0
1734      75000.0
1782      25500.0
           ...   
48895    130000.0
48897     58576.0
48902     50000.0
48903     72566.0
48950     60000.0
48994     60000.0
49021     46000.0
49052     60000.0
49087     25000.0
49193     60000.0
49195    110000.0
49233    100000.0
49291     80000.0
49323     65000.0
49355     85000.0
49359    125000.0
49454     95400.0
49479     46500.0
49560     72000.0
49647     47000.0
49712     54000.0
49768    125000.0
49824    125000.0
49863     75415.0
49864     

In [147]:
# 컬럼 평균값 계산
df[df['emp_title'] == 'ceo']['annual_inc'].mean()

145263.24031007753

In [146]:
# 코드 하나 변경으로 간단한 분석 가능
# owner가 아닌 사람들의 평균
df[df['emp_title'] != 'owner']['annual_inc'].mean()

77638.40680763956

In [148]:
# 각 직업별 평균연봉이 궁금하다
# 엑셀의 pivol table 과 비슷한 기능
df.groupby('emp_title').mean()

Unnamed: 0_level_0,loan_amnt,funded_amnt,funded_amnt_inv,int_rate,installment,annual_inc,dti,delinq_2yrs
emp_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
field operator,20000.0,20000.0,20000.0,9.440000,640.100000,130000.000000,4.320,0.0
sr. mechanical assesmbler,20000.0,20000.0,20000.0,16.020000,486.580000,52000.000000,29.830,0.0
air traffic controller,8000.0,8000.0,8000.0,6.720000,246.000000,101000.000000,21.670,0.0
aircraft mechanical assembler,10000.0,10000.0,10000.0,6.720000,307.500000,57000.000000,10.250,0.0
appraiser,5000.0,5000.0,5000.0,20.000000,185.820000,40000.000000,29.070,1.0
armed guard/technician,12000.0,12000.0,12000.0,14.080000,410.600000,48500.000000,15.610,1.0
associate business management analyst,7200.0,7200.0,7200.0,6.720000,221.400000,81000.000000,11.350,0.0
associate director portfolio management,9000.0,9000.0,9000.0,19.030000,330.050000,200000.000000,21.780,0.0
business consultant,7500.0,7500.0,7500.0,6.720000,230.620000,50000.000000,9.510,0.0
carpender,11200.0,11200.0,11200.0,10.910000,366.200000,100000.000000,16.930,0.0


In [149]:
# 위 테이블에서 연간수입 접근
df.groupby('emp_title').mean()['annual_inc']

emp_title
  field operator                            130000.000000
  sr. mechanical assesmbler                  52000.000000
 air traffic controller                     101000.000000
 aircraft mechanical assembler               57000.000000
 appraiser                                   40000.000000
 armed guard/technician                      48500.000000
 associate business management analyst       81000.000000
 associate director portfolio management    200000.000000
 business consultant                         50000.000000
 carpender                                  100000.000000
 cashier                                     30000.000000
 cdl driver                                  69000.000000
 child nutritionist server                   25000.000000
 contractor                                 142000.000000
 coordinators                                62000.000000
 crew leader                                 63000.000000
 deputy director of recreation               56000.000000
 dir

In [150]:
# 데이터정렬
df.groupby('emp_title').mean()['annual_inc'].sort_values(ascending=False)

emp_title
sr tech writer                        8000000.0
trash truck driver                    6500031.0
billing analyst                       2418000.0
tourist guide                         2416960.0
c.o.o.                                1150000.0
full-time rn                          1116000.0
contracts superintendent              1000000.0
film editor                            988000.0
security guard                         960831.6
manager director fixed income          900000.0
senior client partner                  850000.0
terminal operator                      810000.0
svp general merchandise manager        780000.0
vp of development                      750000.0
sales & marketing                      741250.0
partner and managing director          700000.0
partner physician                      700000.0
physician/medical ditector             660000.0
interventional cardiologist            650000.0
electronic assembly                    650000.0
cardiologist                  

## 결측치 처리
> 데이터 분석을 위해서는 데이터셋 내에 빈 값이 있는 경우 분석에 방해가 될 수 있는 여지가 많습니다.  
모든 결측치를 없애야 하는 것은 아니지만 되도록이면 결측치를 채우는 방법, 혹은 없애는 방법등으로 결측치를 처리합니다.  
몇가지 예시를 살펴보면서 결측치 처리에 대해 알아봅시다.

In [151]:
# info() 함수는 결측치에 대한 정보도 보여줍니다.
# 컬럼별 isnull() 함수를 사용해도 무방합니다.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 22 columns):
loan_amnt              50000 non-null int64
funded_amnt            50000 non-null int64
funded_amnt_inv        50000 non-null float64
term                   50000 non-null object
int_rate               50000 non-null float64
installment            50000 non-null float64
grade                  50000 non-null object
sub_grade              50000 non-null object
emp_title              50000 non-null object
emp_length             45820 non-null object
home                   50000 non-null object
annual_inc             50000 non-null float64
verification_status    50000 non-null object
issue_d                50000 non-null object
loan_status            50000 non-null object
pymnt_plan             50000 non-null object
purpose                50000 non-null object
title                  50000 non-null object
zip_code               50000 non-null object
addr_state             50000 non-n

    확인결과 emp_title, emp_length, dti에 결측치가 존재합니다.
    해당 컬럼의 결측치 샘플들을 살펴보고 결측치를 처리해 보겠습니다.

In [153]:
# 컬럼별 결측치 확인을 위한 isnull()함수 리턴값이 bool 형태로 반환되어 조건부 샘플링이 가능합니다.
df['emp_title'].isnull().sum()

0

In [154]:
len(df) - df['emp_title'].isnull().sum()

50000

In [155]:
df[df['emp_title'].isnull()]

Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,purpose,title,zip_code,addr_state,dti,delinq_2yrs


In [156]:
df[df['dti'].isnull()]

Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,purpose,title,zip_code,addr_state,dti,delinq_2yrs
1020,30000,30000,30000.0,60 months,17.09,747.03,D,D1,,,MORTGAGE,0.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,347xx,FL,,1.0
1076,25200,25200,25200.0,60 months,12.62,568.49,C,C1,,,MORTGAGE,0.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,450xx,OH,,0.0
1193,22000,22000,22000.0,36 months,16.02,773.68,C,C5,,,MORTGAGE,0.0,Not Verified,Dec-2017,Late (31-120 days),y,credit_card,Credit card refinancing,935xx,CA,,0.0
1194,19000,19000,19000.0,60 months,16.02,462.25,C,C5,secretary,< 1 year,MORTGAGE,0.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,827xx,WY,,0.0
1902,20000,20000,20000.0,36 months,15.05,693.80,C,C4,,,RENT,0.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,871xx,NM,,4.0
2103,8000,8000,8000.0,36 months,7.97,250.59,A,A5,,,MORTGAGE,0.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,762xx,TX,,0.0
3009,24000,24000,24000.0,60 months,17.09,597.63,D,D1,,,MORTGAGE,0.0,Not Verified,Dec-2017,Fully Paid,n,home_improvement,Home improvement,365xx,AL,,0.0
3042,9950,9950,9925.0,36 months,5.32,299.65,A,A1,,,MORTGAGE,0.0,Not Verified,Dec-2017,Current,n,credit_card,Credit card refinancing,936xx,CA,,0.0
3712,32000,32000,32000.0,36 months,7.35,993.20,A,A4,,,RENT,0.0,Not Verified,Dec-2017,Current,n,moving,Moving and relocation,900xx,CA,,0.0
4222,9000,9000,9000.0,36 months,9.44,288.05,B,B1,,,OWN,0.0,Not Verified,Dec-2017,Current,n,debt_consolidation,Debt consolidation,775xx,TX,,2.0


    직업과 근속연수에 관한 부분은 데이터를 통한 유추나 계산값을 통해 채워넣을 수 있는 항목은 아닌 것 같습니다.
    다만 dti의 경우 실수로 채워져 있는 부분이니 수업을 위해 평균값 혹은 근사치를 계산하여 채워보도록 하겠습니다.

### 결측치 채우기

In [None]:
# dti 컬럼의 NaN값 index 확인
df[df['dti'].isnull()].index

In [None]:
# fillna() 함수로 NaN 값을 dti 컬럼의 평균으로 채우기
df['dti'].fillna(df['dti'].mean(), inplace=True) # inplace 파라메터 사용가능
# fillna() 함수의 다양한 채우기 방법 파라메터 확인해보기

### 결측치 제거

In [None]:
df[df['emp_title'].isnull()]

In [None]:
df.info()

In [None]:
df.dropna().info()

In [157]:
# 결측치 제거
df.dropna(inplace=True)

In [158]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 45813 entries, 0 to 49999
Data columns (total 22 columns):
loan_amnt              45813 non-null int64
funded_amnt            45813 non-null int64
funded_amnt_inv        45813 non-null float64
term                   45813 non-null object
int_rate               45813 non-null float64
installment            45813 non-null float64
grade                  45813 non-null object
sub_grade              45813 non-null object
emp_title              45813 non-null object
emp_length             45813 non-null object
home                   45813 non-null object
annual_inc             45813 non-null float64
verification_status    45813 non-null object
issue_d                45813 non-null object
loan_status            45813 non-null object
pymnt_plan             45813 non-null object
purpose                45813 non-null object
title                  45813 non-null object
zip_code               45813 non-null object
addr_state             45813 non-n