In [1]:
import os
import numpy as np
import pandas as pd
from pathlib import Path
from typing import List, Tuple
from preprocessing import clsf_boz_cd, clsf_rrnr_dvcd, clsf_dmfr_dvcd, clsf_crd_grd, get_cf

In [2]:
# Environment settings
# Render floats in DataFrame output with thousands separators and no decimals.
pd.set_option('display.float_format', '{:,.0f}'.format)
# Make sure the output directory exists; an existing directory is left as-is.
Path('result').mkdir(exist_ok=True)

# Global constants
# Base directory that the input Excel workbooks are read from.
FILE_PATH = Path('./data/현행추정부채_일반')

In [3]:
# Load data
# Code-like columns are pinned to str so pandas does not infer them as numbers;
# later cells compare them against string literals.
일반_상품정보 = pd.read_excel(
    FILE_PATH / '일반_상품정보.xlsx',
    dtype={'PDC_CD': str, 'PDGR_CD': str},
)
일반_원수_미경과보험료 = pd.read_excel(
    FILE_PATH / '일반_원수_미경과보험료.xlsx',
    dtype={'RRNR_DAT_DVCD': str, 'RRNR_CTC_BZ_DVCD': str, 'ARC_INPL_CD': str},
)
일반_출재_미경과보험료 = pd.read_excel(
    FILE_PATH / '일반_출재_미경과보험료.xlsx',
    dtype={'RRNR_DAT_DVCD': str, 'RRNR_CTC_BZ_DVCD': str, 'ARC_INPL_CD': str, 'T02_RN_RINSC_CD': str},
)
# Rename the Korean headers to the column names used by the other frames
# (T02_RN_RINSC_CD also exists in the ceded-premium frame loaded above).
재보험자_국내신용등급 = (
    pd.read_excel(FILE_PATH / '재보험자_국내신용등급.xlsx', dtype={'재보험사코드': str})
    .rename(columns={'재보험사코드': 'T02_RN_RINSC_CD', '국내신용등급': 'CRD_GRD'})
)
# PDGR_CD is read as str here as well: the notebook filters this frame with
# PDGR_CD == '26', which matches nothing if the column is inferred as integer.
일반_보험금진전추이 = pd.read_excel(
    FILE_PATH / '일반_보험금진전추이.xlsx',
    dtype={'PDGR_CD': str},
)
할인율 = pd.read_excel(FILE_PATH / '할인율.xlsx')

In [4]:
# Function check: run get_cf for a single product group.
# (The earlier `cf = ...query(...)` assignment was dropped: it was overwritten
# by the get_cf result below before ever being read.)
pdgr_cd = '26'
cf_type = '보험료'
# NOTE(review): get_cf comes from the project-local preprocessing module; the
# meaning of the two returned series is not visible here — confirm there.
cf_t, cf = get_cf(일반_보험금진전추이.query('PDGR_CD == @pdgr_cd'), pdgr_cd, cf_type)
# Display cf_t next to cf scaled by 100.
cf_t, cf*100

(0   0
 1   1
 2   2
 3   3
 4   4
 5   5
 dtype: float64,
 0   61
 1   20
 2   11
 3    6
 4    2
 5    1
 dtype: float64)

Unnamed: 0,BSE_YM,PDGR_CD,AY,BASE_1,BASE_2,BASE_3,BASE_4,BASE_5,BASE_6,BASE_7,...,누적비율,Spot Rate,할인요소_준비금부채,Unnamed: 20,Year.1,평균지급기간.1,조정비율,1-조정비율,Spot Rate.1,할인요소_보험료부채
0,201812,23,201412,9639925332,14877875923,14086542751,14017337616,13876794563,,,...,1.0,0,1,,1.0,0.0,1.0,1.0,0,1
1,201812,23,201512,14132224400,18746470441,18738660139,18653335765,18466310466,,,...,1.0,0,0,,2.0,1.0,1.0,0.0,0,0
2,201812,23,201612,14933019683,19095676600,19265830643,19175133889,18982876848,,,...,1.0,0,0,,3.0,2.0,1.0,0.0,0,0
3,201812,23,201712,7989873013,11083330122,10951097678,10899543762,10790260872,,,...,1.0,0,0,,4.0,3.0,1.0,0.0,0,0
4,201812,23,201812,14251631432,19473199402,19240869522,19150290277,18958282327,,,...,,일반/화재,1,,,,,,일반/화재,1
5,201812,24,201412,2190993214,6546162379,8047012762,8664280736,9424580163,,,...,1.0,0,1,,1.0,0.0,1.0,1.0,0,1
6,201812,24,201512,1846469310,6857098077,8666964580,10348464961,11256553263,,,...,1.0,0,0,,2.0,1.0,1.0,0.0,0,0
7,201812,24,201612,1536339115,2808637207,4916131681,5592275229,6083003055,,,...,1.0,0,0,,3.0,2.0,1.0,0.0,0,0
8,201812,24,201712,2127794805,8385689025,11188287245,12727076017,13843889853,,,...,1.0,0,0,,4.0,3.0,1.0,0.0,0,0
9,201812,24,201812,2086529202,6664018726,8891214007,10114073233,11001593422,,,...,,일반/기술,1,,,,,,일반/기술,1


In [21]:
# Data preprocessing
# Both unearned-premium frames receive the same three classification columns;
# only the label handed to clsf_rrnr_dvcd differs ('원수' vs '출재').
for premium_frame, rrnr_label in (
    (일반_원수_미경과보험료, '원수'),
    (일반_출재_미경과보험료, '출재'),
):
    premium_frame['BOZ_CD'] = clsf_boz_cd(premium_frame, 일반_상품정보)
    premium_frame['RRNR_DVCD'] = clsf_rrnr_dvcd(premium_frame, rrnr_label)
    premium_frame['DMFR_DVCD'] = clsf_dmfr_dvcd(premium_frame)

# Only the ceded frame additionally gets a CRD_GRD column, derived together
# with the reinsurer credit-rating table.
일반_출재_미경과보험료['CRD_GRD'] = clsf_crd_grd(일반_출재_미경과보험료, 재보험자_국내신용등급)
일반_출재_미경과보험료

Unnamed: 0,RRNR_DAT_DVCD,RRNR_CTC_BZ_DVCD,NTNL_CTRY_CD,ARC_INPL_CD,T02_RN_RINSC_CD,T02_LTPD_RN_URND_PRM,BOZ_CD,RRNR_DVCD,DMFR_DVCD,CRD_GRD
0,01,3,KR,1040110,124012,244906286,A001,03,01,AA
1,01,3,KR,1054410,124012,277457,A007,03,01,AA
2,01,3,KR,1049210,124060,3997104940,A006,03,01,AAA
3,01,3,KR,1067110,124012,13935049,A007,03,01,AA
4,01,3,KR,1061610,122086,35489513,A007,03,01,AA-
...,...,...,...,...,...,...,...,...,...,...
1713,01,3,KR,1075010,511229,159905282,A004,03,01,AA+
1714,01,3,KR,1050310,511098,8226012,A003,03,01,AA
1715,01,3,KR,1050310,511127,5484008,A003,03,01,AA
1716,01,3,KR,1050310,511279,2742004,A003,03,01,AA


In [22]:
# Data aggregation
# Sum LTPD_URND_PRM per (RRNR_DVCD, DMFR_DVCD, BOZ_CD) combination and turn
# the group keys back into ordinary columns.
group_keys = ['RRNR_DVCD', 'DMFR_DVCD', 'BOZ_CD']
일반_원수_미경과보험료_집계 = (
    일반_원수_미경과보험료
    .groupby(group_keys)['LTPD_URND_PRM']
    .sum()
    .reset_index()
)
일반_원수_미경과보험료_집계.head()

Unnamed: 0,RRNR_DVCD,DMFR_DVCD,BOZ_CD,LTPD_URND_PRM
0,1,1,A001,23420916171
1,1,1,A002,27146263273
2,1,1,A003,96625029260
3,1,1,A004,22191701960
4,1,1,A005,5095840261


In [None]:
# Export data (TODO: this cell is still empty — no export step has been implemented yet)