In [122]:
import os
import numpy as np
import pandas as pd
from pathlib import Path
from typing import List, Tuple
from preprocessing import clsf_boz_cd, clsf_rrnr_dvcd, clsf_dmfr_dvcd, clsf_crd_grd

In [19]:
# Global constants
FILE_PATH = Path('./data/현행추정부채_일반')

# Environment setup
# Floats are rendered as comma-grouped whole numbers.
# NOTE: this applies to every displayed float, so fractional values
# (e.g. 0.5) are shown rounded as well.
pd.set_option('display.float_format', '{:,.0f}'.format)

# Make sure the output directory exists before anything is written to it
os.makedirs('result', exist_ok=True)

In [20]:
# Load input data
# Code-like columns are forced to str so they are not parsed as numbers.
일반_상품정보 = pd.read_excel(
    FILE_PATH / '일반_상품정보.xlsx',
    dtype={'PDC_CD': str, 'PDGR_CD': str},
)
일반_원수_미경과보험료 = pd.read_excel(
    FILE_PATH / '일반_원수_미경과보험료.xlsx',
    dtype={
        'RRNR_DAT_DVCD': str,
        'RRNR_CTC_BZ_DVCD': str,
        'ARC_INPL_CD': str,
    },
)
일반_출재_미경과보험료 = pd.read_excel(
    FILE_PATH / '일반_출재_미경과보험료.xlsx',
    dtype={
        'RRNR_DAT_DVCD': str,
        'RRNR_CTC_BZ_DVCD': str,
        'ARC_INPL_CD': str,
        'T02_RN_RINSC_CD': str,
    },
)
# Reinsurer credit grades: rename the Korean headers to the column names
# used by the rest of the notebook.
재보험자_국내신용등급 = (
    pd.read_excel(
        FILE_PATH / '재보험자_국내신용등급.xlsx',
        dtype={'재보험사코드': str},
    )
    .rename(columns={'재보험사코드': 'T02_RN_RINSC_CD', '국내신용등급': 'CRD_GRD'})
)



In [117]:
# Load the '보험금_진전추이' sheet, fill missing BASE_6 / BASE_7 values,
# keep only the key and BASE_1..BASE_7 columns, and cast BASE_1..BASE_5 to float.
일반_보험금진전추이 = (
    pd.read_excel(
        FILE_PATH / '일반_할인요소_검증_201912_20210218.xlsx',
        sheet_name='보험금_진전추이',
        dtype={'PDGR_CD': str, 'AY_YM': str},
    )
    .assign(
        # Where BASE_6 is missing, carry BASE_5 forward
        BASE_6=lambda df: np.where(df['BASE_6'].isna(), df['BASE_5'], df['BASE_6']),
        # Where BASE_7 is missing, carry BASE_6 forward (assign evaluates its
        # keyword arguments in order, so this sees the BASE_6 assigned above)
        BASE_7=lambda df: np.where(df['BASE_7'].isna(), df['BASE_6'], df['BASE_7']),
    )
    .filter(['PDGR_CD', 'AY_YM'] + ['BASE_{}'.format(k) for k in range(1, 8)])
    .astype({'BASE_{}'.format(k): float for k in range(1, 6)})
)

# Function (step-by-step walkthrough of the get_cf logic for product group 26)
# NOTE: this import repeats the one in the notebook's import cell.
from typing import List, Tuple
# Planned signature:
# get_cf(cf: pd.DataFrame, int_rate: pd.DataFrame, pdgr_cd: str, cf_type: str) -> Tuple[pd.Series, pd.Series]:
pdgr_cd = '26'
cf_type = '보험금'
cf = 일반_보험금진전추이.copy().query('PDGR_CD == "26"')

# Product groups 25 and 26 use a 7-row table; every other group uses 5 rows
if pdgr_cd in ['25', '26']:
    n = 7
else:
    n = 5

# Input validation: required columns, then expected row count
required_cols = {'AY_YM', 'BASE_1', 'BASE_2', 'BASE_3', 'BASE_4', 'BASE_5', 'BASE_6', 'BASE_7'}
if not required_cols <= set(cf.columns):
    raise Exception('cf 필수 컬럼 누락 오류')
if n != len(cf):
    raise Exception('cf 입력 크기 오류')

base_cols = ['BASE_1', 'BASE_2', 'BASE_3', 'BASE_4', 'BASE_5', 'BASE_6', 'BASE_7']
cf_arr = cf.sort_values(by='AY_YM')[base_cols].to_numpy()

# Payment cash-flow calculation:
# for each step i, add up the one-column increments along the anti-diagonal
# that starts at the last (latest AY_YM) row; negative totals are floored at 0.
pay_cf_all = []
for i in range(n-1, 0, -1):
    first_col = n-1-i
    pay_cf = 0
    for j in range(i):
        row = (n-1)-j
        col = first_col+j
        pay_cf = pay_cf + (cf_arr[row, col+1]-cf_arr[row, col])
    pay_cf = max(pay_cf, 0)
    pay_cf_all.append(pay_cf)

cf_t = np.arange(0.5, n-0.5)
pay_cf_all = np.array(pay_cf_all)
# Normalise so the payment shares sum to 1
pay_cf_all = pay_cf_all/pay_cf_all.sum()
pay_cf_all

array([0.53215922, 0.21333367, 0.12768267, 0.07229671, 0.03756675,
       0.01696098])

In [133]:
def get_cf(cf: pd.DataFrame, pdgr_cd: str, cf_type: str) -> Tuple[pd.Series, pd.Series]:
    """Derive a normalised payment pattern from a BASE_1..BASE_7 development table.

    The rows of ``cf`` are sorted by ``AY_YM``. For each step ``i = n-1 .. 1`` the
    one-column increments ``BASE_{c+1} - BASE_c`` are summed along the
    anti-diagonal that starts at the last (latest ``AY_YM``) row; a negative
    total is floored at 0. The totals are then divided by their sum.

    NOTE(review): taking column-to-column differences suggests the BASE_k
    columns hold cumulative amounts -- confirm against the data source.

    Args:
        cf: Table for a single product group. Must contain ``AY_YM`` and
            ``BASE_1`` .. ``BASE_7`` and have exactly ``n`` rows, where ``n``
            is 7 for product groups '25'/'26' and 5 otherwise.
        pdgr_cd: Product group code; only used to choose ``n``.
        cf_type: Cash-flow type. Only '보험금' is supported.

    Returns:
        ``(cf_t, pay_cf_rate)``: the time points 0.5, 1.5, ..., n - 1.5 and the
        share of the total payment falling on each of them (n - 1 entries each).

    Raises:
        Exception: a required column is missing or ``len(cf) != n``.
        ValueError: ``cf_type`` is not supported, or every payment total is
            zero so the shares cannot be normalised.
    """
    base_cols = ['BASE_1', 'BASE_2', 'BASE_3', 'BASE_4', 'BASE_5', 'BASE_6', 'BASE_7']

    n = 7 if pdgr_cd in ['25', '26'] else 5
    if not set(['AY_YM'] + base_cols).issubset(cf.columns):
        raise Exception('cf 필수 컬럼 누락 오류')

    if len(cf) != n:
        raise Exception('cf 입력 크기 오류')

    # Previously an unsupported cf_type fell off the end of the function and
    # returned None, which only surfaced later as an unpacking error.
    if cf_type != '보험금':
        raise ValueError(f'cf_type 입력 오류: {cf_type!r}')

    cf_arr = cf.sort_values(by='AY_YM')[base_cols].to_numpy()

    pay_cf_all = []
    for i in range(n-1, 0, -1):
        first_col = n-1-i
        pay_cf = 0
        for j in range(i):
            row = (n-1)-j          # walk upwards from the latest AY_YM row
            col = first_col+j      # while moving one column to the right
            pay_cf += cf_arr[row, col+1]-cf_arr[row, col]
        # Negative totals are treated as no payment
        pay_cf_all.append(max(pay_cf, 0))
    pay_cf_all = np.array(pay_cf_all)

    total = pay_cf_all.sum()
    if total == 0:
        # Avoid silently returning an all-NaN pattern from 0/0
        raise ValueError('cf 지급액 합계 0 오류')
    pay_cf_rate = pd.Series(pay_cf_all/total)

    cf_t = pd.Series(np.arange(0.5, n-0.5))
    return (cf_t, pay_cf_rate)

# Function call check: run get_cf for product group 24 and show the time points.
# The table passed in is filtered with the same pdgr_cd that is handed to
# get_cf, and cf_type is passed through instead of repeating the literal.
pdgr_cd = '24'
cf_type = '보험금'
cf_t, cf = get_cf(일반_보험금진전추이.query('PDGR_CD == @pdgr_cd'), pdgr_cd, cf_type)
cf_t

0   0
1   2
2   2
3   4
dtype: float64

In [21]:
# Data preprocessing
# Both frames get the same three classification columns, added in the same
# order as before (direct frame first, then ceded); only the label passed to
# clsf_rrnr_dvcd differs.
for frame, rrnr_label in (
    (일반_원수_미경과보험료, '원수'),
    (일반_출재_미경과보험료, '출재'),
):
    frame['BOZ_CD'] = clsf_boz_cd(frame, 일반_상품정보)
    frame['RRNR_DVCD'] = clsf_rrnr_dvcd(frame, rrnr_label)
    frame['DMFR_DVCD'] = clsf_dmfr_dvcd(frame)

# Only the ceded frame gets a credit grade, looked up from the reinsurer table
일반_출재_미경과보험료['CRD_GRD'] = clsf_crd_grd(일반_출재_미경과보험료, 재보험자_국내신용등급)
일반_출재_미경과보험료

Unnamed: 0,RRNR_DAT_DVCD,RRNR_CTC_BZ_DVCD,NTNL_CTRY_CD,ARC_INPL_CD,T02_RN_RINSC_CD,T02_LTPD_RN_URND_PRM,BOZ_CD,RRNR_DVCD,DMFR_DVCD,CRD_GRD
0,01,3,KR,1040110,124012,244906286,A001,03,01,AA
1,01,3,KR,1054410,124012,277457,A007,03,01,AA
2,01,3,KR,1049210,124060,3997104940,A006,03,01,AAA
3,01,3,KR,1067110,124012,13935049,A007,03,01,AA
4,01,3,KR,1061610,122086,35489513,A007,03,01,AA-
...,...,...,...,...,...,...,...,...,...,...
1713,01,3,KR,1075010,511229,159905282,A004,03,01,AA+
1714,01,3,KR,1050310,511098,8226012,A003,03,01,AA
1715,01,3,KR,1050310,511127,5484008,A003,03,01,AA
1716,01,3,KR,1050310,511279,2742004,A003,03,01,AA


In [22]:
# Data aggregation
# Total LTPD_URND_PRM per (RRNR_DVCD, DMFR_DVCD, BOZ_CD), with the group keys
# returned as ordinary columns.
일반_원수_미경과보험료_집계 = (
    일반_원수_미경과보험료
    .groupby(['RRNR_DVCD', 'DMFR_DVCD', 'BOZ_CD'])['LTPD_URND_PRM']
    .sum()
    .reset_index()
)
일반_원수_미경과보험료_집계.head()

Unnamed: 0,RRNR_DVCD,DMFR_DVCD,BOZ_CD,LTPD_URND_PRM
0,1,1,A001,23420916171
1,1,1,A002,27146263273
2,1,1,A003,96625029260
3,1,1,A004,22191701960
4,1,1,A005,5095840261


In [None]:
# Export data (TODO: not implemented yet)