In [1]:
import os
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime
from preprocessing import *

In [2]:
# Global constants
BASE_YYMM = '201912'                      # suffix used in the dated input file names
FILE_PATH = Path('data') / '보유리스크율_일반'  # folder holding the input workbooks

# Environment setup
# Render floats with thousands separators and no decimal places.
pd.options.display.float_format = '{:,.0f}'.format
# Create the output folder if it does not exist yet.
os.makedirs('result', exist_ok=True)

In [8]:
# Load the input workbooks.
# Each dtype map forces the listed code/key columns to `str` instead of an inferred dtype.
_earned_premium_dtypes = {'CLG_YM': str, 'ARC_INPL_CD': str, 'RRNR_DAT_DVCD': str, 'RRNR_CTC_BZ_DVCD': str}
_loss_dtypes = {'CLG_YM': str, 'ARC_INPL_CD': str, 'RRNR_DAT_DVCD': str, 'RRNR_DMFR_DVCD': str}
_treaty_lob_dtypes = {'RRNR_TTY_CD': str, 'PDC_CD': str}
_treaty_dtypes = {'RRNR_TTY_CD': str, 'TTY_YR': str}

일반_원수_경과보험료 = pd.read_excel(
    FILE_PATH / f'일반_원수_경과보험료_{BASE_YYMM}.xlsx',
    dtype=_earned_premium_dtypes,
)
일반_출재_경과보험료 = pd.read_excel(
    FILE_PATH / f'일반_출재_경과보험료_{BASE_YYMM}.xlsx',
    dtype=_earned_premium_dtypes,
)
일반_원수_손해액 = pd.read_excel(
    FILE_PATH / f'일반_원수_손해액_{BASE_YYMM}.xlsx',
    dtype=_loss_dtypes,
)
일반_출재_손해액 = pd.read_excel(
    FILE_PATH / f'일반_출재_손해액_{BASE_YYMM}.xlsx',
    dtype=_loss_dtypes,
)
일반_특약보종별_직전1년경과보험료 = pd.read_excel(
    FILE_PATH / f'일반_특약보종별_직전1년경과보험료_{BASE_YYMM}.xlsx',
    dtype=_treaty_lob_dtypes,
)
일반_특약보종별_직전1년손해액 = pd.read_excel(
    FILE_PATH / f'일반_특약보종별_직전1년손해액_{BASE_YYMM}.xlsx',
    dtype=_treaty_lob_dtypes,
)
# NOTE(review): this file name is pinned to 202012 while every other dated file
# uses BASE_YYMM ('201912') — confirm whether that is intentional.
일반_특약수수료 = pd.read_excel(
    FILE_PATH / f'일반_특약수수료_{202012}.xlsx',
    dtype=_treaty_dtypes,
)
일반_특약정보 = pd.read_excel(
    FILE_PATH / f'일반_특약정보_{BASE_YYMM}.xlsx',
    dtype=_treaty_dtypes,
)
# Undated reference workbooks.
일반_상품정보 = pd.read_excel(
    FILE_PATH / '일반_상품정보.xlsx',
    dtype={'PDC_CD': str, 'PDGR_CD': str},
)
산업_손해율_변동계수 = pd.read_excel(FILE_PATH / '산업_손해율_변동계수.xlsx')

In [9]:
# General-insurance loss ratio per BOZ_CD.
#
# Steps:
#   1. Attach RRNR_DVCD / DMFR_DVCD / BOZ_CD to each of the four raw frames.
#   2. Sum premium and loss per (FY, BOZ_CD) and take OGL_* minus RN_*
#      (presumably gross minus ceded, going by the '원수'/'출재' labels).
#   3. Compute LOSS / ELP_PRM * 100 per (FY, BOZ_CD) and average over FY.

_FY_BOZ_KEYS = ['FY', 'BOZ_CD']


def _add_class_codes(raw, rrnr_label, product_info):
    """Return a copy of `raw` with RRNR_DVCD, DMFR_DVCD and BOZ_CD columns added.

    The classifiers run in this fixed order on the same working copy, so each
    later classifier receives the columns added by the earlier ones.
    """
    tagged = raw.copy()
    tagged['RRNR_DVCD'] = clsf_rrnr_dvcd(tagged, rrnr_label)
    tagged['DMFR_DVCD'] = clsf_dmfr_dvcd(tagged)
    tagged['BOZ_CD'] = clsf_boz_cd(tagged, product_info)
    return tagged


def _sum_by_fy_boz(frame, value_col):
    """Sum `value_col` per (FY, BOZ_CD) and return the result as a flat frame."""
    return frame.groupby(_FY_BOZ_KEYS)[value_col].sum().reset_index()


def _net_of_ceded(gross, ceded, gross_col, ceded_col, net_col):
    """Outer-join two (FY, BOZ_CD) aggregates and return `net_col = gross_col - ceded_col`.

    A key present on only one side contributes 0 for the missing amount.
    """
    merged = gross.merge(ceded, on=_FY_BOZ_KEYS, how='outer')
    merged[[gross_col, ceded_col]] = merged[[gross_col, ceded_col]].fillna(0)
    merged[net_col] = merged[gross_col] - merged[ceded_col]
    return merged.drop(columns=[gross_col, ceded_col])


# Earned premium
일반_원수_경과보험료_가공 = _add_class_codes(일반_원수_경과보험료, '원수', 일반_상품정보)
일반_출재_경과보험료_가공 = _add_class_codes(일반_출재_경과보험료, '출재', 일반_상품정보)

일반_원수_경과보험료_집계 = _sum_by_fy_boz(일반_원수_경과보험료_가공, 'OGL_ELP_PRM')
일반_출재_경과보험료_집계 = _sum_by_fy_boz(일반_출재_경과보험료_가공, 'RN_ELP_PRM')
일반_경과보험료_집계 = _net_of_ceded(
    일반_원수_경과보험료_집계, 일반_출재_경과보험료_집계,
    gross_col='OGL_ELP_PRM', ceded_col='RN_ELP_PRM', net_col='ELP_PRM',
)

# Loss amount
일반_원수_손해액_가공 = _add_class_codes(일반_원수_손해액, '원수', 일반_상품정보)
일반_출재_손해액_가공 = _add_class_codes(일반_출재_손해액, '출재', 일반_상품정보)

일반_원수_손해액_집계 = _sum_by_fy_boz(일반_원수_손해액_가공, 'OGL_LOSS')
일반_출재_손해액_집계 = _sum_by_fy_boz(일반_출재_손해액_가공, 'RN_LOSS')
일반_손해액_집계 = _net_of_ceded(
    일반_원수_손해액_집계, 일반_출재_손해액_집계,
    gross_col='OGL_LOSS', ceded_col='RN_LOSS', net_col='LOSS',
)

# Loss ratio
일반_손해율 = 일반_경과보험료_집계.merge(일반_손해액_집계, on=_FY_BOZ_KEYS, how='outer')
일반_손해율[['ELP_PRM', 'LOSS']] = 일반_손해율[['ELP_PRM', 'LOSS']].fillna(0)
# The outer merge + fillna(0) can leave ELP_PRM == 0, where the ratio is inf or NaN.
# Mask those rows to NaN so one zero-premium year cannot turn the per-BOZ_CD mean into inf;
# `mean()` skips NaN, so such years are left out of the average.
일반_손해율['LOSS_RATIO'] = (
    일반_손해율['LOSS'] / 일반_손해율['ELP_PRM'] * 100
).where(일반_손해율['ELP_PRM'] != 0)
# Simple (unweighted) average of the yearly ratios per BOZ_CD.
일반_손해율 = 일반_손해율.groupby(['BOZ_CD'], as_index=False)['LOSS_RATIO'].mean()
일반_손해율

Unnamed: 0,BOZ_CD,LOSS_RATIO
0,A001,70
1,A002,51
2,A003,33
3,A004,43
4,A005,54
5,A006,40
6,A007,46
7,A008,2
8,A009,88
9,A010,29


In [None]:
# Load the input workbooks.
# This cell reloads the same files as the earlier data-loading cell, so it uses the same
# dtype maps: the listed code/key columns are forced to `str` rather than an inferred dtype.
# With different maps, re-running this cell would silently replace the raw frames with
# differently typed ones.
_earned_premium_dtypes = {'CLG_YM': str, 'ARC_INPL_CD': str, 'RRNR_DAT_DVCD': str, 'RRNR_CTC_BZ_DVCD': str}
_loss_dtypes = {'CLG_YM': str, 'ARC_INPL_CD': str, 'RRNR_DAT_DVCD': str, 'RRNR_DMFR_DVCD': str}
_treaty_lob_dtypes = {'RRNR_TTY_CD': str, 'PDC_CD': str}
_treaty_dtypes = {'RRNR_TTY_CD': str, 'TTY_YR': str}

일반_원수_경과보험료 = pd.read_excel(FILE_PATH / f'일반_원수_경과보험료_{BASE_YYMM}.xlsx', dtype=_earned_premium_dtypes)
일반_출재_경과보험료 = pd.read_excel(FILE_PATH / f'일반_출재_경과보험료_{BASE_YYMM}.xlsx', dtype=_earned_premium_dtypes)
일반_원수_손해액 = pd.read_excel(FILE_PATH / f'일반_원수_손해액_{BASE_YYMM}.xlsx', dtype=_loss_dtypes)
일반_출재_손해액 = pd.read_excel(FILE_PATH / f'일반_출재_손해액_{BASE_YYMM}.xlsx', dtype=_loss_dtypes)
일반_특약보종별_직전1년경과보험료 = pd.read_excel(FILE_PATH / f'일반_특약보종별_직전1년경과보험료_{BASE_YYMM}.xlsx', dtype=_treaty_lob_dtypes)
일반_특약보종별_직전1년손해액 = pd.read_excel(FILE_PATH / f'일반_특약보종별_직전1년손해액_{BASE_YYMM}.xlsx', dtype=_treaty_lob_dtypes)
# NOTE(review): this file name is pinned to 202012 while every other dated file
# uses BASE_YYMM — confirm whether that is intentional.
일반_특약수수료 = pd.read_excel(FILE_PATH / f'일반_특약수수료_{202012}.xlsx', dtype=_treaty_dtypes)
일반_특약정보 = pd.read_excel(FILE_PATH / f'일반_특약정보_{BASE_YYMM}.xlsx', dtype=_treaty_dtypes)
일반_상품정보 = pd.read_excel(FILE_PATH / '일반_상품정보.xlsx', dtype={'PDC_CD': str, 'PDGR_CD': str})
산업_손해율_변동계수 = pd.read_excel(FILE_PATH / '산업_손해율_변동계수.xlsx')

In [None]:
# Outer-join the two processed frames on BOZ_CD, TTY_YR and TTY_CD_GRP and display the result.
# NOTE(review): neither `_가공` frame is created in a cell visible above this one —
# confirm where they are built before relying on Restart & Run All.
pd.merge(
    일반_특약보종별_직전1년경과보험료_가공,
    일반_특약보종별_직전1년손해액_가공,
    how='outer',
    on=['BOZ_CD', 'TTY_YR', 'TTY_CD_GRP'],
)

In [None]:
# Example run for a single treaty group.
# Groups listed by the original author: 근재보험특약, 기술보험특약, 재물보험특약, 배상책임보험특약, 해외PST
특약명 = "해외PST"

# Premium columns summed per (TTY_YR, BOZ_CD) for the selected group.
prem = (
    일반_특약보종별_직전1년경과보험료_가공
    .loc[lambda frame: frame['TTY_CD_GRP'] == 특약명]
    .groupby(['TTY_YR', 'BOZ_CD'], as_index=False)[['ELP_PRM', 'T02_RN_ELP_PRM']]
    .sum()
)

# Commission rows for the same group, without the grouping column.
comm = (
    일반_특약수수료_가공
    .loc[lambda frame: frame['TTY_CD_GRP'] == 특약명]
    .drop(columns='TTY_CD_GRP')
)

# Illustrative inputs for the example call.
loss_ratio = 0.6
risk_coef = 0.55
get_ret_risk_rate_by_risk_coef(comm, prem, loss_ratio, risk_coef)

In [None]:
def get_ret_risk_rate_by_loss_dist(comm: pd.DataFrame, prem: pd.DataFrame, loss: pd.DataFrame, loss_ratio: float, cv: pd.DataFrame) -> pd.DataFrame:
    """특약별 보유리스크율(손해율분포법)"""