In [1]:
import os
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime
from preprocessing import clsf_tty_cd_grp, clsf_boz_cd

In [2]:
# Environment setup
# Show floats with thousands separators and no decimals in rendered frames.
pd.set_option('display.float_format', '{:,.0f}'.format)
# Make sure the output directory exists (no error if it is already there).
Path('result').mkdir(exist_ok=True)

# Global constants
BASE_YYMM = '201912'  # suffix used in the dated input file names
FILE_PATH = Path('./data/보유리스크율_일반')  # directory holding the input workbooks

In [3]:
# Load data
# Code-like columns are read with an explicit str dtype instead of letting
# pandas infer a numeric type.

## Earned premium / loss amounts (direct and ceded)
일반_원수_경과보험료 = pd.read_excel(
    FILE_PATH / f'일반_원수_경과보험료_{BASE_YYMM}.xlsx',
    dtype={'CLG_YM': str, 'ARC_INPL_CD': str},
)
일반_출재_경과보험료 = pd.read_excel(
    FILE_PATH / f'일반_출재_경과보험료_{BASE_YYMM}.xlsx',
    dtype={'CLG_YM': str, 'ARC_INPL_CD': str},
)
일반_원수_손해액 = pd.read_excel(
    FILE_PATH / f'일반_원수_손해액_{BASE_YYMM}.xlsx',
    dtype={'CLG_YM': str, 'ARC_INPL_CD': str},
)
일반_출재_손해액 = pd.read_excel(
    FILE_PATH / f'일반_출재_손해액_{BASE_YYMM}.xlsx',
    dtype={'CLG_YM': str, 'ARC_INPL_CD': str},
)

## Trailing-one-year premium / loss by treaty and coverage type
일반_특약보종별_직전1년경과보험료 = pd.read_excel(
    FILE_PATH / f'일반_특약보종별_직전1년경과보험료_{BASE_YYMM}.xlsx',
    dtype={'RRNR_TTY_CD': str, 'ARC_INPL_CD': str},
)
일반_특약보종별_직전1년손해액 = pd.read_excel(
    FILE_PATH / f'일반_특약보종별_직전1년손해액_{BASE_YYMM}.xlsx',
    dtype={'RRNR_TTY_CD': str, 'ARC_INPL_CD': str},
)

## Treaty commission terms, treaty master, product master
# NOTE(review): this file name is pinned to 202012 while every other dated
# input uses BASE_YYMM ('201912') — confirm whether this is intentional.
일반_특약수수료 = pd.read_excel(
    FILE_PATH / f'일반_특약수수료_{202012}.xlsx',
    dtype={'RRNR_TTY_CD': str, 'TTY_YR': str},
)
일반_특약정보 = pd.read_excel(
    FILE_PATH / f'일반_특약정보_{BASE_YYMM}.xlsx',
    dtype={'RRNR_TTY_CD': str, 'TTY_YR': str},
)
일반_상품정보 = pd.read_excel(
    FILE_PATH / '일반_상품정보.xlsx',
    dtype={'PDC_CD': str, 'PDGR_CD': str},
)

In [91]:
# Data pre-processing
## Treaty master and treaty commission terms
일반_특약정보_가공 = 일반_특약정보.copy()
일반_특약정보_가공['TTY_CD_GRP'] = clsf_tty_cd_grp(일반_특약정보_가공)
일반_특약정보_가공 = 일반_특약정보_가공.drop(columns='TTY_CD_NM')

# Attach the commission parameters to each treaty, keep one row per distinct
# (year, group, parameter set) and discard treaties without commission terms.
일반_특약수수료_가공 = (
    일반_특약정보_가공
    .merge(
        일반_특약수수료[['RRNR_TTY_CD', 'CMSN_ADD_RT', 'CMSN_MULT_RT', 'BSE_LSRT', 'LWT_CMSN_RT', 'TOP_CMSN_RT']],
        on='RRNR_TTY_CD',
        how='left',
    )
    [['TTY_YR', 'TTY_CD_GRP', 'CMSN_ADD_RT', 'CMSN_MULT_RT', 'BSE_LSRT', 'LWT_CMSN_RT', 'TOP_CMSN_RT']]
    .dropna(subset=['CMSN_ADD_RT'])
    .drop_duplicates()
    .sort_values(by=['TTY_CD_GRP', 'TTY_YR'])
)


def _tag_boz_and_join_treaty(raw: pd.DataFrame, products: pd.DataFrame, treaties: pd.DataFrame) -> pd.DataFrame:
    """Add BOZ_CD and the treaty attributes to a treaty-level frame.

    Works on a copy of ``raw``: sets DMFR_DVCD to '01', derives BOZ_CD via
    ``clsf_boz_cd``, left-joins ``treaties`` on RRNR_TTY_CD and finally drops
    the RRNR_TTY_CD, ARC_INPL_CD and DMFR_DVCD columns.
    """
    tagged = raw.copy()
    # Presumably clsf_boz_cd needs DMFR_DVCD to be present; it is dropped
    # again below — TODO confirm against preprocessing.clsf_boz_cd.
    tagged['DMFR_DVCD'] = '01'
    tagged['BOZ_CD'] = clsf_boz_cd(tagged, products)
    joined = tagged.merge(treaties, on='RRNR_TTY_CD', how='left')
    return joined.drop(columns=['RRNR_TTY_CD', 'ARC_INPL_CD', 'DMFR_DVCD'])


## Earned premium
일반_특약보종별_직전1년경과보험료_가공 = _tag_boz_and_join_treaty(
    일반_특약보종별_직전1년경과보험료, 일반_상품정보, 일반_특약정보_가공
)

## Loss amount
일반_특약보종별_직전1년손해액_가공 = _tag_boz_and_join_treaty(
    일반_특약보종별_직전1년손해액, 일반_상품정보, 일반_특약정보_가공
)

In [167]:
def get_ret_risk_rate_by_risk_coef(comm: pd.DataFrame, prem: pd.DataFrame, loss_ratio: float, risk_coef: float) -> pd.DataFrame:
    """Compute base vs. shocked expected-loss and commission amounts per premium row.

    Despite the function name, no rate column is produced; the result carries
    the amounts (base, shocked, and shocked minus base) only.

    A commission rate is derived for every row of ``comm`` as
    ``CMSN_MULT_RT * (BSE_LSRT - ratio) + CMSN_ADD_RT``, capped at
    ``TOP_CMSN_RT`` and then floored at ``LWT_CMSN_RT``. It is evaluated at
    ``ratio = loss_ratio`` (base) and ``ratio = 1 + risk_coef`` (shocked) and
    left-joined onto ``prem`` by ``TTY_YR``.

    NOTE(review): the shocked ratio is ``1 + risk_coef`` and does not depend
    on ``loss_ratio`` — confirm this is the intended shock definition.

    Args:
        comm: Commission terms with columns TTY_YR, CMSN_ADD_RT, CMSN_MULT_RT,
            BSE_LSRT, LWT_CMSN_RT and TOP_CMSN_RT.
        prem: Premium rows with columns TTY_YR, BOZ_CD, ELP_PRM and
            T02_RN_ELP_PRM (presumably gross and ceded earned premium —
            TODO confirm).
        loss_ratio: Ratio applied to premium in the base scenario.
        risk_coef: Coefficient; premium is multiplied by ``1 + risk_coef`` in
            the shocked scenario.

    Returns:
        ``prem`` extended with ORI_EXP_LOSS_*, RET_EXP_LOSS_* and COMM_*
        columns (BASE, SHOCKED, DIFF each), with T02_RN_ELP_PRM cast to float,
        sorted by BOZ_CD then TTY_YR. The row index is not reset.
    """
    shocked_ratio = 1 + risk_coef

    def clamped_comm_rate(ratio):
        # Linear in (BSE_LSRT - ratio); cap at TOP first, then floor at LWT.
        # fmin/fmax return the non-NaN operand when exactly one side is NaN.
        linear = comm['CMSN_MULT_RT'] * (comm['BSE_LSRT'] - ratio) + comm['CMSN_ADD_RT']
        return np.fmax(np.fmin(linear, comm['TOP_CMSN_RT']), comm['LWT_CMSN_RT'])

    # Commission rates per comm row, keyed by treaty year.
    rates = comm[['TTY_YR']].copy()
    rates['COMM_RATE_BASE'] = clamped_comm_rate(loss_ratio)
    rates['COMM_RATE_SHOCKED'] = clamped_comm_rate(shocked_ratio)

    result = prem.merge(rates, on='TTY_YR', how='left')
    gross = result['ELP_PRM']
    ceded = result['T02_RN_ELP_PRM']
    retained = gross - ceded

    # Expected loss on the full premium.
    result['ORI_EXP_LOSS_BASE'] = gross * loss_ratio
    result['ORI_EXP_LOSS_SHOCKED'] = gross * shocked_ratio
    result['ORI_EXP_LOSS_DIFF'] = result['ORI_EXP_LOSS_SHOCKED'] - result['ORI_EXP_LOSS_BASE']

    # Expected loss on the premium net of T02_RN_ELP_PRM.
    result['RET_EXP_LOSS_BASE'] = retained * loss_ratio
    result['RET_EXP_LOSS_SHOCKED'] = retained * shocked_ratio
    result['RET_EXP_LOSS_DIFF'] = result['RET_EXP_LOSS_SHOCKED'] - result['RET_EXP_LOSS_BASE']

    # Commission amounts on T02_RN_ELP_PRM.
    result['COMM_BASE'] = ceded * result['COMM_RATE_BASE']
    result['COMM_SHOCKED'] = ceded * result['COMM_RATE_SHOCKED']
    result['COMM_DIFF'] = result['COMM_SHOCKED'] - result['COMM_BASE']

    # The rate columns are intermediates only; the cast happens after all
    # amounts have been computed.
    result = result.drop(columns=['COMM_RATE_BASE', 'COMM_RATE_SHOCKED'])
    result = result.astype({'T02_RN_ELP_PRM': float})
    return result.sort_values(by=['BOZ_CD', 'TTY_YR'])


# Example
# Values listed in the original note for 특약명:
# 근재보험특약, 기술보험특약, 재물보험특약, 배상책임보험특약, 해외PST
특약명 = "기술보험특약"
loss_ratio = 0.6
risk_coef = 0.55

# Commission terms of the selected treaty group (group column no longer needed).
comm = (
    일반_특약수수료_가공
    .query('TTY_CD_GRP == @특약명')
    .drop('TTY_CD_GRP', axis=1)
)

# Premium of the selected treaty group, summed per treaty year and BOZ_CD.
prem = (
    일반_특약보종별_직전1년경과보험료_가공
    .query('TTY_CD_GRP == @특약명')
    .groupby(['TTY_YR', 'BOZ_CD'], as_index=False)[['ELP_PRM', 'T02_RN_ELP_PRM']]
    .sum()
)

get_ret_risk_rate_by_risk_coef(comm, prem, loss_ratio, risk_coef)

Unnamed: 0,TTY_YR,BOZ_CD,ELP_PRM,T02_RN_ELP_PRM,ORI_EXP_LOSS_BASE,ORI_EXP_LOSS_SHOCKED,ORI_EXP_LOSS_DIFF,RET_EXP_LOSS_BASE,RET_EXP_LOSS_SHOCKED,RET_EXP_LOSS_DIFF,COMM_BASE,COMM_SHOCKED,COMM_DIFF
0,2013,A002,7545087,5607018,4527052,11694885,7167833,1162841,3004007,1841166,1996098,1794246,-201853
1,2014,A002,217202262,123040504,130321357,336663506,206342149,56497055,145950725,89453670,43802419,39372961,-4429458
2,2015,A002,37594020,15393040,22556412,58270731,35714319,13320588,34411519,21090931,5479922,4925773,-554149
3,2016,A002,207109805,50539226,124265883,321020198,196754315,93942347,242684397,148742050,17890886,16172552,-1718334
5,2017,A002,718704908,131154554,431222945,1113992607,682769663,352530212,910703049,558172836,46428712,41969457,-4459255
7,2018,A002,2149270575,541190673,1289562345,3331369391,2041807046,964847941,2492523848,1527675907,188875545,173181015,-15694530
9,2019,A002,2520270388,817291327,1512162233,3906419101,2394256869,1021787437,2639617545,1617830108,281148216,261533225,-19614992
4,2016,A003,2066853,1446796,1240112,3203622,1963510,372034,961088,589054,512166,462975,-49191
6,2017,A003,15817764,11072435,9490658,24517534,15026876,2847197,7355260,4508063,3919642,3543179,-376463
8,2018,A003,1608908182,1113899241,965344909,2493807682,1528462773,297005365,767263859,470258494,388750835,356447757,-32303078


In [None]:
def get_ret_risk_rate_by_loss_dist(comm: pd.DataFrame, prem: pd.DataFrame, loss: pd.DataFrame, loss_ratio: float, cv: pd.DataFrame) -> pd.DataFrame:
    """특약별 보유리스크율(손해율분포법)"""