In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import poisson, expon, lognorm, norm, beta
from datetime import datetime
from dateutil.relativedelta import relativedelta
import warnings

In [2]:
# Environment setup.
# NOTE: this blanket filter silences every warning, including pandas/NumPy
# deprecation and chained-assignment warnings; narrow it when debugging.
warnings.filterwarnings("ignore")
# Show floats with thousands separators and no decimals.
pd.options.display.float_format = '{:,.0f}'.format

# Seed NumPy's global RNG so that a fresh "Restart & Run All" reproduces the
# same simulated data. scipy.stats `rvs` calls draw from this same global
# generator when no `random_state` is passed, so one seed covers them too.
SEED = 42
np.random.seed(SEED)

### 1. 사고

In [10]:
# Draw the number of accidents for the month from a Poisson distribution.
MEAN_ACCIDENTS_PER_MONTH = 100
accidents_per_month = poisson.rvs(mu=MEAN_ACCIDENTS_PER_MONTH)

In [11]:
# Generate the day-of-month of every accident in the base month
# (January 2017 with the values set below).
base_year, base_month = 2017, 1
start_date = datetime(base_year, base_month, 1)
# Last day of the base month: jump one month ahead, then step back one day.
end_date = start_date + relativedelta(months=1) - relativedelta(days=1)
# start_date is the 1st, so end_date's day-of-month equals the month length.
days = end_date.day
# np.random.choice samples 0..days-1 with replacement; shift to 1..days.
accidents_days = 1 + np.random.choice(days, size=accidents_per_month)

In [12]:
# Build the accident table: one row per accident holding its ID and date.
# An ID is "ACCD" + YYYYMMDD + a 4-digit running number that restarts at 0001
# for each day. np.unique returns the days sorted, so rows come out in date order.
unique, counts = np.unique(accidents_days, return_counts=True)
accident_rows = [
    ["ACCD{0:04d}{1:02d}{2:02d}{3:04d}".format(base_year, base_month, day, seq),
     datetime(base_year, base_month, day)]
    for day, n_on_day in zip(unique, counts)
    for seq in range(1, n_on_day + 1)
]
accidents = pd.DataFrame(accident_rows, columns=['ACCD_NUM', 'ACCD_DATE'])

In [13]:
# Add the report date column: accident date plus a random reporting delay.
# Delays are drawn from an exponential distribution (scale=10) and rounded to
# whole days; one delay is drawn per row of `accidents`.
# The column is assigned in a single vectorized step. The previous version
# filled it row by row through chained indexing (accidents['RPT_DATE'][i] = ...),
# which may write to a temporary copy, and cast with a unit-less
# astype(np.datetime64), which pandas >= 2.0 rejects.
report_delays = expon.rvs(scale=10, size=len(accidents)).round()
accidents['RPT_DATE'] = (
    pd.to_datetime(accidents['ACCD_DATE']) + pd.to_timedelta(report_delays, unit='D')
)

In [14]:
# Add the closing date column: report date plus a random closing delay.
# Delays are drawn from an exponential distribution (scale=20) and rounded to
# whole days; one delay is drawn per row of `accidents`.
# The delays get their own name (`closing_delays`) instead of overwriting
# `report_delays`, and the column is assigned in a single vectorized step
# rather than row by row through chained indexing, which may write to a
# temporary copy. The unit-less astype(np.datetime64) cast, rejected by
# pandas >= 2.0, is no longer needed.
closing_delays = expon.rvs(scale=20, size=len(accidents)).round()
accidents['END_DATE'] = (
    pd.to_datetime(accidents['RPT_DATE']) + pd.to_timedelta(closing_delays, unit='D')
)

In [15]:
# Add the accident severity (total loss) column.
# One shifted lognormal draw (s=1, loc=2, scale=50) per accident, multiplied
# by 1e6 — presumably the draw is in millions of currency units; TODO confirm.
loss_draws = lognorm.rvs(s=1, loc=2, scale=50, size=accidents_per_month)
accidents['TOT_LOSS'] = loss_draws * 1e6

### 2. 계약

In [181]:
# Contract counts for one month: opening count, new contracts, lapses, closing count.
contract_start = 1000
# New contracts: a normal draw (loc=200, scale=10) rounded to a whole number.
new_contract_per_month = int(np.rint(norm.rvs(loc=200, scale=10)))
# Lapse rate: a Beta(a=1, b=10) draw, applied to the opening count and rounded.
lapse_rate = beta.rvs(a=1, b=10)
contract_lapse = int(np.rint(contract_start * lapse_rate))
# Closing count = opening count + new contracts - lapsed contracts.
contract_end = contract_start + new_contract_per_month - contract_lapse
# Last expression of the cell: shown as the cell output.
contract_start, new_contract_per_month, contract_lapse, contract_end

(1000, 197, 20, 1177)