In [1]:
import pandas as pd
import numpy as np

import os

# Step one directory up and use the resulting working directory as the project root.
# NOTE: the chdir is relative, so every re-run of this cell moves up one more level.
os.chdir("../")
root_path = os.getcwd()

# <root>/data holds both the original and the external data sets.
data_folder_path = os.path.join(root_path, 'data')

# Original data set: <root>/data/original_data/{raw_data, processed_data}
original_file_path = os.path.join(data_folder_path, 'original_data')
original_raw_file_path, original_processed_file_path = (
    os.path.join(original_file_path, sub_folder)
    for sub_folder in ('raw_data', 'processed_data')
)

# Names of the entries found directly under the original raw-data folder.
raw_file_folders = os.listdir(original_raw_file_path)

# External data set: <root>/data/external_data/{raw_data, processed_data}
external_file_path = os.path.join(data_folder_path, 'external_data')
external_raw_file_path, external_processed_file_path = (
    os.path.join(external_file_path, sub_folder)
    for sub_folder in ('raw_data', 'processed_data')
)

In [2]:
# Required data: the preprocessed CJ delivery records.
cj_deliv = pd.read_csv(os.path.join(original_processed_file_path, 'CJ_delivery.csv'))

# DL_YMD is prefixed with the century ('20') so that it parses as %Y%m%d
# (presumably the raw column is stored as yymmdd -- confirm against the CSV).
# Vectorised string concatenation replaces the per-row apply/lambda.
cj_deliv['DL_YMD'] = pd.to_datetime('20' + cj_deliv['DL_YMD'].astype(str), format='%Y%m%d')

# Calendar features derived from the delivery date.
cj_deliv['year'] = cj_deliv.DL_YMD.dt.year.values
cj_deliv['month'] = cj_deliv.DL_YMD.dt.month.values
# `Series.dt.weekofyear` was deprecated in pandas 1.1 and removed in pandas 2.0;
# `dt.isocalendar().week` is the replacement (same ISO-8601 week number).
# Fall back to the legacy accessor only on pandas versions that predate isocalendar().
try:
    iso_week = cj_deliv.DL_YMD.dt.isocalendar().week
except AttributeError:
    iso_week = cj_deliv.DL_YMD.dt.weekofyear
# isocalendar() yields a nullable UInt32 column; cast back to the plain int64 the
# legacy accessor produced so downstream comparisons and group-bys behave the same.
cj_deliv['weekofyear'] = iso_week.astype('int64').values
cj_deliv['dayofweek'] = cj_deliv.DL_YMD.dt.dayofweek.values

# Monday-Friday (dayofweek 0-4) are labelled '평일' (weekday), Saturday/Sunday '주말' (weekend).
cj_deliv['weekday'] = np.where(cj_deliv.dayofweek < 5, '평일', '주말')

# Special-day (public holiday) dates.
# NOTE(review): `holiday` is not used by any cell visible here, and the `weekday`
# flag above does not take it into account -- confirm whether holidays are meant
# to be excluded from the weekday rows.
holiday = pd.to_datetime(['2019-02-04','2019-02-05','2019-02-06','2019-03-01','2019-05-06','2019-05-12','2020-04-15', '2020-04-30','2020-05-05'])

  


**Impact Index**  
$r_{w,d}$ : w주차, d번째 요일의 19년 대비 20년도 송장건수 비율 (d번째 요일 - d = 0:월, 1:화, ..., 6:일)  
$post_r = \max(\{r_{w,d} \mid w \in [8,11],\ d \in [0,6]\})$  
$pre_r = \operatorname{mean}(\{r_{w,d} \mid w = 7,\ d \in [0,6]\})$  
  
$\text{Impact Index} = \dfrac{post_r - pre_r}{pre_r}$

In [3]:
def Impact_Index(df, pre_weeks=(7, 7), post_weeks=(8, 11)):
    """Compute the Impact Index of every product category.

    For each (category, week-of-year, day-of-week) cell the invoice counts
    (`INVC_CONT`) are summed per year and the year-over-year ratio
    ``latest year / earliest year`` is taken. Only rows whose `weekday`
    column equals '평일' (weekday) are used. Per category:

    * ``post_r`` is the maximum ratio inside `post_weeks`,
    * ``pre_r`` is the mean ratio inside `pre_weeks`,
    * ``Impact Index = (post_r - pre_r) / pre_r``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns `DL_GD_LCLS_NM`, `year`, `weekofyear`,
        `dayofweek`, `weekday` and `INVC_CONT`.
    pre_weeks : tuple of (int, int), default (7, 7)
        Inclusive week-of-year range of the baseline period.
    post_weeks : tuple of (int, int), default (8, 11)
        Inclusive week-of-year range of the impact period (the original
        author's comment places it between Feb 19 and Mar 15).

    Returns
    -------
    dict
        Maps each category name to its Impact Index. A category that has
        ratios in only one of the two periods maps to NaN. An empty dict is
        returned when there are no weekday rows or fewer than two years.
    """
    weekday_rows = df.loc[df.weekday == '평일']
    if weekday_rows.empty:
        return {}

    # Aggregate once (the original ran this pipeline twice) and put the years
    # side by side so the ratio is an explicit column division instead of a
    # positional iloc[1] / iloc[0] on each group.
    counts = (
        weekday_rows
        .groupby(['DL_GD_LCLS_NM', 'year', 'weekofyear', 'dayofweek'])['INVC_CONT']
        .sum()
        .unstack('year')
    )
    if counts.shape[1] < 2:
        return {}
    base_year, target_year = counts.columns.min(), counts.columns.max()

    # Cells observed in only one of the two years become NaN and are dropped.
    # The original marked them with the sentinel value 1 and then filtered
    # `rate != 1`, which also discarded genuine ratios equal to 1.0.
    rate = (
        (counts[target_year] / counts[base_year])
        .dropna()
        .rename('rate')
        .reset_index()
    )

    def _rates_in(week_range):
        # Rows of `rate` whose week-of-year lies inside the inclusive range.
        first_week, last_week = week_range
        return rate.loc[rate.weekofyear.between(first_week, last_week)]

    # Largest year-over-year ratio during the impact period.
    post_r = _rates_in(post_weeks).groupby('DL_GD_LCLS_NM')['rate'].max()
    # Average year-over-year ratio during the baseline period.
    pre_r = _rates_in(pre_weeks).groupby('DL_GD_LCLS_NM')['rate'].mean()

    index = (post_r - pre_r) / pre_r

    return dict(index)

In [4]:
# Compute the Impact Index per product category (DL_GD_LCLS_NM) for the CJ delivery data.
Impact_Index(cj_deliv)

{'가구/인테리어': 38.62273587932063,
 '도서/음반': 4.29741147125179,
 '디지털/가전': 30.124177754198858,
 '생활건강': 32.755372747305856,
 '스포츠/레저': 134.88186398536988,
 '식품': 30.621609381087673,
 '출산/육아': 123.65266112801581,
 '패션의류': 36.03150405986596,
 '패션잡화': 66.4414443152583,
 '화장품/미용': 32.598832761361976}