In [1]:
import pandas as pd
import numpy as np

import os

# Remember the working directory seen the first time this cell runs.
# A bare `os.chdir("../")` is relative to the *current* directory, so every
# re-execution of the cell would climb one level higher and silently break
# all of the paths derived below.
if '_NOTEBOOK_DIR' not in globals():
    _NOTEBOOK_DIR = os.getcwd()

# Work from the parent of that directory; it is used as the root for all paths.
os.chdir(os.path.join(_NOTEBOOK_DIR, os.pardir))
root_path = os.getcwd()

# <root>/data/original_data/{raw_data, processed_data}
data_folder_path = os.path.join(root_path, 'data')
original_file_path = os.path.join(data_folder_path, 'original_data')
original_raw_file_path = os.path.join(original_file_path, 'raw_data')
original_processed_file_path = os.path.join(original_file_path, 'processed_data')

# Names of the entries found directly under the original raw-data folder.
raw_file_folders = os.listdir(original_raw_file_path)

# <root>/data/external_data/{raw_data, processed_data}
external_file_path = os.path.join(data_folder_path, 'external_data')
external_raw_file_path = os.path.join(external_file_path, 'raw_data')
external_processed_file_path = os.path.join(external_file_path, 'processed_data')

In [2]:
# Required data: the processed CJ delivery file.
cj_deliv = pd.read_csv(os.path.join(original_processed_file_path, 'CJ_delivery.csv'))

# DL_YMD carries a two-digit year (YYMMDD); prepend the century so it can be
# parsed with the strict '%Y%m%d' format.
cj_deliv['DL_YMD'] = pd.to_datetime('20' + cj_deliv['DL_YMD'].astype(str), format='%Y%m%d')

# Calendar features derived from the delivery date.
dl_date = cj_deliv['DL_YMD'].dt
cj_deliv['year'] = dl_date.year
cj_deliv['month'] = dl_date.month
# `Series.dt.weekofyear` was deprecated in pandas 1.1 and removed in 2.0.
# `isocalendar().week` yields the same ISO week number; fall back to the old
# accessor only on pandas versions that predate `isocalendar`.
if hasattr(dl_date, 'isocalendar'):
    cj_deliv['weekofyear'] = dl_date.isocalendar().week.astype('int64')
else:
    cj_deliv['weekofyear'] = dl_date.weekofyear
cj_deliv['dayofweek'] = dl_date.dayofweek

# dayofweek: 0 = Monday ... 6 = Sunday, so values below 5 are weekdays.
# The labels are runtime values ('평일' = weekday, '주말' = weekend).
cj_deliv['weekday'] = np.where(cj_deliv.dayofweek < 5, '평일', '주말')

# Special-day (holiday) dates.
holiday = pd.to_datetime(['2019-02-04','2019-02-05','2019-02-06','2019-03-01','2019-05-06','2019-05-12','2020-04-15', '2020-04-30','2020-05-05'])

  


**Growth Index**  
$r_{w,d}$ : w주차, d번째 요일의 19년 대비 20년도 송장건수 비율 (d = 0:월, 1:화, ..., 6:일)  
$GrowthIndex = \frac{\sum_{i=1}^{N} i \cdot r_i}{\sum_{i=1}^{N} i} \quad s.t.\ N : size\ of\ \{r_{w,d} \mid w \in [12,17],\ d \in [0,4]\},\ r_i : i'th\ element\ of\ \{r_{w,d} \mid w \in [12,17],\ d \in [0,4]\}\ ordered\ by\ date$ 

In [3]:
def growth_index(df, start_week=12, end_week=17):
    """Return the linearly weighted year-over-year growth index per category.

    For every (category, week of year, day of week) cell on weekdays, the
    invoice counts are summed per year and the ratio later-year / earlier-year
    is taken. Within each category the ratios are ordered by week and day of
    week and averaged with weights 1..N, so later dates weigh more.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``DL_GD_LCLS_NM`` (category), ``year``,
        ``weekofyear``, ``dayofweek``, ``weekday`` and ``INVC_CONT``.
        Only rows whose ``weekday`` equals '평일' are used.
    start_week, end_week : int, optional
        Inclusive week-of-year window of the ratios that enter the index.
        Defaults to 12..17.

    Returns
    -------
    dict
        Maps each category to its growth index. Categories without any usable
        ratio in the window are omitted; empty input yields an empty dict.

    Notes
    -----
    A cell contributes only when it has counts for exactly two years and the
    earlier-year count is non-zero (otherwise the ratio is undefined). A ratio
    that is exactly 1.0 is a valid observation and is kept.
    """

    def weighted_mean(rates):
        # Weights grow linearly with position: 1 for the oldest ratio, N for the newest.
        weights = np.arange(1, len(rates) + 1)
        return np.dot(rates, weights) / weights.sum()

    # Invoice count per category / year / week / day of week, weekdays only.
    # groupby sorts its keys, so within each (category, week, day) cell the
    # rows come out in ascending year order.
    yearly_counts = (
        df.loc[df.weekday == '평일']
        .groupby(['DL_GD_LCLS_NM', 'year', 'weekofyear', 'dayofweek'])['INVC_CONT']
        .sum()
        .reset_index()
    )
    if yearly_counts.empty:
        return {}

    # Per cell: number of years present, earlier-year count, later-year count.
    # The result is sorted by category, week, day of week, i.e. by date.
    paired = (
        yearly_counts
        .groupby(['DL_GD_LCLS_NM', 'weekofyear', 'dayofweek'])['INVC_CONT']
        .agg(['count', 'first', 'last'])
        .reset_index()
    )

    # Keep complete year pairs inside the requested week window.
    in_scope = (paired['count'] == 2) & paired['weekofyear'].between(start_week, end_week)
    paired = paired.loc[in_scope]
    paired = paired.assign(rate=paired['last'] / paired['first'])

    # A zero earlier-year count gives inf/NaN, which would poison the whole category.
    paired = paired.loc[np.isfinite(paired['rate'])]
    if paired.empty:
        return {}

    index = paired.groupby('DL_GD_LCLS_NM')['rate'].apply(
        lambda rates: weighted_mean(rates.values)
    )
    return index.to_dict()

In [4]:
# Growth index for every DL_GD_LCLS_NM category in the delivery data; the
# returned dict is shown as this cell's output.
growth_index(cj_deliv)

{'가구/인테리어': 1.3959060979376816,
 '도서/음반': 1.5456440976342904,
 '디지털/가전': 1.3424124984800987,
 '생활건강': 1.4170575981044113,
 '스포츠/레저': 1.2906100122939723,
 '식품': 1.489711276070297,
 '출산/육아': 1.073823522359508,
 '패션의류': 1.080711000027851,
 '패션잡화': 1.0392808041969142,
 '화장품/미용': 1.0870318733284736}