In [274]:
import dateutil.relativedelta as relativedelta
import dateutil.rrule as rrule
import datetime

import numpy as np
import pandas as pd
import pandas_profiling

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [275]:
# Card spending extracts (tab-separated): one file for resident cardholders,
# one for foreigner cardholders.
rs_card_data = pd.read_csv(
    '../../data/bigcontest/02_카드매출데이터(신한카드)/CARD_SPENDING_RESIDENT/CARD_SPENDING_RESIDENT.txt',
    sep='\t',
)
fr_card_data = pd.read_csv(
    '../../data/bigcontest/02_카드매출데이터(신한카드)/CARD_SPENDING_FOREIGNER/CARD_SPENDING_FOREIGNER.txt',
    sep='\t',
    engine='python',
)

In [276]:
# COVID-19 confirmed-case table; the 'Unnamed: 0' column (presumably a saved
# row index - confirm) is discarded right after loading.
covid_raw = pd.read_csv('../../data/covid19/covid19_confirmed.csv', parse_dates=['date'])
covid_confirmed = covid_raw.drop(columns='Unnamed: 0')

In [277]:
# `date` is rendered as "<month>.<day>"; split its text form on the dot once
# and reuse the pieces for both derived columns.
# NOTE(review): this relies on the text keeping both day digits (e.g. "2.20");
# if the column were ever read as a float, "2.20" would collapse to "2.2" - confirm.
date_parts = [str(stamp).split('.') for stamp in covid_confirmed.date]
covid_confirmed['month'] = [int(parts[0]) for parts in date_parts]
covid_confirmed['day'] = [int(parts[1]) for parts in date_parts]

In [279]:
# Wide -> long: every non-id column becomes a `city` value, with its cell
# stored in `covid19`.
covid_confirmed = covid_confirmed.melt(
    id_vars=['date', 'month', 'day'],
    var_name='city',
    value_name='covid19',
)

In [280]:
# Preview the first rows of the long-format COVID table.
covid_confirmed.head()

Unnamed: 0,date,month,day,city,covid19
0,2.18,2,18,seoul,0
1,2.19,2,19,seoul,1
2,2.2,2,20,seoul,7
3,2.21,2,21,seoul,5
4,2.22,2,22,seoul,3


# 전처리
## mct_cat_cd 한글로 변환

In [281]:
# Merchant category code (MCT_CAT_CD) -> Korean category label.
mct_dict = {
    10: '숙박',
    20: '레저용품',
    21: '레저업소',
    22: '문화취미',
    30: '가구',
    31: '전기',
    32: '주방용구',
    33: '연료판매',
    34: '광학제품',
    35: '가전',
    40: '유통업',
    42: '의복',
    43: '직물',
    44: '신변잡화',
    50: '서적문구',
    52: '사무통신',
    60: '자동차판매',
    62: '자동차정비',
    70: '의료기관',
    71: '보건위생',
    80: '요식업소',
    81: '음료식품',
    92: '수리서비스',
}

In [282]:
# Replace each numeric category code with its label; an unmapped code raises KeyError.
# Not re-runnable: once the column holds labels, the lookup fails on a second run.
rs_card_data['MCT_CAT_CD'] = [mct_dict[code] for code in rs_card_data['MCT_CAT_CD']]

## x축

In [283]:
# x tick labels: one "month/day" string per week.
year = 2020
before = datetime.datetime(year, 1, 1)
after = datetime.datetime(year, 12, 31)

# Weekly recurrence on Sundays, starting from `before`.
rr = rrule.rrule(rrule.WEEKLY, byweekday=relativedelta.SU, dtstart=before)
sundays = rr.between(before, after, inc=True)

# Shift each Sunday back by 13 days, which lands on a Monday (Monday-based weeks).
test = pd.DataFrame(sundays) + datetime.timedelta(days=-13)

# Render each shifted date as "<month>/<day>".
test2 = [f'{stamp.month}/{stamp.day}' for stamp in pd.to_datetime(test[0])]

## rs_card_data 시간 분할

In [284]:
# Parse the YYYYMMDD date column into datetimes and store SEX_CD as a categorical.
rs_card_data['STD_DD'] = pd.to_datetime(rs_card_data['STD_DD'], format='%Y%m%d')
rs_card_data['SEX_CD'] = rs_card_data['SEX_CD'].astype('category')

In [285]:
# Calendar parts of the transaction date, used later as grouping / join keys.
rs_card_data['year'] = rs_card_data['STD_DD'].dt.year
rs_card_data['month'] = rs_card_data['STD_DD'].dt.month
rs_card_data['day'] = rs_card_data['STD_DD'].dt.day
# ISO week number. `Series.dt.weekofyear` was deprecated in pandas 1.1 and removed
# in 2.0; `isocalendar().week` is the replacement. It returns a nullable UInt32
# column, so cast back to int64 to keep the dtype the old accessor produced.
rs_card_data['week'] = rs_card_data['STD_DD'].dt.isocalendar().week.astype('int64')

## 서울 대구 분리

In [286]:
# Display the frame to check the newly added year/month/day/week columns.
rs_card_data

Unnamed: 0,STD_DD,GU_CD,DONG_CD,MCT_CAT_CD,SEX_CD,AGE_CD,USE_CNT,USE_AMT,year,month,day,week
0,2019-02-01,110,517,숙박,F,25,11,595,2019,2,1,5
1,2019-02-01,110,517,숙박,F,50,5,243,2019,2,1,5
2,2019-02-01,110,517,숙박,F,55,5,27,2019,2,1,5
3,2019-02-01,110,517,숙박,M,20,11,243,2019,2,1,5
4,2019-02-01,110,517,숙박,M,25,11,433,2019,2,1,5
...,...,...,...,...,...,...,...,...,...,...,...,...
2537748,2020-05-31,350,720,음료식품,M,45,19,127,2020,5,31,22
2537749,2020-05-31,350,720,음료식품,M,50,33,211,2020,5,31,22
2537750,2020-05-31,350,720,음료식품,M,55,9,61,2020,5,31,22
2537751,2020-05-31,350,720,음료식품,M,60,19,194,2020,5,31,22


In [287]:
# District code (GU_CD) -> city name.
gu_dict = {
    140: 'seoul',
    350: 'seoul',
    260: 'daegu',
    110: 'daegu',
}

In [288]:
# Swap each district code for its city name; an unmapped code raises KeyError.
rs_card_data['GU_CD'] = [gu_dict[code] for code in rs_card_data['GU_CD']]

In [289]:
# GU_CD now holds city names, so relabel just that column as `city`.
# Renaming by name (instead of overwriting every label positionally) cannot
# mislabel other columns if their order or count ever changes, and it is a
# no-op when the cell is re-run.
rs_card_data = rs_card_data.rename(columns={'GU_CD': 'city'})

In [290]:
# Preview the frame after the GU_CD -> city relabelling.
rs_card_data.head()

Unnamed: 0,STD_DD,city,DONG_CD,MCT_CAT_CD,SEX_CD,AGE_CD,USE_CNT,USE_AMT,year,month,day,week
0,2019-02-01,daegu,517,숙박,F,25,11,595,2019,2,1,5
1,2019-02-01,daegu,517,숙박,F,50,5,243,2019,2,1,5
2,2019-02-01,daegu,517,숙박,F,55,5,27,2019,2,1,5
3,2019-02-01,daegu,517,숙박,M,20,11,243,2019,2,1,5
4,2019-02-01,daegu,517,숙박,M,25,11,433,2019,2,1,5


# merge

In [291]:
# Sum usage count and amount per (date parts, merchant category, city),
# collapsing the remaining columns (district, sex, age).
group_keys = ['month', 'day', 'week', 'MCT_CAT_CD', 'city', 'year']
usage_totals = rs_card_data.pivot_table(
    values=['USE_CNT', 'USE_AMT'],
    index=group_keys,
    aggfunc='sum',
)
table = usage_totals.reset_index()

In [292]:
# The COVID table only carries month/day (no year), while `table` spans more than
# one year. Joining on month/day/city alone would attach the COVID counts to the
# same calendar day of every year, so pin the counts to a single year and include
# `year` in the join keys; rows from other years then fall through to the fill value.
COVID_DATA_YEAR = 2020  # NOTE(review): assumes covid19_confirmed.csv covers 2020 only - confirm
covid_with_year = covid_confirmed.assign(year=COVID_DATA_YEAR)
covid_table = pd.merge(
    table,
    covid_with_year,
    on=['year', 'month', 'day', 'city'],
    how='left',
).fillna(0)  # unmatched rows get 0 in every merged column (covid19 and date)

In [293]:
# The left merge plus fillna leaves covid19 as float; store it as integer counts.
covid_table = covid_table.astype({'covid19': 'int'})

In [294]:
# Persist the merged card/COVID table. `index` is left at its default, so the
# row index is written as an extra leading column.
covid_table.to_csv('../../data/card_covid.csv')

# elastic

In [295]:
# Keep only the 2020 rows; `year` is constant afterwards, so drop it.
is_2020 = covid_table['year'] == 2020
covid_table2020 = covid_table.loc[is_2020].drop(columns='year')

In [296]:
# One frame per city; `city` is constant within each, so drop it.
seoul_rows = covid_table2020['city'] == 'seoul'
daegu_rows = covid_table2020['city'] == 'daegu'
seoul_covid_table2020 = covid_table2020.loc[seoul_rows].drop(columns='city')
daegu_covid_table2020 = covid_table2020.loc[daegu_rows].drop(columns='city')

In [350]:
# Display the Seoul 2020 frame that feeds the elasticity calculation.
seoul_covid_table2020

Unnamed: 0,month,day,week,MCT_CAT_CD,USE_AMT,USE_CNT,date,covid19
3,2,1,5,가구,61331,117,0,0
7,2,1,5,가전,258731,569,0,0
11,2,1,5,광학제품,46706,307,0,0
15,2,1,5,레저업소,332995,21906,0,0
19,2,1,5,레저용품,712649,8886,0,0
...,...,...,...,...,...,...,...,...
11008,5,31,22,자동차정비,72164,6846,5.31,6
11012,5,31,22,자동차판매,70,5,5.31,6
11015,5,31,22,전기,482,10,5.31,6
11019,5,31,22,주방용구,17444,252,5.31,6


## 서울

In [446]:
def elastic(data, week, alpha=0, covid=False):
    """Week-over-week relative change in spending per merchant category.

    Parameters
    ----------
    data : DataFrame
        Must contain the columns 'covid19', 'USE_AMT', 'week' and 'MCT_CAT_CD'.
        Several rows per (week, category) are fine; they are summed first.
    week : int
        Week to evaluate; it is compared against ``week - 1``.
    alpha : number, default 0
        Offset added to the COVID terms (to the current-week count in the
        numerator and to the previous-week count in the denominator) so that
        they do not evaluate to zero. Only used when ``covid`` is true.
    covid : bool, default False
        When true, the spending change is divided by the relative change in
        the COVID count; otherwise it is returned as is.

    Returns
    -------
    Series
        Indexed by 'MCT_CAT_CD'. Categories missing from either week come out
        as NaN through index alignment.
    """
    weekly = data.pivot_table(
        values=['covid19', 'USE_AMT'],
        index=['week', 'MCT_CAT_CD'],
        aggfunc='sum',
    ).reset_index()

    previous = weekly.loc[weekly['week'] == week - 1].set_index('MCT_CAT_CD')
    current = weekly.loc[weekly['week'] == week].set_index('MCT_CAT_CD')

    spending_change = (current['USE_AMT'] - previous['USE_AMT']) / previous['USE_AMT']
    if not covid:
        return spending_change / 1

    covid_change = (
        (current['covid19'] + alpha - previous['covid19'])
        / (previous['covid19'] + alpha)
    )
    return spending_change / covid_change

In [447]:
def concat_elastic(data, start_week, end_week, alpha=0, covid=False):
    """Run `elastic` for every week in [start_week, end_week] and stack the results.

    `data`, `alpha` and `covid` are forwarded to `elastic` unchanged.

    Returns a long-format DataFrame with the columns 'week', 'MCT_CAT_CD' and
    'elastic', one row per (week, category) pair.
    """
    weeks = list(range(start_week, end_week + 1))
    per_week = [elastic(data, week=wk, alpha=alpha, covid=covid) for wk in weeks]

    # Categories as rows, one column per week.
    wide = pd.concat(per_week, axis=1)
    wide.columns = weeks

    # Flatten to one row per (week, category).
    stacked = wide.unstack().reset_index()
    stacked.columns = ['week', 'MCT_CAT_CD', 'elastic']
    return stacked

In [452]:
# Seoul, weeks 9-22: spending change divided by the relative change in COVID counts.
seoul_df = concat_elastic(data=seoul_covid_table2020, start_week=9, end_week=22, covid=True)

In [453]:
# One line per merchant category; tick value k on the x axis is labelled test2[k].
week_axis = dict(tickmode='array', tickvals=np.arange(0, len(test2)), ticktext=test2)
fig = px.line(seoul_df, x="week", y="elastic", color='MCT_CAT_CD')
fig.update_layout(xaxis=week_axis)
fig.show()

## 대구

In [454]:
# Daegu, weeks 9-22: spending change divided by the relative change in COVID counts.
daegu_df = concat_elastic(
    data=daegu_covid_table2020,
    start_week=9,
    end_week=22,
    covid=True,
)

# One line per merchant category; tick value k on the x axis is labelled test2[k].
week_axis = dict(tickmode='array', tickvals=np.arange(0, len(test2)), ticktext=test2)
fig = px.line(daegu_df, x="week", y="elastic", color='MCT_CAT_CD')
fig.update_layout(xaxis=week_axis)
fig.show()

# 코로나 분모로 안 나눈 경우

In [461]:
# Seoul, weeks 7-22: plain week-over-week spending change (no COVID divisor).
seoul_df = concat_elastic(
    data=seoul_covid_table2020,
    start_week=7,
    end_week=22,
)

# One line per merchant category; tick value k on the x axis is labelled test2[k].
week_axis = dict(tickmode='array', tickvals=np.arange(0, len(test2)), ticktext=test2)
fig = px.line(seoul_df, x="week", y="elastic", color='MCT_CAT_CD')
fig.update_layout(xaxis=week_axis)
fig.show()

In [462]:
# Daegu, weeks 7-22: plain week-over-week spending change (no COVID divisor).
daegu_df = concat_elastic(
    data=daegu_covid_table2020,
    start_week=7,
    end_week=22,
)

# One line per merchant category; tick value k on the x axis is labelled test2[k].
week_axis = dict(tickmode='array', tickvals=np.arange(0, len(test2)), ticktext=test2)
fig = px.line(daegu_df, x="week", y="elastic", color='MCT_CAT_CD')
fig.update_layout(xaxis=week_axis)
fig.show()