## 1. 코로나 확진자 수 vs 지하철 이용량
### 1.1 전처리

In [1]:
import numpy as np
import pandas as pd
import datetime
import holidays
import matplotlib.pyplot as plt
import matplotlib as mpl
import warnings

from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Matplotlib setup for Korean labels: select the Malgun Gothic font and turn
# off the Unicode minus glyph for axis tick labels.
mpl.rc('font', family='Malgun Gothic')
mpl.rc('axes', unicode_minus=False)
# Suppress all warnings for the rest of the session.
warnings.filterwarnings('ignore')

def slicing(df, start, end):
    """Return the rows of *df* whose 'date' column falls on a day in [start, end].

    Args:
        df: DataFrame with a datetime64 'date' column (the column is used,
            not the index).
        start: First day to keep, as a 'YYYY-MM-DD' string.
        end: Last day to keep (inclusive), as a 'YYYY-MM-DD' string.

    Returns:
        The matching rows of *df*, with their original index labels.
    """
    # One timestamp per calendar day between start and end, both inclusive.
    wanted_days = pd.date_range(start=start, end=end)
    keep = df['date'].isin(wanted_days)
    return df.loc[keep]

# df_covid preprocessing (2019-01-01 ~ 2021-07-31):
# one row per DATE, with COUNT = number of non-null case IDs reported that day.
df_covid = pd.read_csv('./DATA/002_2.COVID-19.csv')
df_covid['DATE'] = pd.to_datetime(df_covid['DATE'].astype('str'))
df_covid.rename(columns={'ID': 'COUNT'}, inplace=True)
df_covid = df_covid.groupby(['DATE']).count()
# Expose the grouped dates as a regular 'date' column so it can be a merge key.
df_covid.insert(0, 'date', df_covid.index)
# Select with a list, not a set: a set has no defined column order and is
# rejected as an indexer by current pandas versions.
df_covid = df_covid[['date', 'COUNT']]

# df_subway preprocessing (2019-01-01 ~ 2021-07-31):
# parse the date column and drop the boarding/alighting columns.
df_subway = pd.read_csv('./DATA/002_1.DAILY_SUBWAY_INFO.csv')
df_subway['date'] = pd.to_datetime(df_subway['date'].astype('str'))
df_subway = df_subway.drop(columns=['on_num', 'off_num'])

# Combine df_subway and df_covid into df with an outer join on 'date';
# values missing on either side become 0, then both counts are stored as
# nullable integers.
df = df_subway.merge(df_covid, how='outer', on='date').fillna(0)
df = df.astype({'COUNT': 'Int64', 'total': 'Int64'})

# df_subway_kor preprocessing.
# NOTE(review): despite its name, this frame is read from the
# COVID-19_IN_KOREA file, not from subway data.
df_subway_kor = pd.read_csv(
    './DATA/002_3.COVID-19_IN_KOREA.csv',
    thousands=',',
)
# Skip the first data row and keep only the two columns used below.
# NOTE(review): presumably the first row is a summary/total row -- confirm.
df_subway_kor = df_subway_kor.iloc[1:].loc[:, ['date', 'total']]
df_subway_kor['date'] = pd.to_datetime(df_subway_kor['date'].astype('str'))

# Add df_subway_kor to df (outer join on 'date').
df = df.merge(df_subway_kor, how='outer', on='date')
# Positional rename: assumes the merged frame has exactly four columns in the
# order date, subway total, COUNT, nationwide total.
df.columns = ['date', 'subway', 'COVID-19', 'COVID-19 KOR']
df = df.fillna(0)
df = df.astype({'COVID-19 KOR': 'Int64'})

# df_biz: business days only (Korean public holidays and weekends removed).
# The calendar gets its own name so the imported `holidays` module is not
# overwritten; rebinding `holidays` made this cell fail when run a second time.
# The years are taken from the data and passed explicitly, instead of being
# populated as a side effect of throwaway `date in calendar` expressions.
data_years = sorted({int(year) for year in df['date'].dt.year})
kr_holidays = holidays.Korea(years=data_years)
# Calendar keys are datetime.date objects; convert them to Timestamps so they
# compare reliably against the datetime64 'date' column.
holiday_dates = pd.to_datetime(list(kr_holidays.keys()))
df_biz = df[~df['date'].isin(holiday_dates)]
# dayofweek: Monday=0 ... Sunday=6, so < 5 keeps Monday through Friday.
df_biz = df_biz[df_biz['date'].dt.dayofweek < 5].copy()
df_biz = df_biz.set_index('date')
df = df.set_index('date')

# df_week: 7-day bins -- mean business-day ridership, summed case counts.
df_week = pd.DataFrame()
df_week['subway'] = df_biz['subway'].resample('7D').mean()
# The first row of df is dropped before binning.
# NOTE(review): presumably so the 7-day bins start on the same day as
# df_biz's first business day -- confirm.
_covid_weekly = df.drop(df.index[0]).resample('7D').sum()
df_week['COVID-19'] = _covid_weekly['COVID-19']
df_week['COVID-19 KOR'] = _covid_weekly['COVID-19 KOR']

# df_quarter: quarterly aggregates over 2020-01-24 ~ 2021-07-31
# (mean business-day ridership, summed case counts).
df_q_sample = df_biz.loc['2020-01-24':'2021-07-31']
df_q_sample2 = df.loc['2020-01-24':'2021-07-31']

df_quarter = pd.DataFrame()
df_quarter['subway'] = df_q_sample['subway'].resample('1Q').mean()
df_quarter['COVID-19'] = df_q_sample2['COVID-19'].resample('1Q').sum()

# Turn the quarter-end timestamps into quarterly periods (e.g. 2020Q1).
df_quarter = df_quarter.reset_index()
df_quarter['date'] = df_quarter['date'].dt.to_period('Q')
# The column keeps the name 'date'; renaming it to 'quarter' is left disabled.

# Use float for every value column, then move the 'date' index back into a
# regular column on each frame.
df = df.astype('float').reset_index()
df_biz = df_biz.astype('float').reset_index()
df_week = df_week.astype('float').reset_index()



########################## added 2021-09-09 11:40 ##########################
# df_week2: df_week restricted to 2020-01-31 ~ 2021-07-27, plus week-over-week
# change columns for the nationwide case count ('COVID-19 KOR').
# Take an explicit copy so the new columns are added to an independent frame
# rather than to a filtered slice of df_week.
df_week2 = slicing(df_week, '2020-01-31', '2021-07-27').copy()

# Previous-week rows aligned to df_week2's index. df_week has one row per
# 7-day bin, so shifting by one row yields the preceding week; unlike
# overwriting the index of a second slice, this cannot fail on a length
# mismatch (a missing previous week simply becomes NaN).
df_week2_previous_week = df_week.shift(1).loc[df_week2.index]

# increase: change versus the previous week.
df_week2['increase'] = (
    df_week2['COVID-19 KOR'] - df_week2_previous_week['COVID-19 KOR']
)

# increase percent: change versus the previous week, in percent.
df_week2['increase percent'] = (
    100 * df_week2['increase'] / df_week2_previous_week['COVID-19 KOR']
)



In [2]:
df_week2

Unnamed: 0,date,subway,COVID-19,COVID-19 KOR,increase,increase percent
57,2020-02-05,14009898.8,4.0,12.0,0.0,0.000000
58,2020-02-12,14546996.0,2.0,4.0,-8.0,-66.666667
59,2020-02-19,13137736.6,37.0,861.0,857.0,21425.000000
60,2020-02-26,10372522.2,47.0,3919.0,3058.0,355.168409
61,2020-03-04,10290419.0,104.0,2701.0,-1218.0,-31.079357
...,...,...,...,...,...,...
129,2021-06-23,13619797.8,1791.0,4267.0,1013.0,31.130916
130,2021-06-30,13585402.8,2536.0,5374.0,1107.0,25.943286
131,2021-07-07,12297445.4,3528.0,8754.0,3380.0,62.895422
132,2021-07-14,11303303.6,3586.0,10178.0,1424.0,16.266849


In [3]:
df_quarter

Unnamed: 0,date,subway,COVID-19
0,2020Q1,12161313.76087,478
1,2020Q2,12387785.016129,844
2,2020Q3,12186759.84375,4002
3,2020Q4,12343831.677419,14069
4,2021Q1,12368386.416667,12835
5,2021Q2,13376056.873016,18091
6,2021Q3,11909896.272727,14504


### 1.2 시각화(image)

In [10]:
# Plotly chart: weekly subway ridership on the left axis against the two
# weekly COVID-19 count series on the right (secondary) axis.
# make_subplots and go are imported in the first cell.
# The figure is created once; the earlier bare go.Figure() was overwritten
# immediately and has been removed.
fig = make_subplots(specs=[[{'secondary_y': True}]])

# Ridership trace. Its legend name now says what is plotted; it previously
# read '주간 날짜' ("weekly date"), which is the x-axis label.
fig.add_trace(
    go.Scatter(
        x=df_week2['date'],
        y=df_week2['subway'],
        mode='lines',
        name='지하철 승하차량',
        line=dict(color='royalblue', width=1),
    ),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(
        x=df_week2['date'],
        y=df_week2['COVID-19'],
        mode='lines+markers',
        name='코로나 확진자',
    ),
    secondary_y=True,
)
fig.add_trace(
    go.Scatter(
        x=df_week2['date'],
        y=df_week2['COVID-19 KOR'],
        mode='lines+markers',
        name='코로나 총 확진자',
    ),
    secondary_y=True,
)

fig.update_layout(
    title='주간 날짜에 따른 확진자 추이',
    xaxis_title='주간 날짜(x)',
    yaxis_title='지하철 승하차량(y)',
    yaxis2_title='코로나 총 확진자(y2)',
    # Fixed axis ranges (x axis, left y axis, right y axis).
    xaxis=dict(range=['2020-01-01', '2021-07-31']),
    yaxis1=dict(range=[9500000, 14900000], dtick=1000000),
    yaxis2=dict(range=[0, 12000], dtick=5000),
)

fig.show()