<a href="https://colab.research.google.com/github/nekodango/tokyo_stopcovid19_opendata/blob/plotly/tokyo_opendata_covid19_patients.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>



# 1.   データの取得・集計



In [1]:
import pandas as pd

In [2]:
!wget https://stopcovid19.metro.tokyo.lg.jp/data/130001_tokyo_covid19_patients.csv

--2020-07-04 14:56:44--  https://stopcovid19.metro.tokyo.lg.jp/data/130001_tokyo_covid19_patients.csv
Resolving stopcovid19.metro.tokyo.lg.jp (stopcovid19.metro.tokyo.lg.jp)... 178.128.17.49, 2400:6180:0:d1::808:1
Connecting to stopcovid19.metro.tokyo.lg.jp (stopcovid19.metro.tokyo.lg.jp)|178.128.17.49|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 443062 (433K) [text/csv]
Saving to: ‘130001_tokyo_covid19_patients.csv’


2020-07-04 14:56:46 (324 KB/s) - ‘130001_tokyo_covid19_patients.csv’ saved [443062/443062]



In [3]:
# 東京都_新型コロナウイルス陽性患者発表詳細
df_patients = pd.read_csv('130001_tokyo_covid19_patients.csv')
df_patients['公表_年月日'] = pd.to_datetime(df_patients['公表_年月日'], format='%Y-%m-%d')
# 1行目に破損データ?が含まれているため、それを除去する
#df_patients.dropna(subset=['公表_年月日'], inplace=True)

In [4]:
# 読み込んだテーブル先頭5行を表示する
df_patients.head(5)

Unnamed: 0,No,全国地方公共団体コード,都道府県名,市区町村名,公表_年月日,曜日,発症_年月日,患者_居住地,患者_年代,患者_性別,患者_属性,患者_状態,患者_症状,患者_渡航歴の有無フラグ,備考,退院済フラグ
0,1,130001,東京都,,2020-01-24,金,,湖北省武漢市,40代,男性,,,,,,1.0
1,2,130001,東京都,,2020-01-25,土,,湖北省武漢市,30代,女性,,,,,,1.0
2,3,130001,東京都,,2020-01-30,木,,湖南省長沙市,30代,女性,,,,,,1.0
3,4,130001,東京都,,2020-02-13,木,,都内,70代,男性,,,,,,1.0
4,5,130001,東京都,,2020-02-14,金,,都内,50代,女性,,,,,,1.0


In [5]:
# 縦軸に日付、横軸に年代をとった、日別・年代別陽性患者数の集計表を作る
df_tmp = df_patients[['公表_年月日', '患者_年代']]
df_tmp['人数'] = 1
df_tmp2 = df_tmp.pivot(columns='患者_年代', values='人数' )
df_tmp = pd.concat([df_tmp['公表_年月日'], df_tmp2], axis=1).fillna(0)
df_tmp = df_tmp[['公表_年月日', '10歳未満', '10代',  '20代', '30代', '40代', '50代', '60代', '70代', '80代', '90代']]
df_tmp = df_tmp.groupby('公表_年月日').sum()
df_dairy_patients = df_tmp.resample('D').mean().fillna(0) #.to_csv('patients.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [6]:
# 日別・年代別陽性患者数の集計表を表示する
df_dairy_patients

Unnamed: 0_level_0,10歳未満,10代,20代,30代,40代,50代,60代,70代,80代,90代
公表_年月日,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-01-24,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2020-01-25,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-28,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
2020-06-30,2.0,5.0,16.0,10.0,8.0,7.0,2.0,3.0,1.0,0.0
2020-07-01,2.0,2.0,35.0,14.0,9.0,3.0,2.0,0.0,0.0,0.0
2020-07-02,3.0,2.0,37.0,34.0,11.0,10.0,2.0,3.0,2.0,3.0
2020-07-03,0.0,4.0,60.0,37.0,9.0,6.0,4.0,2.0,1.0,1.0


# 2. 可視化

In [7]:
import plotly.express as px
import plotly.graph_objects as go

In [9]:
# 年代ごとの患者数推移
df_dairy_patients_long = pd.melt(df_dairy_patients.reset_index(),id_vars='公表_年月日', var_name='年代', value_name='人数')
fig = px.line(df_dairy_patients_long, x='公表_年月日', y='人数', color='年代', range_y=[0, df_dairy_patients_long['人数'].max()])
fig.update_xaxes(rangeslider_visible=True)
fig.show()

In [10]:
# 移動平均を算出
df_dairy_patients_long = pd.melt(df_dairy_patients.reset_index(),id_vars='公表_年月日', var_name='年代', value_name='人数')
df_dairy_patients_total = df_dairy_patients_long.groupby('公表_年月日').sum().reset_index()

for i in [3, 7, 14, 28]:
  df_dairy_patients_total[f'移動平均({i}日間)'] = df_dairy_patients_total['人数'].rolling(i).mean()

df_dairy_patients_total = df_dairy_patients_total.rename(columns={'人数': '実データ'})
df_dairy_patients_total = pd.melt(df_dairy_patients_total, id_vars='公表_年月日', var_name='値', value_name='人数')

In [11]:
# 日別の患者数推移
fig = px.line(df_dairy_patients_total, x='公表_年月日', y='人数', color='値', range_y=[0, df_dairy_patients_total['人数'].max()])
fig.update_xaxes(rangeslider_visible=True)
fig.show()

In [None]:
df_dairy_patients_total