In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")
df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,6/3/21,6/4/21,6/5/21,6/6/21,6/7/21,6/8/21,6/9/21,6/10/21,6/11/21,6/12/21
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,75119,76628,77963,79224,80841,82326,84050,85892,87716,88740
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,132360,132372,132374,132379,132384,132397,132415,132426,132437,132449
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,129976,130361,130681,130958,131283,131647,132034,132355,132727,133070
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,13752,13758,13758,13758,13777,13781,13791,13805,13813,13813
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,35140,35307,35594,35772,35854,36004,36115,36325,36455,36600


In [3]:
del df["Lat"]
del df["Long"]

In [4]:
countries_df = df[["Province/State","Country/Region"]]
countries_df.head()

Unnamed: 0,Province/State,Country/Region
0,,Afghanistan
1,,Albania
2,,Algeria
3,,Andorra
4,,Angola


In [5]:
days_df = df.copy()
del days_df["Province/State"]
del days_df["Country/Region"]

In [6]:
days_df = days_df.stack().reset_index()
days_df.columns=["country_id", "date", "count"]
days_df.head()

Unnamed: 0,country_id,date,count
0,0,1/22/20,0
1,0,1/23/20,0
2,0,1/24/20,0
3,0,1/25/20,0
4,0,1/26/20,0


In [7]:
df = pd.merge(countries_df, days_df, left_index=True, right_on="country_id")
del df["country_id"]
df["date"] = pd.to_datetime(df["date"])
df.head()

Unnamed: 0,Province/State,Country/Region,date,count
0,,Afghanistan,2020-01-22,0
1,,Afghanistan,2020-01-23,0
2,,Afghanistan,2020-01-24,0
3,,Afghanistan,2020-01-25,0
4,,Afghanistan,2020-01-26,0


In [8]:
global_df = df.groupby(["date"],as_index=False).sum("count")

In [9]:
global_df.tail()

Unnamed: 0,date,count
503,2021-06-08,173997437
504,2021-06-09,174416271
505,2021-06-10,174865276
506,2021-06-11,175286322
507,2021-06-12,175658224


In [10]:
latest_date_str = global_df.date.max().strftime("%Y-%m-%d")

In [11]:
import matplotlib.pyplot as plt
%matplotlib notebook
#%matplotlib inline

In [12]:
df["Country/Region"].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan',
       'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'Brunei', 'Bulgaria', 'Burkina Faso', 'Burma', 'Burundi',
       'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Comoros', 'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica',
       "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Denmark',
       'Diamond Princess', 'Djibouti', 'Dominica', 'Dominican Republic',
       'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea',
       'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France',
       'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece',
       'Grenada', 'Guatemala', 'Guin

In [13]:
#plt.figure(figsize=(9.5,5))
#plt.bar(global_df["date"],global_df["count"]/1000,label="positive")
#plt.legend()

In [14]:
trans = pd.DataFrame()
trans["date"] = global_df.iloc[1:]["date"]
trans["count1"] = global_df["count"].iloc[1:]
trans = trans.reset_index().drop(columns="index")
trans["count"] = trans["count1"] - global_df["count"].iloc[0:]
del trans["count1"]
trans["move_ave7"] = trans["count"].rolling(7).mean()


In [15]:
trans.tail()

Unnamed: 0,date,count,move_ave7
502,2021-06-08,366171.0,400332.428571
503,2021-06-09,418834.0,390779.857143
504,2021-06-10,449005.0,385049.571429
505,2021-06-11,421046.0,385239.428571
506,2021-06-12,371902.0,381459.571429


In [16]:
fig=plt.figure(figsize=(9.5,8))
plt.subplot(2,1,1) # (rows, columns, panel number)
plt.bar(trans["date"],trans["count"],label="新規感染者数")
plt.plot_date(trans["date"],trans["move_ave7"],label="７日間移動平均", fmt="-", color="orange")
plt.title("全世界の新規感染者数の推移({}時点)".format(latest_date_str))
plt.ylabel("人")
plt.grid(axis="y")
plt.legend()

plt.subplot(2,1,2) # (rows, columns, panel number)
plt.title("全世界の新規感染者数の推移（3月以降）({}時点)".format(latest_date_str))
chart_data = trans[trans.date >= "2021-03-01"]
plt.bar(chart_data["date"],chart_data["count"],label="新規感染者数")
plt.plot_date(chart_data["date"],chart_data["move_ave7"],label="７日間移動平均", fmt="-", color="orange")
plt.grid(axis="y")
plt.legend()
#plt.legend(bbox_to_anchor=(0, 1),loc='upper left', fontsize=9)
fig.text(0,0
         ,"※感染者の情報提供:COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University",
        fontsize=7,ha="left",va="bottom")

fig.tight_layout()

<IPython.core.display.Javascript object>

In [17]:
fig.savefig("out/covid-global-transition.png")

## 国別

In [18]:
df.tail()

Unnamed: 0,Province/State,Country/Region,date,count
140203,,Zimbabwe,2021-06-08,39321
140204,,Zimbabwe,2021-06-09,39432
140205,,Zimbabwe,2021-06-10,39496
140206,,Zimbabwe,2021-06-11,39688
140207,,Zimbabwe,2021-06-12,39852


### 中国の合計行を追加する

In [19]:
china_sum = df[df["Country/Region"]=="China"].groupby("date", as_index=False).sum("count")
china_sum

Unnamed: 0,date,count
0,2020-01-22,548
1,2020-01-23,643
2,2020-01-24,920
3,2020-01-25,1406
4,2020-01-26,2075
...,...,...
503,2021-06-08,103235
504,2021-06-09,103261
505,2021-06-10,103285
506,2021-06-11,103320


In [20]:
china_sum.insert(0,"Country/Region","China")
china_sum.insert(0,"Province/State",np.nan)
china_sum

NameError: name 'np' is not defined

In [21]:
df = pd.concat([df,china_sum])

### 国別新規陽性者数

In [22]:
trans = df.copy()
trans["inc_count"] = trans["count"].diff()
# 最小の日付は一律ゼロにする
trans.loc[trans["date"]==trans["date"].min(),"inc_count"] = 0

In [23]:
## 異常値？の訂正
import numpy as np
trans.loc[(trans["Country/Region"]=="France") & (trans.inc_count < -300000),"inc_count"]=np.nan

#### 移動平均

In [24]:
# "Province/State"と"Country/Region"の結合列を作ってユニークにする
trans["country_key"]=trans["Province/State"].fillna("NA")+"-"+trans["Country/Region"]
if trans["country_key"].isna().any():
    raise Exception("include NaN")
for k in trans.country_key.unique():
    print(k,end=",")
    trans.loc[trans.country_key==k,"cnt_move_ave7"] = trans.loc[trans.country_key==k,"count"].rolling(7).mean()
    trans.loc[trans.country_key==k,"inc_move_ave7"] = trans.loc[trans.country_key==k,"inc_count"].rolling(7).mean()

pd.concat([trans[trans.country_key=="NA-Afghanistan"].tail(3),trans[trans.country_key=="NA-Albania"].head(3)])

NA-Afghanistan,NA-Albania,NA-Algeria,NA-Andorra,NA-Angola,NA-Antigua and Barbuda,NA-Argentina,NA-Armenia,Australian Capital Territory-Australia,New South Wales-Australia,Northern Territory-Australia,Queensland-Australia,South Australia-Australia,Tasmania-Australia,Victoria-Australia,Western Australia-Australia,NA-Austria,NA-Azerbaijan,NA-Bahamas,NA-Bahrain,NA-Bangladesh,NA-Barbados,NA-Belarus,NA-Belgium,NA-Belize,NA-Benin,NA-Bhutan,NA-Bolivia,NA-Bosnia and Herzegovina,NA-Botswana,NA-Brazil,NA-Brunei,NA-Bulgaria,NA-Burkina Faso,NA-Burma,NA-Burundi,NA-Cabo Verde,NA-Cambodia,NA-Cameroon,Alberta-Canada,British Columbia-Canada,Diamond Princess-Canada,Grand Princess-Canada,Manitoba-Canada,New Brunswick-Canada,Newfoundland and Labrador-Canada,Northwest Territories-Canada,Nova Scotia-Canada,Nunavut-Canada,Ontario-Canada,Prince Edward Island-Canada,Quebec-Canada,Repatriated Travellers-Canada,Saskatchewan-Canada,Yukon-Canada,NA-Central African Republic,NA-Chad,NA-Chile,Anhui-China,Beijing-China,

Unnamed: 0,Province/State,Country/Region,date,count,inc_count,country_key,cnt_move_ave7,inc_move_ave7
505,,Afghanistan,2021-06-10,85892,1842.0,NA-Afghanistan,80989.142857,1539.0
506,,Afghanistan,2021-06-11,87716,1824.0,NA-Afghanistan,82573.142857,1584.0
507,,Afghanistan,2021-06-12,88740,1024.0,NA-Afghanistan,84112.714286,1539.571429
508,,Albania,2020-01-22,0,0.0,NA-Albania,,
509,,Albania,2020-01-23,0,0.0,NA-Albania,,
510,,Albania,2020-01-24,0,0.0,NA-Albania,,


In [25]:
trans["Country/Region"].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan',
       'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'Brunei', 'Bulgaria', 'Burkina Faso', 'Burma', 'Burundi',
       'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Comoros', 'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica',
       "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Denmark',
       'Diamond Princess', 'Djibouti', 'Dominica', 'Dominican Republic',
       'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea',
       'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France',
       'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece',
       'Grenada', 'Guatemala', 'Guin

## 人口当たり

In [26]:
pop = pd.read_excel("world-data/WPP2019_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.xlsx",header=16)
pop_latest = pop.loc[pop["Type"]=="Country/Area"][[pop.columns[2],pop.columns[-1]]]
pop_latest.columns = ["un_country","population"]
pop_latest.head(1)

Unnamed: 0,un_country,population
26,Burundi,11890.781


### 名称変換

In [27]:
alternates = pd.DataFrame([
    ("US","United States of America"),
    ("Russia","Russian Federation"),
    ("Iran","Iran (Islamic Republic of)"),
    ("Bolivia","Bolivia (Plurinational State of)"),
    ("Moldova","Republic of Moldova"),
    ("Venezuela","Venezuela (Bolivarian Republic of)"),
    ("Korea, South","Republic of Korea"),
    ("Congo (Kinshasa)","Democratic Republic of the Congo"),
    ("Congo (Brazzaville)","Congo"),
    ("Vietnam","Viet Nam"),
    ("Laos","Lao People's Democratic Republic"),
    ("Taiwan*","China, Taiwan Province of China"),
    ("Tanzania","United Republic of Tanzania"),
    ("Brunei","Brunei Darussalam"),
    ("Micronesia","Micronesia (Fed. States of)"),
    ("Syria","Syrian Arab Republic"),
], columns = ["covid_country","un_country"])

In [28]:
alternates.head(2)

Unnamed: 0,covid_country,un_country
0,US,United States of America
1,Russia,Russian Federation


In [29]:
trans_pop = pd.merge(trans, alternates, left_on="Country/Region", right_on="covid_country", how="left")
del trans_pop["covid_country"]
trans_pop.rename(columns={"Country/Region":"country"},inplace=True)

In [30]:
trans_pop.head()

Unnamed: 0,Province/State,country,date,count,inc_count,country_key,cnt_move_ave7,inc_move_ave7,un_country
0,,Afghanistan,2020-01-22,0,0.0,NA-Afghanistan,,,
1,,Afghanistan,2020-01-23,0,0.0,NA-Afghanistan,,,
2,,Afghanistan,2020-01-24,0,0.0,NA-Afghanistan,,,
3,,Afghanistan,2020-01-25,0,0.0,NA-Afghanistan,,,
4,,Afghanistan,2020-01-26,0,0.0,NA-Afghanistan,,,


In [31]:
trans_pop.isnull().any(axis=0)

Province/State     True
country           False
date              False
count             False
inc_count          True
country_key       False
cnt_move_ave7      True
inc_move_ave7      True
un_country         True
dtype: bool

In [32]:
trans_pop["un_country"].fillna(trans_pop["country"],inplace=True)

In [33]:
trans_pop.isnull().any(axis=0)

Province/State     True
country           False
date              False
count             False
inc_count          True
country_key       False
cnt_move_ave7      True
inc_move_ave7      True
un_country        False
dtype: bool

In [34]:
trans_pop = pd.merge(trans_pop, pop_latest, on="un_country", how="left")

In [35]:
trans_pop.head()

Unnamed: 0,Province/State,country,date,count,inc_count,country_key,cnt_move_ave7,inc_move_ave7,un_country,population
0,,Afghanistan,2020-01-22,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341
1,,Afghanistan,2020-01-23,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341
2,,Afghanistan,2020-01-24,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341
3,,Afghanistan,2020-01-25,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341
4,,Afghanistan,2020-01-26,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341


In [36]:
trans_pop["count_per_mil_pop"] = trans_pop["count"] / (trans_pop["population"] / 1000)
trans_pop["inc_count_per_mil_pop"] = trans_pop["inc_count"] / (trans_pop["population"] / 1000)
trans_pop["cnt_move_ave7_milpop"] = trans_pop["cnt_move_ave7"] / (trans_pop["population"] / 1000)
trans_pop["inc_move_ave7_milpop"] = trans_pop["inc_move_ave7"] / (trans_pop["population"] / 1000)

In [37]:
pd.concat([trans_pop.head(),trans_pop.tail()])

Unnamed: 0,Province/State,country,date,count,inc_count,country_key,cnt_move_ave7,inc_move_ave7,un_country,population,count_per_mil_pop,inc_count_per_mil_pop,cnt_move_ave7_milpop,inc_move_ave7_milpop
0,,Afghanistan,2020-01-22,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341,0.0,0.0,,
1,,Afghanistan,2020-01-23,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341,0.0,0.0,,
2,,Afghanistan,2020-01-24,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341,0.0,0.0,,
3,,Afghanistan,2020-01-25,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341,0.0,0.0,,
4,,Afghanistan,2020-01-26,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341,0.0,0.0,,
140711,,China,2021-06-08,103235,19.0,NA-China,103151.285714,27.142857,China,1439323.774,71.724654,0.013201,71.666492,0.018858
140712,,China,2021-06-09,103261,26.0,NA-China,103178.714286,27.428571,China,1439323.774,71.742718,0.018064,71.685549,0.019057
140713,,China,2021-06-10,103285,24.0,NA-China,103206.0,27.285714,China,1439323.774,71.759393,0.016674,71.704506,0.018957
140714,,China,2021-06-11,103320,35.0,NA-China,103234.714286,28.714286,China,1439323.774,71.78371,0.024317,71.724456,0.01995
140715,,China,2021-06-12,103357,37.0,NA-China,103264.285714,29.571429,China,1439323.774,71.809416,0.025707,71.745001,0.020545


In [38]:
trans_pop.loc[trans_pop["country"]=="Japan"].tail()


Unnamed: 0,Province/State,country,date,count,inc_count,country_key,cnt_move_ave7,inc_move_ave7,un_country,population,count_per_mil_pop,inc_count_per_mil_pop,cnt_move_ave7_milpop,inc_move_ave7_milpop
79243,,Japan,2021-06-08,766068,1883.0,NA-Japan,760150.571429,2319.857143,Japan,126476.458,6057.000742,14.888146,6010.213944,18.342205
79244,,Japan,2021-06-09,768313,2245.0,NA-Japan,762357.428571,2206.857143,Japan,126476.458,6074.751081,17.750339,6027.662702,17.448758
79245,,Japan,2021-06-10,770357,2044.0,NA-Japan,764449.428571,2092.0,Japan,126476.458,6090.912192,16.16111,6044.20333,16.540628
79246,,Japan,2021-06-11,772293,1936.0,NA-Japan,766449.857143,2000.428571,Japan,126476.458,6106.219388,15.307197,6060.019938,15.816608
79247,,Japan,2021-06-12,774240,1947.0,NA-Japan,768348.0,1898.142857,Japan,126476.458,6121.613558,15.394169,6075.027813,15.007875


### 主要国の定義

In [39]:
countries = pd.DataFrame({"country":[
    "Japan",
    "US",
    "India",
    "Sweden",
    "Brazil",
    "Israel",
#    "Korea, South",
#    "Taiwan*",
    "United Kingdom",
    "France",
]})
dates = trans["date"].unique()

In [40]:
#c_df = pd.merge(countries, trans_pop[trans_pop["Province/State"].isna()], left_on="country", right_on="Country/Region", how="inner")[["country","date","count","inc_count","move_ave7"]]
c_trans = pd.merge(countries, trans_pop[trans_pop["Province/State"].isna()], on="country", how="inner")


In [41]:
c_trans

Unnamed: 0,country,Province/State,date,count,inc_count,country_key,cnt_move_ave7,inc_move_ave7,un_country,population,count_per_mil_pop,inc_count_per_mil_pop,cnt_move_ave7_milpop,inc_move_ave7_milpop
0,Japan,,2020-01-22,2,0.0,NA-Japan,,,Japan,126476.458,0.015813,0.0,,
1,Japan,,2020-01-23,2,0.0,NA-Japan,,,Japan,126476.458,0.015813,0.0,,
2,Japan,,2020-01-24,2,0.0,NA-Japan,,,Japan,126476.458,0.015813,0.0,,
3,Japan,,2020-01-25,2,0.0,NA-Japan,,,Japan,126476.458,0.015813,0.0,,
4,Japan,,2020-01-26,4,2.0,NA-Japan,,,Japan,126476.458,0.031626,0.015813,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4059,France,,2021-06-08,5659478,5800.0,NA-France,5.644109e+06,5763.571429,France,65273.512,86704.052327,88.85687,86468.603506,88.298779
4060,France,,2021-06-09,5663717,4239.0,NA-France,5.650479e+06,6369.142857,France,65273.512,86768.994443,64.942116,86566.17973,97.576224
4061,France,,2021-06-10,5667993,4276.0,NA-France,5.655260e+06,4781.000000,France,65273.512,86834.503405,65.508962,86639.425368,73.245638
4062,France,,2021-06-11,5671864,3871.0,NA-France,5.659626e+06,4366.857143,France,65273.512,86893.807706,59.304301,86706.326275,66.900907


In [42]:
len(dates)

508

In [43]:
c_trans[c_trans["country"]=="Japan"].tail()

Unnamed: 0,country,Province/State,date,count,inc_count,country_key,cnt_move_ave7,inc_move_ave7,un_country,population,count_per_mil_pop,inc_count_per_mil_pop,cnt_move_ave7_milpop,inc_move_ave7_milpop
503,Japan,,2021-06-08,766068,1883.0,NA-Japan,760150.571429,2319.857143,Japan,126476.458,6057.000742,14.888146,6010.213944,18.342205
504,Japan,,2021-06-09,768313,2245.0,NA-Japan,762357.428571,2206.857143,Japan,126476.458,6074.751081,17.750339,6027.662702,17.448758
505,Japan,,2021-06-10,770357,2044.0,NA-Japan,764449.428571,2092.0,Japan,126476.458,6090.912192,16.16111,6044.20333,16.540628
506,Japan,,2021-06-11,772293,1936.0,NA-Japan,766449.857143,2000.428571,Japan,126476.458,6106.219388,15.307197,6060.019938,15.816608
507,Japan,,2021-06-12,774240,1947.0,NA-Japan,768348.0,1898.142857,Japan,126476.458,6121.613558,15.394169,6075.027813,15.007875


In [44]:
print(any(c_trans["date"].isnull()))
print(any(c_trans["date"].isna()))

False
False


In [45]:
c_trans.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4064 entries, 0 to 4063
Data columns (total 14 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   country                4064 non-null   object        
 1   Province/State         0 non-null      object        
 2   date                   4064 non-null   datetime64[ns]
 3   count                  4064 non-null   int64         
 4   inc_count              4063 non-null   float64       
 5   country_key            4064 non-null   object        
 6   cnt_move_ave7          4016 non-null   float64       
 7   inc_move_ave7          4009 non-null   float64       
 8   un_country             4064 non-null   object        
 9   population             4064 non-null   object        
 10  count_per_mil_pop      4064 non-null   object        
 11  inc_count_per_mil_pop  4063 non-null   object        
 12  cnt_move_ave7_milpop   4016 non-null   object        
 13  inc

### 東アジア

In [46]:
# 東Asia 主要国
e_asia_c = pd.DataFrame({"country":[
    "Japan",
    "China",
    "Taiwan*",
    "Korea, South",
    "Indonesia",
    "Philippines",
    "Vietnam",
]})

In [47]:
e_a_trans = pd.merge(e_asia_c, trans_pop[trans_pop["Province/State"].isna()], on="country", how="inner")
e_a_trans

Unnamed: 0,country,Province/State,date,count,inc_count,country_key,cnt_move_ave7,inc_move_ave7,un_country,population,count_per_mil_pop,inc_count_per_mil_pop,cnt_move_ave7_milpop,inc_move_ave7_milpop
0,Japan,,2020-01-22,2,0.0,NA-Japan,,,Japan,126476.458,0.015813,0.0,,
1,Japan,,2020-01-23,2,0.0,NA-Japan,,,Japan,126476.458,0.015813,0.0,,
2,Japan,,2020-01-24,2,0.0,NA-Japan,,,Japan,126476.458,0.015813,0.0,,
3,Japan,,2020-01-25,2,0.0,NA-Japan,,,Japan,126476.458,0.015813,0.0,,
4,Japan,,2020-01-26,4,2.0,NA-Japan,,,Japan,126476.458,0.031626,0.015813,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3551,Vietnam,,2021-06-08,9222,195.0,NA-Vietnam,8559.571429,228.142857,Viet Nam,97338.583,94.741465,2.003317,87.93606,2.343807
3552,Vietnam,,2021-06-09,9635,413.0,NA-Vietnam,8811.714286,252.142857,Viet Nam,97338.583,98.984387,4.242922,90.526429,2.590369
3553,Vietnam,,2021-06-10,9835,200.0,NA-Vietnam,9064.857143,253.142857,Viet Nam,97338.583,101.039071,2.054684,93.127071,2.600643
3554,Vietnam,,2021-06-11,10048,213.0,NA-Vietnam,9305.428571,240.571429,Viet Nam,97338.583,103.227309,2.188238,95.598562,2.471491


In [48]:
e_a_trans.count()

country                  3556
Province/State              0
date                     3556
count                    3556
inc_count                3556
country_key              3556
cnt_move_ave7            3514
inc_move_ave7            3514
un_country               3556
population               3556
count_per_mil_pop        3556
inc_count_per_mil_pop    3556
cnt_move_ave7_milpop     3514
inc_move_ave7_milpop     3514
dtype: int64

## チャート化

In [49]:
# 主要国　新規感染者数　全期間
plt.figure(figsize=(9.5,5))
plt.grid()
dates = c_trans["date"].unique()
for c in countries["country"]:
    print(c)
    plt.plot_date(dates,c_trans.loc[c_trans["country"]==c]["inc_count"],label=c, fmt="-")
plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

Japan
US
India
Sweden
Brazil
Israel
United Kingdom
France


In [50]:
# 主要国　新規感染者数７日間移動平均　全期間
plt.figure(figsize=(9.5,5))
plt.grid()
dates = c_trans["date"].unique()
for c in countries["country"]:
    print(c)
    plt.plot_date(dates,c_trans.loc[c_trans["country"]==c]["cnt_move_ave7"],label=c, fmt="-")
plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

Japan
US
India
Sweden
Brazil
Israel
United Kingdom
France


In [51]:
# 主要国　１００万人あたり感染者数累計 全期間　７日間移動平均
plt.figure(figsize=(9.5,5))
plt.grid()
plt.title("positives per million poplulasions transition(1w moving-ave)")
dates = c_trans["date"].unique()
for c in countries["country"]:
    plt.plot_date(dates,c_trans.loc[c_trans["country"]==c]["cnt_move_ave7_milpop"],label=c, fmt="-")
    
plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

In [52]:
# 主要国　１００万人あたり感染者数累計 1月以降　７日間移動平均
plt.figure(figsize=(9.5,5))
plt.grid()
plt.title("positives per million poplulasions transition(1w moving-ave)(21/1/1-)")
dates = c_trans.loc[c_trans["date"]>="2021-01-01"]["date"].unique()
for c in countries["country"]:
    plt.plot_date(dates,c_trans.loc[(c_trans["country"]==c) & (c_trans["date"]>="2021-01-01")]["cnt_move_ave7_milpop"],label=c, fmt="-")
    
plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

## 人口当たりの新規感染者数

In [53]:
# 主要国　１００万人あたり新規感染者数 全期間　７日間移動平均
plt.figure(figsize=(9.5,5))
plt.grid()
plt.title("increase positives per million poplulasions transition(1w moving-ave)")
dates = c_trans["date"].unique()
for c in countries["country"]:
    plt.plot_date(dates,c_trans.loc[c_trans["country"]==c]["inc_move_ave7_milpop"],label=c, fmt="-")
    plt.text(dates.max(),c_trans.loc[(c_trans["country"]==c) & (c_trans["date"]==dates.max())]["inc_move_ave7_milpop"],c,fontsize=8)

plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

## 人口100万人当たりの新規感染者数（3月以降）

In [54]:
# 主要国　１００万人あたり感染者数累計 3月以降　７日間移動平均
fig=plt.figure(figsize=(9.5,10))
plt.subplot(2,1,1) # (rows, columns, panel number)
plt.grid()
plt.title("主要国の人口100万人当たりの新規感染者数７日間移動平均({}時点)".format(latest_date_str))
chart_data = c_trans[c_trans.date >= "2021-03-01"]
dates = chart_data["date"].unique()
for c in countries["country"]:
    plt.plot_date(dates,chart_data.loc[chart_data["country"]==c]["inc_move_ave7_milpop"],label=c, fmt="-")
    plt.text(dates.max(),chart_data.loc[(chart_data["country"]==c) & (chart_data["date"]==dates.max())]["inc_move_ave7_milpop"],c
            ,fontsize=8,va="center")
plt.legend(bbox_to_anchor=(0, 1),loc='upper left', fontsize=9)

plt.subplot(2,1,2) # (rows, columns, panel number)
plt.grid()
plt.title("東・東南アジア主要国の人口100万人当たりの新規感染者数７日間移動平均({}時点)".format(latest_date_str))
chart_data = e_a_trans[e_a_trans.date >= "2021-03-01"]
dates = chart_data["date"].unique()
for c in e_asia_c["country"]:
    plt.plot_date(dates,chart_data.loc[chart_data["country"]==c]["inc_move_ave7_milpop"],label=c, fmt="-")
    plt.text(dates.max(),chart_data.loc[(chart_data["country"]==c) & (chart_data["date"]==dates.max())]["inc_move_ave7_milpop"],c
             ,fontsize=8,va="center")
plt.legend(bbox_to_anchor=(0, 1),loc='upper left', fontsize=9)
plt.text(dates.max(),-1,"※ChinaはProvince/State毎の値を合算",ha="right",va="top",fontsize=8)

fig.text(0,0
         ,"※感染者の情報提供:COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University,"\
         +"※人口の情報提供:WHO",
        fontsize=7,ha="left",va="bottom")
fig.tight_layout()


<IPython.core.display.Javascript object>

In [55]:
fig.savefig("out/covid-global-countries-transition.png")