In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")
df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,11/15/22,11/16/22,11/17/22,11/18/22,11/19/22,11/20/22,11/21/22,11/22/22,11/23/22,11/24/22
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,204724,204820,204982,205009,205039,205146,205229,205324,205391,205506
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,333197,333215,333233,333233,333246,333256,333257,333282,333293,333305
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,270969,270981,270996,270996,271011,271023,271028,271035,271041,271050
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,46664,46824,46824,46824,46824,46824,46824,46824,46824,46824
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,103131,103131,103131,103131,103131,103131,103131,103131,104491,104491


In [3]:
del df["Lat"]
del df["Long"]

In [4]:
countries_df = df[["Province/State","Country/Region"]]
countries_df.head()

Unnamed: 0,Province/State,Country/Region
0,,Afghanistan
1,,Albania
2,,Algeria
3,,Andorra
4,,Angola


In [5]:
days_df = df.copy()
del days_df["Province/State"]
del days_df["Country/Region"]

In [6]:
days_df = days_df.stack().reset_index()
days_df.columns=["country_id", "date", "count"]
days_df.head()

Unnamed: 0,country_id,date,count
0,0,1/22/20,0
1,0,1/23/20,0
2,0,1/24/20,0
3,0,1/25/20,0
4,0,1/26/20,0


In [7]:
df = pd.merge(countries_df, days_df, left_index=True, right_on="country_id")
del df["country_id"]
df["date"] = pd.to_datetime(df["date"])
df.head()

Unnamed: 0,Province/State,Country/Region,date,count
0,,Afghanistan,2020-01-22,0
1,,Afghanistan,2020-01-23,0
2,,Afghanistan,2020-01-24,0
3,,Afghanistan,2020-01-25,0
4,,Afghanistan,2020-01-26,0


In [8]:
global_df = df.groupby(["date"],as_index=False).sum("count")

In [9]:
global_df.tail()

Unnamed: 0,date,count
1033,2022-11-20,638083364
1034,2022-11-21,638439308
1035,2022-11-22,638996980
1036,2022-11-23,639576590
1037,2022-11-24,640360237


In [10]:
latest_date_str = global_df.date.max().strftime("%Y-%m-%d")

In [11]:
import matplotlib.pyplot as plt
%matplotlib notebook
#%matplotlib inline

In [12]:
df["Country/Region"].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antarctica', 'Antigua and Barbuda', 'Argentina', 'Armenia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin',
       'Bhutan', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana',
       'Brazil', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Burma', 'Burundi',
       'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Comoros', 'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica',
       "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Denmark',
       'Diamond Princess', 'Djibouti', 'Dominica', 'Dominican Republic',
       'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea',
       'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France',
       'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece',
       'Grenada', 'Gua

In [13]:
#plt.figure(figsize=(9.5,5))
#plt.bar(global_df["date"],global_df["count"]/1000,label="positive")
#plt.legend()

In [14]:
trans = pd.DataFrame()
trans["date"] = global_df.iloc[1:]["date"]
trans["count1"] = global_df["count"].iloc[1:]
trans = trans.reset_index().drop(columns="index")
trans["count"] = trans["count1"] - global_df["count"].iloc[0:]
del trans["count1"]
trans["move_ave7"] = trans["count"].rolling(7).mean()


In [15]:
trans.tail()

Unnamed: 0,date,count,move_ave7
1032,2022-11-20,221701.0,415624.142857
1033,2022-11-21,355944.0,414319.142857
1034,2022-11-22,557672.0,423376.0
1035,2022-11-23,579610.0,437566.142857
1036,2022-11-24,783647.0,481924.428571


In [16]:
fig=plt.figure(figsize=(9.5,8))
plt.subplot(2,1,1) # (rows, columns, panel number)
plt.bar(trans["date"],trans["count"],label="New cases")
plt.plot_date(trans["date"],trans["move_ave7"],label="rolling 7-day average", fmt="-", color="orange")
plt.title("Daily new confirmed COVID-19 cases in the world({})".format(latest_date_str))
plt.ylabel("people")
plt.grid(axis="y")
plt.legend()

plt.subplot(2,1,2) # (rows, columns, panel number)
plt.title("Daily new confirmed COVID-19 cases in the world(after 2021 Mar)({})".format(latest_date_str))
chart_data = trans[trans.date >= "2021-03-01"]
plt.bar(chart_data["date"],chart_data["count"],label="New cases")
plt.plot_date(chart_data["date"],chart_data["move_ave7"],label="rolling 7-day average", fmt="-", color="orange")
plt.grid(axis="y")
plt.legend()
#plt.legend(bbox_to_anchor=(0, 1),loc='upper left', fontsize=9)
fig.text(0,0
         ,"*Data reference from:COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University",
        fontsize=7,ha="left",va="bottom")

fig.tight_layout()

<IPython.core.display.Javascript object>

In [17]:
fig.savefig("out/covid-global-transition.png")

## 国別

In [18]:
df.tail()

Unnamed: 0,Province/State,Country/Region,date,count
299977,,Zimbabwe,2022-11-20,257893
299978,,Zimbabwe,2022-11-21,257893
299979,,Zimbabwe,2022-11-22,257893
299980,,Zimbabwe,2022-11-23,257893
299981,,Zimbabwe,2022-11-24,257893


### 中国の合計行を追加する

In [19]:
china_sum = df[df["Country/Region"]=="China"].groupby("date", as_index=False).sum("count")
china_sum

Unnamed: 0,date,count
0,2020-01-22,548
1,2020-01-23,643
2,2020-01-24,920
3,2020-01-25,1406
4,2020-01-26,2075
...,...,...
1033,2022-11-20,3329533
1034,2022-11-21,3364415
1035,2022-11-22,3373507
1036,2022-11-23,3437108


In [20]:
import numpy as np
china_sum.insert(0,"Country/Region","China")
china_sum.insert(0,"Province/State",np.nan)
china_sum

Unnamed: 0,Province/State,Country/Region,date,count
0,,China,2020-01-22,548
1,,China,2020-01-23,643
2,,China,2020-01-24,920
3,,China,2020-01-25,1406
4,,China,2020-01-26,2075
...,...,...,...,...
1033,,China,2022-11-20,3329533
1034,,China,2022-11-21,3364415
1035,,China,2022-11-22,3373507
1036,,China,2022-11-23,3437108


In [21]:
df = pd.concat([df,china_sum])

### 国別新規陽性者数

In [22]:
trans = df.copy()
trans["inc_count"] = trans["count"].diff()
# 最小の日付は一律ゼロにする
trans.loc[trans["date"]==trans["date"].min(),"inc_count"] = 0

In [23]:
## 異常値？の訂正
import numpy as np
trans.loc[(trans["Country/Region"]=="France") & (trans.inc_count < -300000),"inc_count"]=np.nan

#### 移動平均

In [24]:
# "Province/State"と"Country/Region"の結合列を作ってユニークにする
trans["country_key"]=trans["Province/State"].fillna("NA")+"-"+trans["Country/Region"]
if trans["country_key"].isna().any():
    raise Exception("include NaN")
for k in trans.country_key.unique():
    print(k,end=",")
    trans.loc[trans.country_key==k,"cnt_move_ave7"] = trans.loc[trans.country_key==k,"count"].rolling(7).mean()
    trans.loc[trans.country_key==k,"inc_move_ave7"] = trans.loc[trans.country_key==k,"inc_count"].rolling(7).mean()

pd.concat([trans[trans.country_key=="NA-Afghanistan"].tail(3),trans[trans.country_key=="NA-Albania"].head(3)])

NA-Afghanistan,NA-Albania,NA-Algeria,NA-Andorra,NA-Angola,NA-Antarctica,NA-Antigua and Barbuda,NA-Argentina,NA-Armenia,Australian Capital Territory-Australia,New South Wales-Australia,Northern Territory-Australia,Queensland-Australia,South Australia-Australia,Tasmania-Australia,Victoria-Australia,Western Australia-Australia,NA-Austria,NA-Azerbaijan,NA-Bahamas,NA-Bahrain,NA-Bangladesh,NA-Barbados,NA-Belarus,NA-Belgium,NA-Belize,NA-Benin,NA-Bhutan,NA-Bolivia,NA-Bosnia and Herzegovina,NA-Botswana,NA-Brazil,NA-Brunei,NA-Bulgaria,NA-Burkina Faso,NA-Burma,NA-Burundi,NA-Cabo Verde,NA-Cambodia,NA-Cameroon,Alberta-Canada,British Columbia-Canada,Diamond Princess-Canada,Grand Princess-Canada,Manitoba-Canada,New Brunswick-Canada,Newfoundland and Labrador-Canada,Northwest Territories-Canada,Nova Scotia-Canada,Nunavut-Canada,Ontario-Canada,Prince Edward Island-Canada,Quebec-Canada,Repatriated Travellers-Canada,Saskatchewan-Canada,Yukon-Canada,NA-Central African Republic,NA-Chad,NA-Chile,Anhui-China,

Unnamed: 0,Province/State,Country/Region,date,count,inc_count,country_key,cnt_move_ave7,inc_move_ave7
1035,,Afghanistan,2022-11-22,205324,95.0,NA-Afghanistan,205078.428571,85.714286
1036,,Afghanistan,2022-11-23,205391,67.0,NA-Afghanistan,205160.0,81.571429
1037,,Afghanistan,2022-11-24,205506,115.0,NA-Afghanistan,205234.857143,74.857143
1038,,Albania,2020-01-22,0,0.0,NA-Albania,,
1039,,Albania,2020-01-23,0,0.0,NA-Albania,,
1040,,Albania,2020-01-24,0,0.0,NA-Albania,,


In [25]:
trans["Country/Region"].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antarctica', 'Antigua and Barbuda', 'Argentina', 'Armenia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin',
       'Bhutan', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana',
       'Brazil', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Burma', 'Burundi',
       'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Comoros', 'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica',
       "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Denmark',
       'Diamond Princess', 'Djibouti', 'Dominica', 'Dominican Republic',
       'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea',
       'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France',
       'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece',
       'Grenada', 'Gua

## 人口当たり

In [26]:
pop = pd.read_excel("world-data/WPP2019_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.xlsx",header=16)
pop_latest = pop.loc[pop["Type"]=="Country/Area"][[pop.columns[2],pop.columns[-1]]]
pop_latest.columns = ["un_country","population"]
pop_latest.head(1)

Unnamed: 0,un_country,population
26,Burundi,11890.781


### 名称変換

In [27]:
alternates = pd.DataFrame([
    ("US","United States of America"),
    ("Russia","Russian Federation"),
    ("Iran","Iran (Islamic Republic of)"),
    ("Bolivia","Bolivia (Plurinational State of)"),
    ("Moldova","Republic of Moldova"),
    ("Venezuela","Venezuela (Bolivarian Republic of)"),
    ("Korea, South","Republic of Korea"),
    ("Congo (Kinshasa)","Democratic Republic of the Congo"),
    ("Congo (Brazzaville)","Congo"),
    ("Vietnam","Viet Nam"),
    ("Laos","Lao People's Democratic Republic"),
    ("Taiwan*","China, Taiwan Province of China"),
    ("Tanzania","United Republic of Tanzania"),
    ("Brunei","Brunei Darussalam"),
    ("Micronesia","Micronesia (Fed. States of)"),
    ("Syria","Syrian Arab Republic"),
], columns = ["covid_country","un_country"])

In [28]:
alternates.head(2)

Unnamed: 0,covid_country,un_country
0,US,United States of America
1,Russia,Russian Federation


In [29]:
trans_pop = pd.merge(trans, alternates, left_on="Country/Region", right_on="covid_country", how="left")
del trans_pop["covid_country"]
trans_pop.rename(columns={"Country/Region":"country"},inplace=True)

In [30]:
trans_pop.head()

Unnamed: 0,Province/State,country,date,count,inc_count,country_key,cnt_move_ave7,inc_move_ave7,un_country
0,,Afghanistan,2020-01-22,0,0.0,NA-Afghanistan,,,
1,,Afghanistan,2020-01-23,0,0.0,NA-Afghanistan,,,
2,,Afghanistan,2020-01-24,0,0.0,NA-Afghanistan,,,
3,,Afghanistan,2020-01-25,0,0.0,NA-Afghanistan,,,
4,,Afghanistan,2020-01-26,0,0.0,NA-Afghanistan,,,


In [31]:
trans_pop.isnull().any(axis=0)

Province/State     True
country           False
date              False
count             False
inc_count          True
country_key       False
cnt_move_ave7      True
inc_move_ave7      True
un_country         True
dtype: bool

In [32]:
trans_pop["un_country"].fillna(trans_pop["country"],inplace=True)

In [33]:
trans_pop.isnull().any(axis=0)

Province/State     True
country           False
date              False
count             False
inc_count          True
country_key       False
cnt_move_ave7      True
inc_move_ave7      True
un_country        False
dtype: bool

In [34]:
trans_pop = pd.merge(trans_pop, pop_latest, on="un_country", how="left")

In [35]:
trans_pop.head()

Unnamed: 0,Province/State,country,date,count,inc_count,country_key,cnt_move_ave7,inc_move_ave7,un_country,population
0,,Afghanistan,2020-01-22,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341
1,,Afghanistan,2020-01-23,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341
2,,Afghanistan,2020-01-24,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341
3,,Afghanistan,2020-01-25,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341
4,,Afghanistan,2020-01-26,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341


In [36]:
trans_pop["count_per_mil_pop"] = trans_pop["count"] / (trans_pop["population"] / 1000)
trans_pop["inc_count_per_mil_pop"] = trans_pop["inc_count"] / (trans_pop["population"] / 1000)
trans_pop["cnt_move_ave7_milpop"] = trans_pop["cnt_move_ave7"] / (trans_pop["population"] / 1000)
trans_pop["inc_move_ave7_milpop"] = trans_pop["inc_move_ave7"] / (trans_pop["population"] / 1000)

In [37]:
pd.concat([trans_pop.head(),trans_pop.tail()])

Unnamed: 0,Province/State,country,date,count,inc_count,country_key,cnt_move_ave7,inc_move_ave7,un_country,population,count_per_mil_pop,inc_count_per_mil_pop,cnt_move_ave7_milpop,inc_move_ave7_milpop
0,,Afghanistan,2020-01-22,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341,0.0,0.0,,
1,,Afghanistan,2020-01-23,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341,0.0,0.0,,
2,,Afghanistan,2020-01-24,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341,0.0,0.0,,
3,,Afghanistan,2020-01-25,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341,0.0,0.0,,
4,,Afghanistan,2020-01-26,0,0.0,NA-Afghanistan,,,Afghanistan,38928.341,0.0,0.0,,
301015,,China,2022-11-20,3329533,33741.0,NA-China,3223036.0,31601.285714,China,1439323.774,2313.262005,23.442259,2239.271108,21.955648
301016,,China,2022-11-21,3364415,34882.0,NA-China,3256544.0,33508.0,China,1439323.774,2337.496997,24.234992,2262.551485,23.280377
301017,,China,2022-11-22,3373507,9092.0,NA-China,3287987.0,31443.142857,China,1439323.774,2343.813853,6.316855,2284.39726,21.845775
301018,,China,2022-11-23,3437108,63601.0,NA-China,3321855.0,33867.571429,China,1439323.774,2388.001965,44.188112,2307.927457,23.530197
301019,,China,2022-11-24,3449676,12568.0,NA-China,3356043.0,34188.142857,China,1439323.774,2396.733843,8.731878,2331.680377,23.75292


In [38]:
trans_pop.loc[trans_pop["country"]=="Japan"].tail()


Unnamed: 0,Province/State,country,date,count,inc_count,country_key,cnt_move_ave7,inc_move_ave7,un_country,population,count_per_mil_pop,inc_count_per_mil_pop,cnt_move_ave7_milpop,inc_move_ave7_milpop
162961,,Japan,2022-11-20,23793927,77722.0,NA-Japan,23529800.0,84296.0,Japan,126476.458,188129.295967,614.517525,186040.921996,666.495578
162962,,Japan,2022-11-21,23836351,42424.0,NA-Japan,23614790.0,84991.571429,Japan,126476.458,188464.725981,335.430013,186712.917186,671.99519
162963,,Japan,2022-11-22,23957587,121236.0,NA-Japan,23702840.0,88050.142857,Japan,126476.458,189423.291724,958.565744,187409.095307,696.178121
162964,,Japan,2022-11-23,24090948,133361.0,NA-Japan,23794550.0,91715.714286,Japan,126476.458,190477.725111,1054.433387,188134.255671,725.160364
162965,,Japan,2022-11-24,24151056,60108.0,NA-Japan,23881770.0,87216.0,Japan,126476.458,190952.975612,475.250501,188823.83855,689.582879


### 主要国の定義

In [39]:
"""
countries = pd.DataFrame({"country":[
    "Japan",
    "US",
    "India",
    "Sweden",
    "Brazil",
    "Israel",
    "United Kingdom",
    "France",
    "Russia",
]})
"""
countries = pd.DataFrame([
    ("Japan",""),
    ("US",""),
    ("India","20210415"),
    ("Sweden","20210511"),
    ("Brazil",""),
    ("Israel","20210315"),
    ("United Kingdom",""),
    ("France","20210425"),
    ("Russia",""),
],columns=["country","text_disp_date"])

In [40]:
#c_df = pd.merge(countries, trans_pop[trans_pop["Province/State"].isna()], left_on="country", right_on="Country/Region", how="inner")[["country","date","count","inc_count","move_ave7"]]
c_trans = pd.merge(countries, trans_pop[trans_pop["Province/State"].isna()], on="country", how="inner")


In [41]:
c_trans

Unnamed: 0,country,text_disp_date,Province/State,date,count,inc_count,country_key,cnt_move_ave7,inc_move_ave7,un_country,population,count_per_mil_pop,inc_count_per_mil_pop,cnt_move_ave7_milpop,inc_move_ave7_milpop
0,Japan,,,2020-01-22,2,0.0,NA-Japan,,,Japan,126476.458,0.015813,0.0,,
1,Japan,,,2020-01-23,2,0.0,NA-Japan,,,Japan,126476.458,0.015813,0.0,,
2,Japan,,,2020-01-24,2,0.0,NA-Japan,,,Japan,126476.458,0.015813,0.0,,
3,Japan,,,2020-01-25,2,0.0,NA-Japan,,,Japan,126476.458,0.015813,0.0,,
4,Japan,,,2020-01-26,4,2.0,NA-Japan,,,Japan,126476.458,0.031626,0.015813,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9337,Russia,,,2022-11-20,21230367,5153.0,NA-Russia,2.121439e+07,5132.142857,Russian Federation,145934.46,145478.778624,35.310372,145369.331254,35.16745
9338,Russia,,,2022-11-21,21234988,4621.0,NA-Russia,2.121954e+07,5146.714286,Russian Federation,145934.46,145510.443524,31.664899,145404.598554,35.267299
9339,Russia,,,2022-11-22,21239400,4412.0,NA-Russia,2.122469e+07,5148.428571,Russian Federation,145934.46,145540.676273,30.23275,145439.8776,35.279046
9340,Russia,,,2022-11-23,21244991,5591.0,NA-Russia,2.122988e+07,5185.142857,Russian Federation,145934.46,145578.987992,38.311719,145475.408227,35.530627


In [42]:
dates = trans["date"].unique()
len(dates)

1038

In [43]:
c_trans[c_trans["country"]=="Japan"].tail()

Unnamed: 0,country,text_disp_date,Province/State,date,count,inc_count,country_key,cnt_move_ave7,inc_move_ave7,un_country,population,count_per_mil_pop,inc_count_per_mil_pop,cnt_move_ave7_milpop,inc_move_ave7_milpop
1033,Japan,,,2022-11-20,23793927,77722.0,NA-Japan,23529800.0,84296.0,Japan,126476.458,188129.295967,614.517525,186040.921996,666.495578
1034,Japan,,,2022-11-21,23836351,42424.0,NA-Japan,23614790.0,84991.571429,Japan,126476.458,188464.725981,335.430013,186712.917186,671.99519
1035,Japan,,,2022-11-22,23957587,121236.0,NA-Japan,23702840.0,88050.142857,Japan,126476.458,189423.291724,958.565744,187409.095307,696.178121
1036,Japan,,,2022-11-23,24090948,133361.0,NA-Japan,23794550.0,91715.714286,Japan,126476.458,190477.725111,1054.433387,188134.255671,725.160364
1037,Japan,,,2022-11-24,24151056,60108.0,NA-Japan,23881770.0,87216.0,Japan,126476.458,190952.975612,475.250501,188823.83855,689.582879


In [44]:
print(any(c_trans["date"].isnull()))
print(any(c_trans["date"].isna()))

False
False


In [45]:
c_trans.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9342 entries, 0 to 9341
Data columns (total 15 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   country                9342 non-null   object        
 1   text_disp_date         9342 non-null   object        
 2   Province/State         0 non-null      object        
 3   date                   9342 non-null   datetime64[ns]
 4   count                  9342 non-null   int64         
 5   inc_count              9341 non-null   float64       
 6   country_key            9342 non-null   object        
 7   cnt_move_ave7          9288 non-null   float64       
 8   inc_move_ave7          9281 non-null   float64       
 9   un_country             9342 non-null   object        
 10  population             9342 non-null   object        
 11  count_per_mil_pop      9342 non-null   object        
 12  inc_count_per_mil_pop  9341 non-null   object        
 13  cnt

### 東アジア

In [46]:
# 東Asia 主要国
e_asia_c = pd.DataFrame([
    ("Japan",""),
    ("China",""),
    ("Taiwan*",""),
    ("Korea, South","20210420"),
    ("Indonesia",""),
    ("Philippines",""),
    ("Vietnam",""),
],columns=["country","text_disp_date"])
e_asia_c

Unnamed: 0,country,text_disp_date
0,Japan,
1,China,
2,Taiwan*,
3,"Korea, South",20210420.0
4,Indonesia,
5,Philippines,
6,Vietnam,


In [47]:
e_a_trans = pd.merge(e_asia_c, trans_pop[trans_pop["Province/State"].isna()], on="country", how="inner")
e_a_trans

Unnamed: 0,country,text_disp_date,Province/State,date,count,inc_count,country_key,cnt_move_ave7,inc_move_ave7,un_country,population,count_per_mil_pop,inc_count_per_mil_pop,cnt_move_ave7_milpop,inc_move_ave7_milpop
0,Japan,,,2020-01-22,2,0.0,NA-Japan,,,Japan,126476.458,0.015813,0.0,,
1,Japan,,,2020-01-23,2,0.0,NA-Japan,,,Japan,126476.458,0.015813,0.0,,
2,Japan,,,2020-01-24,2,0.0,NA-Japan,,,Japan,126476.458,0.015813,0.0,,
3,Japan,,,2020-01-25,2,0.0,NA-Japan,,,Japan,126476.458,0.015813,0.0,,
4,Japan,,,2020-01-26,4,2.0,NA-Japan,,,Japan,126476.458,0.031626,0.015813,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7261,Vietnam,,,2022-11-20,11511452,274.0,NA-Vietnam,1.151034e+07,394.714286,Viet Nam,97338.583,118261.964015,2.814917,118250.531168,4.055065
7262,Vietnam,,,2022-11-21,11511822,370.0,NA-Vietnam,1.151076e+07,418.428571,Viet Nam,97338.583,118265.76518,3.801165,118254.829859,4.298692
7263,Vietnam,,,2022-11-22,11512138,316.0,NA-Vietnam,1.151114e+07,380.714286,Viet Nam,97338.583,118269.01158,3.2464,118258.741097,3.911237
7264,Vietnam,,,2022-11-23,11512684,546.0,NA-Vietnam,1.151153e+07,387.000000,Viet Nam,97338.583,118274.620866,5.609287,118262.71691,3.975813


In [48]:
e_a_trans.count()

country                  7266
text_disp_date           7266
Province/State              0
date                     7266
count                    7266
inc_count                7266
country_key              7266
cnt_move_ave7            7224
inc_move_ave7            7224
un_country               7266
population               7266
count_per_mil_pop        7266
inc_count_per_mil_pop    7266
cnt_move_ave7_milpop     7224
inc_move_ave7_milpop     7224
dtype: int64

## チャート化

In [49]:
# 主要国　新規感染者数　全期間
plt.figure(figsize=(9.5,5))
plt.grid()
dates = c_trans["date"].unique()
for c in countries["country"]:
    print(c)
    plt.plot_date(dates,c_trans.loc[c_trans["country"]==c]["inc_count"],label=c, fmt="-")
plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

Japan
US
India
Sweden
Brazil
Israel
United Kingdom
France
Russia


In [50]:
# 主要国　新規感染者数７日間移動平均　全期間
plt.figure(figsize=(9.5,5))
plt.grid()
dates = c_trans["date"].unique()
for c in countries["country"]:
    print(c)
    plt.plot_date(dates,c_trans.loc[c_trans["country"]==c]["cnt_move_ave7"],label=c, fmt="-")
plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

Japan
US
India
Sweden
Brazil
Israel
United Kingdom
France
Russia


In [51]:
# 主要国　１００万人あたり感染者数累計 全期間　７日間移動平均
plt.figure(figsize=(9.5,5))
plt.grid()
plt.title("positives per million poplulasions transition(1w moving-ave)")
dates = c_trans["date"].unique()
for c in countries["country"]:
    plt.plot_date(dates,c_trans.loc[c_trans["country"]==c]["cnt_move_ave7_milpop"],label=c, fmt="-")
    
plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

In [52]:
# 主要国　１００万人あたり感染者数累計 1月以降　７日間移動平均
plt.figure(figsize=(9.5,5))
plt.grid()
plt.title("positives per million poplulasions transition(1w moving-ave)(21/1/1-)")
dates = c_trans.loc[c_trans["date"]>="2021-01-01"]["date"].unique()
for c in countries["country"]:
    plt.plot_date(dates,c_trans.loc[(c_trans["country"]==c) & (c_trans["date"]>="2021-01-01")]["cnt_move_ave7_milpop"],label=c, fmt="-")
    
plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

## 人口当たりの新規感染者数

In [53]:
# 主要国　１００万人あたり新規感染者数 全期間　７日間移動平均
"""
plt.figure(figsize=(9.5,5))
plt.grid()
plt.title("increase positives per million poplulasions transition(1w moving-ave)")
dates = c_trans["date"].unique()
for c in countries["country"]:
    plt.plot_date(dates,c_trans.loc[c_trans["country"]==c]["inc_move_ave7_milpop"],label=c, fmt="-")
    plt.text(dates.max(),c_trans.loc[(c_trans["country"]==c) & (c_trans["date"]==dates.max())]["inc_move_ave7_milpop"],c,fontsize=8)

plt.legend()
plt.tight_layout()
"""

'\nplt.figure(figsize=(9.5,5))\nplt.grid()\nplt.title("increase positives per million poplulasions transition(1w moving-ave)")\ndates = c_trans["date"].unique()\nfor c in countries["country"]:\n    plt.plot_date(dates,c_trans.loc[c_trans["country"]==c]["inc_move_ave7_milpop"],label=c, fmt="-")\n    plt.text(dates.max(),c_trans.loc[(c_trans["country"]==c) & (c_trans["date"]==dates.max())]["inc_move_ave7_milpop"],c,fontsize=8)\n\nplt.legend()\nplt.tight_layout()\n'

## 人口100万人当たりの新規感染者数（3月以降）

In [54]:
# 主要国　１００万人あたり感染者数累計 3月以降　７日間移動平均
fig=plt.figure(figsize=(9.5,10))
plt.subplot(2,1,1) # (rows, columns, panel number)
plt.grid()
plt.title("主要国の人口100万人当たりの新規感染者数７日間移動平均({}時点)".format(latest_date_str))
chart_data = c_trans[c_trans.date >= "2021-03-01"]
dates = chart_data["date"].unique()
for i,cr in countries.iterrows():
    c = cr["country"]
    text_disp_date = pd.to_datetime(cr["text_disp_date"]) if cr["text_disp_date"]!="" else dates.max()
    plt.plot_date(dates,chart_data.loc[chart_data["country"]==c]["inc_move_ave7_milpop"],label=c, fmt="-")
#    plt.text(dates.max(),chart_data.loc[(chart_data["country"]==c) & (chart_data["date"]==dates.max())]["inc_move_ave7_milpop"],c
    plt.text(text_disp_date,chart_data.loc[(chart_data["country"]==c) & (chart_data["date"]==text_disp_date)]["inc_move_ave7_milpop"],c
            ,fontsize=8,va="center")
plt.legend(bbox_to_anchor=(0, 1),loc='upper left', fontsize=9)

plt.subplot(2,1,2) # (rows, columns, panel number)
plt.grid()
plt.title("東・東南アジア主要国の人口100万人当たりの新規感染者数７日間移動平均({}時点)".format(latest_date_str))
chart_data = e_a_trans[e_a_trans.date >= "2021-03-01"]
dates = chart_data["date"].unique()
for i,cr in e_asia_c.iterrows():
    c = cr["country"]
    text_disp_date = pd.to_datetime(cr["text_disp_date"]) if cr["text_disp_date"]!="" else dates.max()
    plt.plot_date(dates,chart_data.loc[chart_data["country"]==c]["inc_move_ave7_milpop"],label=c, fmt="-")
#    plt.text(dates.max(),chart_data.loc[(chart_data["country"]==c) & (chart_data["date"]==dates.max())]["inc_move_ave7_milpop"],c
    plt.text(text_disp_date,chart_data.loc[(chart_data["country"]==c) & (chart_data["date"]==text_disp_date)]["inc_move_ave7_milpop"],c
             ,fontsize=8,va="center")
plt.legend(bbox_to_anchor=(0, 1),loc='upper left', fontsize=9)
plt.text(dates.max(),-1,"※ChinaはProvince/State毎の値を合算",ha="right",va="top",fontsize=8)

fig.text(0,0
         ,"※感染者の情報提供:COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University,"\
         +"※人口の情報提供:WHO",
        fontsize=7,ha="left",va="bottom")
fig.tight_layout()


<IPython.core.display.Javascript object>

  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()


In [55]:
fig.savefig("out/covid-global-countries-transition.png")

  fig.savefig("out/covid-global-countries-transition.png")
  fig.savefig("out/covid-global-countries-transition.png")
  fig.savefig("out/covid-global-countries-transition.png")
  fig.savefig("out/covid-global-countries-transition.png")
  fig.savefig("out/covid-global-countries-transition.png")
  fig.savefig("out/covid-global-countries-transition.png")
  fig.savefig("out/covid-global-countries-transition.png")
  fig.savefig("out/covid-global-countries-transition.png")
  fig.savefig("out/covid-global-countries-transition.png")
  fig.savefig("out/covid-global-countries-transition.png")
  fig.savefig("out/covid-global-countries-transition.png")
  fig.savefig("out/covid-global-countries-transition.png")
  fig.savefig("out/covid-global-countries-transition.png")
  fig.savefig("out/covid-global-countries-transition.png")
  fig.savefig("out/covid-global-countries-transition.png")
  fig.savefig("out/covid-global-countries-transition.png")
  fig.savefig("out/covid-global-countries-transition.png