In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")
df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,5/28/21,5/29/21,5/30/21,5/31/21,6/1/21,6/2/21,6/3/21,6/4/21,6/5/21,6/6/21
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,69130,70111,70761,71838,72977,74026,75119,76628,77963,79224
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,132285,132297,132309,132315,132337,132351,132360,132372,132374,132379
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,128198,128456,128725,128913,129218,129640,129976,130361,130681,130958
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,13693,13693,13693,13727,13729,13744,13752,13758,13758,13758
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,33944,34180,34366,34551,34752,34960,35140,35307,35594,35772


In [3]:
del df["Lat"]
del df["Long"]

In [4]:
countries_df = df[["Province/State","Country/Region"]]
countries_df.head()

Unnamed: 0,Province/State,Country/Region
0,,Afghanistan
1,,Albania
2,,Algeria
3,,Andorra
4,,Angola


In [5]:
days_df = df.copy()
del days_df["Province/State"]
del days_df["Country/Region"]

In [6]:
days_df = days_df.stack().reset_index()
days_df.columns=["country_id", "date", "count"]
days_df.head()

Unnamed: 0,country_id,date,count
0,0,1/22/20,0
1,0,1/23/20,0
2,0,1/24/20,0
3,0,1/25/20,0
4,0,1/26/20,0


In [7]:
df = pd.merge(countries_df, days_df, left_index=True, right_on="country_id")
del df["country_id"]
df["date"] = pd.to_datetime(df["date"])
df.head()

Unnamed: 0,Province/State,Country/Region,date,count
0,,Afghanistan,2020-01-22,0
1,,Afghanistan,2020-01-23,0
2,,Afghanistan,2020-01-24,0
3,,Afghanistan,2020-01-25,0
4,,Afghanistan,2020-01-26,0


In [8]:
global_df = df.groupby(["date"],as_index=False).sum("count")

In [9]:
global_df.tail()

Unnamed: 0,date,count
497,2021-06-02,171680812
498,2021-06-03,172169929
499,2021-06-04,172589646
500,2021-06-05,172987591
501,2021-06-06,173310648


In [10]:
latest_date_str = global_df.date.max().strftime("%Y-%m-%d")

In [11]:
import matplotlib.pyplot as plt
%matplotlib notebook
#%matplotlib inline

In [12]:
df["Country/Region"].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan',
       'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'Brunei', 'Bulgaria', 'Burkina Faso', 'Burma', 'Burundi',
       'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Comoros', 'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica',
       "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Denmark',
       'Diamond Princess', 'Djibouti', 'Dominica', 'Dominican Republic',
       'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea',
       'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France',
       'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece',
       'Grenada', 'Guatemala', 'Guin

In [13]:
#plt.figure(figsize=(9.5,5))
#plt.bar(global_df["date"],global_df["count"]/1000,label="positive")
#plt.legend()

In [14]:
trans = pd.DataFrame()
trans["date"] = global_df.iloc[1:]["date"]
trans["count1"] = global_df["count"].iloc[1:]
trans = trans.reset_index().drop(columns="index")
trans["count"] = trans["count1"] - global_df["count"].iloc[0:]
del trans["count1"]
trans["move_ave7"] = trans["count"].rolling(7).mean()


In [15]:
trans.tail()

Unnamed: 0,date,count,move_ave7
496,2021-06-02,493212.0,465881.714286
497,2021-06-03,489117.0,456570.285714
498,2021-06-04,419717.0,445126.428571
499,2021-06-05,397945.0,433281.714286
500,2021-06-06,323057.0,423567.285714


In [16]:
fig=plt.figure(figsize=(9.5,8))
plt.subplot(2,1,1) # (rows, columns, panel number)
plt.bar(trans["date"],trans["count"],label="新規感染者数")
plt.plot_date(trans["date"],trans["move_ave7"],label="７日間移動平均", fmt="-", color="orange")
plt.title("全世界の新規感染者数の推移({}時点)".format(latest_date_str))
plt.ylabel("人")
plt.grid(axis="y")
plt.legend()

plt.subplot(2,1,2) # (rows, columns, panel number)
plt.title("全世界の新規感染者数の推移（3月以降）({}時点)".format(latest_date_str))
chart_data = trans[trans.date >= "2021-03-01"]
plt.bar(chart_data["date"],chart_data["count"],label="新規感染者数")
plt.plot_date(chart_data["date"],chart_data["move_ave7"],label="７日間移動平均", fmt="-", color="orange")
plt.grid(axis="y")
plt.legend()
#plt.legend(bbox_to_anchor=(0, 1),loc='upper left', fontsize=9)
fig.text(0,0
         ,"※感染者の情報提供:COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University",
        fontsize=7,ha="left",va="bottom")

fig.tight_layout()

<IPython.core.display.Javascript object>

In [17]:
fig.savefig("out/covid-global-transition.png")

## 国別

In [18]:
df.tail()

Unnamed: 0,Province/State,Country/Region,date,count
138547,,Zimbabwe,2021-06-02,39031
138548,,Zimbabwe,2021-06-03,39092
138549,,Zimbabwe,2021-06-04,39144
138550,,Zimbabwe,2021-06-05,39168
138551,,Zimbabwe,2021-06-06,39189


In [19]:
df["Country/Region"].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan',
       'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'Brunei', 'Bulgaria', 'Burkina Faso', 'Burma', 'Burundi',
       'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Comoros', 'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica',
       "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Denmark',
       'Diamond Princess', 'Djibouti', 'Dominica', 'Dominican Republic',
       'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea',
       'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France',
       'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece',
       'Grenada', 'Guatemala', 'Guin

In [20]:
countries = pd.DataFrame({"country":[
    "Japan",
    "US",
    "India",
    "Sweden",
    "Brazil",
    "Israel",
    "Korea, South",
    "Taiwan*",
    "United Kingdom",
    "France",
]})
dates = df["date"].unique()

In [21]:
c_df = pd.merge(countries, df[df["Province/State"].isna()], left_on="country", right_on="Country/Region", how="inner")[["country","date","count"]]

In [22]:
c_df

Unnamed: 0,country,date,count
0,Japan,2020-01-22,2
1,Japan,2020-01-23,2
2,Japan,2020-01-24,2
3,Japan,2020-01-25,2
4,Japan,2020-01-26,4
...,...,...,...
5015,France,2021-06-02,5619133
5016,France,2021-06-03,5634526
5017,France,2021-06-04,5641296
5018,France,2021-06-05,5647950


In [23]:
c_df.groupby("country").count()

Unnamed: 0_level_0,date,count
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Brazil,502,502
France,502,502
India,502,502
Israel,502,502
Japan,502,502
"Korea, South",502,502
Sweden,502,502
Taiwan*,502,502
US,502,502
United Kingdom,502,502


In [24]:
len(dates)

502

In [25]:
c_trans = c_df.copy()
c_trans["inc_count"] = 0
for c in countries["country"]:
    yday_t = c_trans.loc[c_trans["country"]==c].reset_index()
    t = yday_t.iloc[1:].reset_index()
    t["inc_count"] = t["count"] - t.join(yday_t,rsuffix="_y")["count_y"]
    c_trans.loc[(c_trans["country"]==c) & (c_trans["date"]>=t["date"].min()),"inc_count"] = t["inc_count"].values.tolist()
    c_trans.loc[(c_trans["country"]==c),"move_ave7"] = c_trans.loc[c_trans["country"]==c]["inc_count"].rolling(7).mean()
pd.concat([c_trans.head(2),c_trans.tail(2)])


Unnamed: 0,country,date,count,inc_count,move_ave7
0,Japan,2020-01-22,2,0,
1,Japan,2020-01-23,2,0,
5018,France,2021-06-05,5647950,6654,6799.571429
5019,France,2021-06-06,5652705,4755,6336.857143


In [26]:
## 異常値？の訂正
import numpy as np
c_trans.loc[(c_trans.country=="France") & (c_trans.inc_count < -300000),"inc_count"]=np.nan


In [27]:
c_trans[c_trans["country"]=="Japan"].tail()

Unnamed: 0,country,date,count,inc_count,move_ave7
497,Japan,2021-06-02,752865,3036.0,3113.428571
498,Japan,2021-06-03,755713,2848.0,2925.571429
499,Japan,2021-06-04,758290,2577.0,2765.0
500,Japan,2021-06-05,760953,2663.0,2630.571429
501,Japan,2021-06-06,762980,2027.0,2512.571429


In [28]:
print(any(c_trans["date"].isnull()))
print(any(c_trans["date"].isna()))

False
False


In [29]:
c_trans.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5020 entries, 0 to 5019
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   country    5020 non-null   object        
 1   date       5020 non-null   datetime64[ns]
 2   count      5020 non-null   int64         
 3   inc_count  5019 non-null   float64       
 4   move_ave7  4960 non-null   float64       
dtypes: datetime64[ns](1), float64(2), int64(1), object(1)
memory usage: 235.3+ KB


In [30]:
plt.figure(figsize=(9.5,5))
plt.grid()
dates = c_trans["date"].unique()
for c in countries["country"]:
    print(c)
    plt.plot_date(dates,c_trans.loc[c_trans["country"]==c]["inc_count"],label=c, fmt="-")
plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

Japan
US
India
Sweden
Brazil
Israel
Korea, South
Taiwan*
United Kingdom
France


In [31]:
plt.figure(figsize=(9.5,5))
plt.grid()
dates = c_trans["date"].unique()
for c in countries["country"]:
    print(c)
    plt.plot_date(dates,c_trans.loc[c_trans["country"]==c]["move_ave7"],label=c, fmt="-")
plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

Japan
US
India
Sweden
Brazil
Israel
Korea, South
Taiwan*
United Kingdom
France


## 人口当たり

In [32]:
pop = pd.read_excel("world-data/WPP2019_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.xlsx",header=16)
pop_latest = pop.loc[pop["Type"]=="Country/Area"][[pop.columns[2],pop.columns[-1]]]
pop_latest.columns = ["un_country","population"]
pop_latest.head(1)

Unnamed: 0,un_country,population
26,Burundi,11890.781


In [33]:
alternates = pd.DataFrame([
    ("US","United States of America"),
    ("Russia","Russian Federation"),
    ("Iran","Iran (Islamic Republic of)"),
    ("Bolivia","Bolivia (Plurinational State of)"),
    ("Moldova","Republic of Moldova"),
    ("Venezuela","Venezuela (Bolivarian Republic of)"),
    ("Korea, South","Republic of Korea"),
    ("Congo (Kinshasa)","Democratic Republic of the Congo"),
    ("Congo (Brazzaville)","Congo"),
    ("Vietnam","Viet Nam"),
    ("Laos","Lao People's Democratic Republic"),
    ("Taiwan*","China, Taiwan Province of China"),
    ("Tanzania","United Republic of Tanzania"),
    ("Brunei","Brunei Darussalam"),
    ("Micronesia","Micronesia (Fed. States of)"),
    ("Syria","Syrian Arab Republic"),
], columns = ["covid_country","un_country"])

In [34]:
alternates.head()

Unnamed: 0,covid_country,un_country
0,US,United States of America
1,Russia,Russian Federation
2,Iran,Iran (Islamic Republic of)
3,Bolivia,Bolivia (Plurinational State of)
4,Moldova,Republic of Moldova


In [35]:
c_trans_pop = pd.merge(c_trans, alternates, left_on="country", right_on="covid_country", how="left")
#c_trans_pop["country"].replace(renames,inplace=True)
del c_trans_pop["covid_country"]

In [36]:
c_trans_pop.head()

Unnamed: 0,country,date,count,inc_count,move_ave7,un_country
0,Japan,2020-01-22,2,0.0,,
1,Japan,2020-01-23,2,0.0,,
2,Japan,2020-01-24,2,0.0,,
3,Japan,2020-01-25,2,0.0,,
4,Japan,2020-01-26,4,2.0,,


In [37]:
c_trans_pop.isnull().any(axis=0)

country       False
date          False
count         False
inc_count      True
move_ave7      True
un_country     True
dtype: bool

In [38]:
c_trans_pop["un_country"].fillna(c_trans_pop["country"],inplace=True)

In [39]:
c_trans_pop.isnull().any(axis=0)

country       False
date          False
count         False
inc_count      True
move_ave7      True
un_country    False
dtype: bool

In [40]:
c_trans_pop = pd.merge(c_trans_pop, pop_latest, on="un_country", how="left")

In [41]:
c_trans_pop.head()

Unnamed: 0,country,date,count,inc_count,move_ave7,un_country,population
0,Japan,2020-01-22,2,0.0,,Japan,126476.458
1,Japan,2020-01-23,2,0.0,,Japan,126476.458
2,Japan,2020-01-24,2,0.0,,Japan,126476.458
3,Japan,2020-01-25,2,0.0,,Japan,126476.458
4,Japan,2020-01-26,4,2.0,,Japan,126476.458


In [42]:
c_trans_pop["count_per_mil_pop"] = c_trans_pop["count"] / (c_trans_pop["population"] / 1000)
c_trans_pop["inc_count_per_mil_pop"] = c_trans_pop["inc_count"] / (c_trans_pop["population"] / 1000)

In [43]:
c_trans_pop.head()

Unnamed: 0,country,date,count,inc_count,move_ave7,un_country,population,count_per_mil_pop,inc_count_per_mil_pop
0,Japan,2020-01-22,2,0.0,,Japan,126476.458,0.015813,0.0
1,Japan,2020-01-23,2,0.0,,Japan,126476.458,0.015813,0.0
2,Japan,2020-01-24,2,0.0,,Japan,126476.458,0.015813,0.0
3,Japan,2020-01-25,2,0.0,,Japan,126476.458,0.015813,0.0
4,Japan,2020-01-26,4,2.0,,Japan,126476.458,0.031626,0.015813


In [44]:
#c_trans_pop["move_ave7_milpop"] = 0
for c in countries["country"]:
    c_trans_pop.loc[(c_trans_pop["country"]==c),"move_ave7_milpop"] = c_trans_pop.loc[c_trans_pop["country"]==c]["count_per_mil_pop"].rolling(7).mean()
    c_trans_pop.loc[(c_trans_pop["country"]==c),"inc_move_ave7_milpop"] = c_trans_pop.loc[c_trans_pop["country"]==c]["inc_count_per_mil_pop"].rolling(7).mean()

In [45]:
c_trans_pop.loc[c_trans_pop["country"]=="Japan"].tail()


Unnamed: 0,country,date,count,inc_count,move_ave7,un_country,population,count_per_mil_pop,inc_count_per_mil_pop,move_ave7_milpop,inc_move_ave7_milpop
497,Japan,2021-06-02,752865,3036.0,3113.428571,Japan,126476.458,5952.609773,24.004467,5887.015409,24.616665
498,Japan,2021-06-03,755713,2848.0,2925.571429,Japan,126476.458,5975.127798,22.518025,5910.146761,23.131352
499,Japan,2021-06-04,758290,2577.0,2765.0,Japan,126476.458,5995.503131,20.375333,5932.008537,21.861776
500,Japan,2021-06-05,760953,2663.0,2630.571429,Japan,126476.458,6016.558433,21.055302,5952.807439,20.798902
501,Japan,2021-06-06,762980,2027.0,2512.571429,Japan,126476.458,6032.585131,16.026698,5972.673361,19.865922


In [46]:
plt.figure(figsize=(9.5,5))
plt.grid()
plt.title("positives per million poplulasions transition(1w moving-ave)")
dates = c_trans_pop["date"].unique()
for c in countries["country"]:
    plt.plot_date(dates,c_trans_pop.loc[c_trans_pop["country"]==c]["move_ave7_milpop"],label=c, fmt="-")

#for c in countries["country"]:
#    plt.plot_date(dates,c_trans.loc[c_trans["country"]==c]["move_ave7"],label=c, fmt="-")
    
plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

In [47]:
plt.figure(figsize=(9.5,5))
plt.grid()
plt.title("positives per million poplulasions transition(1w moving-ave)(21/1/1-)")
dates = c_trans_pop.loc[c_trans_pop["date"]>="2021-01-01"]["date"].unique()
for c in countries["country"]:
    plt.plot_date(dates,c_trans_pop.loc[(c_trans_pop["country"]==c) & (c_trans_pop["date"]>="2021-01-01")]["move_ave7_milpop"],label=c, fmt="-")

#for c in countries["country"]:
#    plt.plot_date(dates,c_trans.loc[c_trans["country"]==c]["move_ave7"],label=c, fmt="-")
    
plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

## 人口当たりの新規感染者数

In [48]:
plt.figure(figsize=(9.5,5))
plt.grid()
plt.title("increase positives per million poplulasions transition(1w moving-ave)")
dates = c_trans_pop["date"].unique()
for c in countries["country"]:
    plt.plot_date(dates,c_trans_pop.loc[c_trans_pop["country"]==c]["inc_move_ave7_milpop"],label=c, fmt="-")
    plt.text(dates.max(),c_trans_pop.loc[(c_trans_pop["country"]==c) & (c_trans_pop["date"]==dates.max())]["inc_move_ave7_milpop"],c,fontsize=8)

plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

## 人口100万人当たりの新規感染者数（3月以降）

In [49]:
fig=plt.figure(figsize=(9.5,5))
plt.grid()
plt.title("主要国の人口100万人当たりの新規感染者数７日間移動平均（3月以降）({}時点)".format(latest_date_str))
chart_data = c_trans_pop[c_trans_pop.date >= "2021-03-01"]
dates = chart_data["date"].unique()
for c in countries["country"]:
    plt.plot_date(dates,chart_data.loc[chart_data["country"]==c]["inc_move_ave7_milpop"],label=c, fmt="-")
    plt.text(dates.max(),chart_data.loc[(chart_data["country"]==c) & (chart_data["date"]==dates.max())]["inc_move_ave7_milpop"],c,fontsize=8)
plt.legend(bbox_to_anchor=(0, 1),loc='upper left', fontsize=9)
plt.tight_layout()

<IPython.core.display.Javascript object>

In [50]:
fig.savefig("out/covid-countries-transition.png")