# 世界の最新の感染者数

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")
del df["Lat"]
del df["Long"]

In [3]:
df.head()

Unnamed: 0,Province/State,Country/Region,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,...,11/8/22,11/9/22,11/10/22,11/11/22,11/12/22,11/13/22,11/14/22,11/15/22,11/16/22,11/17/22
0,,Afghanistan,0,0,0,0,0,0,0,0,...,203942,204094,204287,204392,204417,204510,204610,204724,204820,204982
1,,Albania,0,0,0,0,0,0,0,0,...,333071,333088,333103,333125,333138,333156,333161,333197,333215,333233
2,,Algeria,0,0,0,0,0,0,0,0,...,270891,270906,270917,270924,270929,270939,270952,270969,270981,270996
3,,Andorra,0,0,0,0,0,0,0,0,...,46588,46664,46664,46664,46664,46664,46664,46664,46824,46824
4,,Angola,0,0,0,0,0,0,0,0,...,103131,103131,103131,103131,103131,103131,103131,103131,103131,103131


In [4]:
latest_date = df.tail(1).columns[-1]
latest_date

'11/17/22'

In [5]:
latest_column_key = df.columns[-1]
latest_column_key2 = df.columns[-2]
df = df[['Province/State','Country/Region',latest_column_key2,latest_column_key]]
df = df.rename(columns={latest_column_key:"latest_confirmed"})
df = df.rename(columns={latest_column_key2:"latest_confirmed2"})
df["new_confirmed"] = df.latest_confirmed - df.latest_confirmed2
df.head()

Unnamed: 0,Province/State,Country/Region,latest_confirmed2,latest_confirmed,new_confirmed
0,,Afghanistan,204820,204982,162
1,,Albania,333215,333233,18
2,,Algeria,270981,270996,15
3,,Andorra,46824,46824,0
4,,Angola,103131,103131,0


## 全世界の最新の感染者数

In [6]:
print("全世界感染者数({}時点):{:,}人".format(latest_date,df["latest_confirmed"].sum()))
print("全世界感染者数平均({}時点):{:,}人".format(latest_date,df["latest_confirmed"].mean()))
print("全世界の新規感染者数({}時点):{:,}人".format(latest_date,df["new_confirmed"].sum()))
print("全世界の新規感染者数平均({}時点):{:,}人".format(latest_date,df["new_confirmed"].mean()))


全世界感染者数(11/17/22時点):636,978,308人
全世界感染者数平均(11/17/22時点):2,204,077.190311419人
全世界の新規感染者数(11/17/22時点):468,987人
全世界の新規感染者数平均(11/17/22時点):1,622.7923875432525人


## 国別の最新の感染者数

In [7]:
country = df.groupby(["Country/Region"],as_index=False).sum("latest_confirmed")
country = country.sort_values("latest_confirmed", ascending=False)

In [8]:
country.head()

Unnamed: 0,Country/Region,latest_confirmed2,latest_confirmed,new_confirmed
186,US,98193437,98245940,52503
80,India,44668535,44667967,-568
63,France,37437677,37471008,33331
67,Germany,36152490,36180077,27587
24,Brazil,34938073,34971043,32970


In [9]:
import matplotlib.pyplot as plt
%matplotlib notebook
#%matplotlib inline

In [10]:
plt.figure(figsize=(9.5,5))
plt.title("Worst 50")
plt.xticks(rotation="90",fontsize=9)
plt.ylabel("count (1000 people)")
#fig.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))
plt.bar(country.head(50)["Country/Region"],country.head(50)["latest_confirmed"]/1000,label="positive")
plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

## ヒストグラム 

In [11]:
plt.figure(figsize=(10,5))
plt.title("histgram(million)")
#plt.xticks(rotation="90",fontsize=9)
#plt.ylabel("count (1000 people)")
#fig.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))
plt.hist(country["latest_confirmed"]/1000000,bins=20)

<IPython.core.display.Javascript object>

(array([174.,  13.,   3.,   1.,   4.,   1.,   0.,   3.,   0.,   1.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   1.]),
 array([1.00000000e-06, 4.91229795e+00, 9.82459490e+00, 1.47368918e+01,
        1.96491888e+01, 2.45614857e+01, 2.94737827e+01, 3.43860796e+01,
        3.92983766e+01, 4.42106735e+01, 4.91229705e+01, 5.40352674e+01,
        5.89475644e+01, 6.38598613e+01, 6.87721583e+01, 7.36844552e+01,
        7.85967522e+01, 8.35090491e+01, 8.84213461e+01, 9.33336430e+01,
        9.82459400e+01]),
 <BarContainer object of 20 artists>)

## 人口当たりの分析

In [12]:
pop = pd.read_excel("world-data/WPP2019_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.xlsx",header=16)
pop.head(1)

Unnamed: 0,Index,Variant,"Region, subregion, country or area *",Notes,Country code,Type,Parent code,1950,1951,1952,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,1,Estimates,WORLD,,900,World,0,2536431.018,2584034.227,2630861.69,...,7041194.168,7125827.957,7210582.041,7295290.759,7379796.967,7464021.934,7547858.9,7631091.113,7713468.205,7794798.729


In [13]:
pop.iloc[:,-1]

0      7794798.729
1              ...
2      1273304.261
3      6521494.468
4      1057438.163
          ...     
284         62.273
285      37742.157
286         56.772
287          5.795
288     331002.647
Name: 2020, Length: 289, dtype: object

In [14]:
pop_latest = pop.loc[pop["Type"]=="Country/Area"][[pop.columns[2],pop.columns[-1]]]

In [15]:
pop_latest.columns = ["country","population"]

In [16]:
pop_latest.head()

Unnamed: 0,country,population
26,Burundi,11890.781
27,Comoros,869.595
28,Djibouti,988.002
29,Eritrea,3546.427
30,Ethiopia,114963.583


In [17]:
country.head()

Unnamed: 0,Country/Region,latest_confirmed2,latest_confirmed,new_confirmed
186,US,98193437,98245940,52503
80,India,44668535,44667967,-568
63,France,37437677,37471008,33331
67,Germany,36152490,36180077,27587
24,Brazil,34938073,34971043,32970


### 世界全体での人口10万人あたり陽性者数

In [18]:
country.latest_confirmed.sum()

636978308

In [19]:
pop_latest.population.sum()

7794798.729

In [20]:
print("****世界全体での人口10万人あたり陽性者数:{}****".format(country.latest_confirmed.sum() / (pop_latest.population.sum() / 100)))

****世界全体での人口10万人あたり陽性者数:8171.837787551422****


### 各国の人口１０万人あたり陽性者数

In [21]:
c_merged = pd.merge(country, pop_latest, left_on="Country/Region", right_on="country", how="left")
c_merged.loc[c_merged["country"].isna()]

Unnamed: 0,Country/Region,latest_confirmed2,latest_confirmed,new_confirmed,country,population
0,US,98193437,98245940,52503,,
5,"Korea, South",26412901,26462319,49418,,
9,Russia,21208695,21214339,5644,,
12,Vietnam,11509975,11510484,509,,
16,Taiwan*,8092118,8112234,20116,,
17,Iran,7559055,7559164,109,,
65,Bolivia,1110014,1110014,0,,
77,West Bank and Gaza,703036,703036,0,,
82,Burma,632865,632910,45,,
85,Moldova,594441,594441,0,,


In [22]:
p_merged = pd.merge(pop_latest, country, left_on="country", right_on="Country/Region", how="left")
len(p_merged.loc[p_merged["Country/Region"].isna()])

60

In [23]:
pop_latest.loc[pop_latest["country"].str.contains("Syria")]

Unnamed: 0,country,population
104,Syrian Arab Republic,17500.657


In [24]:
renames = {
    "US":"United States of America",
    "Russia":"Russian Federation",
    "Iran":"Iran (Islamic Republic of)",
    "Bolivia":"Bolivia (Plurinational State of)",
    "Moldova":"Republic of Moldova",
    "Venezuela":"Venezuela (Bolivarian Republic of)",
    "Korea, South":"Republic of Korea",
    "Congo (Kinshasa)":"Democratic Republic of the Congo",
    "Congo (Brazzaville)":"Congo",
    "Vietnam":"Viet Nam",
    "Laos":"Lao People's Democratic Republic",
    "Taiwan*":"China, Taiwan Province of China",
    "Tanzania":"United Republic of Tanzania",
    "Brunei":"Brunei Darussalam",
    "Micronesia":"Micronesia (Fed. States of)",
    "Syria":"Syrian Arab Republic",
}

In [25]:
country_renamed = country.copy()
country_renamed["Country/Region"].replace(renames,inplace=True)

In [26]:
c_merged = pd.merge(country_renamed, pop_latest, left_on="Country/Region", right_on="country", how="left")
c_merged.loc[c_merged["country"].isna()]

Unnamed: 0,Country/Region,latest_confirmed2,latest_confirmed,new_confirmed,country,population
77,West Bank and Gaza,703036,703036,0,,
82,Burma,632865,632910,45,,
102,Kosovo,272195,272195,0,,
130,Cote d'Ivoire,87864,87864,0,,
194,Summer Olympics 2020,865,865,0,,
195,Diamond Princess,712,712,0,,
196,Winter Olympics 2022,535,535,0,,
198,Antarctica,11,11,0,,
199,MS Zaandam,9,9,0,,
200,"Korea, North",1,1,0,,


In [27]:
c_merged["positives per 1000 population"] = c_merged["latest_confirmed"] / (c_merged["population"])
c_merged["positives per 100,000 population"] = (c_merged["latest_confirmed"] / (c_merged["population"]/100)).astype(float)
c_merged["new positives per 100,000 population"] = (c_merged["new_confirmed"] / (c_merged["population"]/100)).astype(float)

In [28]:
c_merged = c_merged.sort_values("positives per 1000 population", ascending=False)

In [29]:
c_merged.loc[c_merged["Country/Region"]=="United States of America"]

Unnamed: 0,Country/Region,latest_confirmed2,latest_confirmed,new_confirmed,country,population,positives per 1000 population,"positives per 100,000 population","new positives per 100,000 population"
0,United States of America,98193437,98245940,52503,United States of America,331002.647,296.813155,29681.315509,15.861807


In [30]:
fig = plt.figure(figsize=(12,8))

plt.subplot(1,2,1) # (rows, columns, panel number)
plt.title("positives per pupulation WORST 50")
#plt.xticks(rotation="90",fontsize=9)
plt.yticks(fontsize=9)
plt.xlabel("count per 100,000 population")
plt.grid(axis="x")
#fig.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))
#plt.bar(c_merged.head(50)["Country/Region"],c_merged.head(50)["positives per 100,000 population"],label="positive")
chart_data=c_merged.head(50).sort_values("positives per 100,000 population",ascending=True)
plt.barh(chart_data["Country/Region"],chart_data["positives per 100,000 population"],label="positive")
#plt.text(0,chart_data["positives per 100,000 population"],chart_data["population"].astype(str))
#plt.text(0,chart_data["positives per 100,000 population"].all(),chart_data["population"].astype(str).all())
plt.legend()

plt.subplot(1,2,2) # (rows, columns, panel number)
plt.title("positives per pupulation BEST 50")
#plt.xticks(rotation="90",fontsize=9)
plt.yticks(fontsize=9)
plt.xlabel("count per 100,000 population")
plt.grid(axis="x")
#fig.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))
c_asc = c_merged.sort_values("positives per 1000 population")
chart_data=c_asc.head(50).sort_values("positives per 100,000 population",ascending=False)
plt.barh(chart_data["Country/Region"],chart_data["positives per 100,000 population"],label="positive")
plt.legend()

fig.tight_layout()


<IPython.core.display.Javascript object>

In [31]:
#fig.savefig("out/covid-global-tops.png")

In [32]:
plt.figure(figsize=(9.5,5))
plt.title("histgram(positives per 100,000 pop)")
plt.hist(c_merged["positives per 100,000 population"],bins=[0,2000,4000,8000,10000,12000,14000,16000,18000])

<IPython.core.display.Javascript object>

(array([55., 16., 15., 11.,  7.,  9.,  6., 10.]),
 array([    0,  2000,  4000,  8000, 10000, 12000, 14000, 16000, 18000]),
 <BarContainer object of 8 artists>)

In [33]:
c_merged.describe()

Unnamed: 0,latest_confirmed2,latest_confirmed,new_confirmed,"positives per 100,000 population","new positives per 100,000 population"
count,201.0,201.0,201.0,191.0,191.0
mean,3166713.0,3169046.0,2333.268657,16504.59434,8.455027
std,9498694.0,9505141.0,10106.076105,17999.70077,29.956191
min,1.0,1.0,-568.0,38.95626,-0.041159
25%,37439.0,37439.0,0.0,1300.147081,0.0
50%,304556.0,304770.0,0.0,9703.325719,0.0
75%,1677386.0,1677386.0,46.0,26096.154448,2.261707
max,98193440.0,98245940.0,91606.0,64228.887972,232.620693


In [34]:
c_merged.head()

Unnamed: 0,Country/Region,latest_confirmed2,latest_confirmed,new_confirmed,country,population,positives per 1000 population,"positives per 100,000 population","new positives per 100,000 population"
161,San Marino,21798,21798,0,San Marino,33.938,642.28888,64228.887972,0.0
24,Austria,5502186,5506697,4511,Austria,9006.4,611.420434,61142.043436,50.086605
145,Andorra,46824,46824,0,Andorra,77.265,606.018249,60601.824888,0.0
111,Iceland,206803,206803,0,Iceland,341.25,606.016117,60601.611722,0.0
59,Slovenia,1244721,1245626,905,Slovenia,2078.932,599.166303,59916.63027,43.531967


In [35]:
c_merged.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 201 entries, 161 to 200
Data columns (total 9 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   Country/Region                        201 non-null    object 
 1   latest_confirmed2                     201 non-null    int64  
 2   latest_confirmed                      201 non-null    int64  
 3   new_confirmed                         201 non-null    int64  
 4   country                               191 non-null    object 
 5   population                            191 non-null    object 
 6   positives per 1000 population         191 non-null    object 
 7   positives per 100,000 population      191 non-null    float64
 8   new positives per 100,000 population  191 non-null    float64
dtypes: float64(2), int64(3), object(4)
memory usage: 15.7+ KB


In [36]:
plt.figure(figsize=(9.5,5))
plt.title("histgram(positives per 100,000 pop)")
plt.hist(c_merged["positives per 100,000 population"],bins=200)

<IPython.core.display.Javascript object>

(array([24., 16.,  4.,  4.,  3.,  4.,  1.,  2.,  2.,  6.,  2.,  2.,  1.,
         0.,  1.,  1.,  0.,  2.,  2.,  2.,  2.,  1.,  1.,  1.,  3.,  2.,
         1.,  1.,  2.,  2.,  2.,  1.,  2.,  0.,  0.,  2.,  2.,  1.,  2.,
         1.,  2.,  1.,  0.,  2.,  0.,  2.,  1.,  2.,  1.,  0.,  1.,  3.,
         1.,  3.,  1.,  1.,  0.,  2.,  0.,  0.,  1.,  0.,  1.,  0.,  0.,
         1.,  1.,  0.,  1.,  2.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  1.,
         0.,  1.,  2.,  1.,  0.,  0.,  1.,  0.,  1.,  0.,  1.,  0.,  1.,
         0.,  1.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  1.,  2.,  0.,  0.,  0.,  1.,  0.,  1.,  0.,  1.,  0.,  0.,
         0.,  0.,  0.,  1.,  1.,  2.,  0.,  1.,  0.,  0.,  1.,  1.,  0.,
         0.,  0.,  1.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  1.,  0.,  1.,
         0.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  1.,  0.,  0.,  1.,  0.,
         2.,  1.,  2.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,
         2.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.

## 新規感染者数

In [37]:
fig = plt.figure(figsize=(12,8))

plt.subplot(1,2,1) # (rows, columns, panel number)
plt.title("新規感染者数 Worst 50({})".format(latest_date))
plt.yticks(fontsize=9)
plt.xlabel("count (people)")
chart_data = country.sort_values("new_confirmed",ascending=False).head(50).sort_values("new_confirmed",ascending=True)
plt.barh(chart_data["Country/Region"],chart_data["new_confirmed"],label="positive")
plt.grid(axis="x")
mean = int(country.new_confirmed.mean())
plt.plot([mean,mean],[0,50], 'k--', lw=0.5)
plt.text(mean,50,"平均:{}".format(mean),ha="center",fontsize=9)
plt.legend()

plt.subplot(1,2,2) # (rows, columns, panel number)
plt.title("人口１０万人当たり新規感染者数 Worst 50({})".format(latest_date))
plt.yticks(fontsize=9)
plt.xlabel("count (people)")
chart_data = c_merged.sort_values("new positives per 100,000 population",ascending=False).head(50).sort_values("new positives per 100,000 population",ascending=True)
plt.barh(chart_data["Country/Region"],chart_data["new positives per 100,000 population"],label="positive")
plt.grid(axis="x")
mean = int(c_merged["new positives per 100,000 population"].mean())
plt.plot([mean,mean],[0,50], 'k--', lw=0.5)
plt.text(mean,50,"平均:{}".format(mean),ha="center",fontsize=9)
plt.legend()

fig.text(0,0
         ,"※感染者の情報提供:COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University,"\
         +"※人口の情報提供:WHO",
        fontsize=7,ha="left",va="bottom")
#fig.text(1,0.0,"※人口の情報提供:WHO",fontsize=8,ha="right")

fig.tight_layout()

<IPython.core.display.Javascript object>

  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()
  fig.tight_layout()


In [38]:
#fig.savefig("out/covid-global-new-positives-latest-top.png")

### WORST50

In [39]:
country.sort_values("new_confirmed",ascending=False).head(50).reset_index()


Unnamed: 0,index,Country/Region,latest_confirmed2,latest_confirmed,new_confirmed
0,88,Japan,23448938,23540544,91606
1,9,Australia,10493919,10553237,59318
2,186,US,98193437,98245940,52503
3,94,"Korea, South",26412901,26462319,49418
4,63,France,37437677,37471008,33331
5,24,Brazil,34938073,34971043,32970
6,67,Germany,36152490,36180077,27587
7,190,United Kingdom,24180142,24203583,23441
8,175,Taiwan*,8092118,8112234,20116
9,62,Finland,1372651,1384654,12003
