# 世界の最新の感染者数

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")
del df["Lat"]
del df["Long"]

In [3]:
df.head()

Unnamed: 0,Province/State,Country/Region,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,...,5/5/21,5/6/21,5/7/21,5/8/21,5/9/21,5/10/21,5/11/21,5/12/21,5/13/21,5/14/21
0,,Afghanistan,0,0,0,0,0,0,0,0,...,60797,61162,61455,61755,61842,62063,62403,62718,63045,63355
1,,Albania,0,0,0,0,0,0,0,0,...,131419,131510,131577,131666,131723,131753,131803,131845,131890,131939
2,,Algeria,0,0,0,0,0,0,0,0,...,123272,123473,123692,123900,124104,124288,124483,124682,124889,125059
3,,Andorra,0,0,0,0,0,0,0,0,...,13340,13363,13390,13406,13423,13429,13447,13470,13470,13510
4,,Angola,0,0,0,0,0,0,0,0,...,27529,27921,28201,28477,28740,28875,29146,29405,29695,30030


In [4]:
latest_date = df.tail(1).columns[-1]
latest_date

'5/14/21'

In [5]:
latest_column_key = df.columns[-1]
df = df[['Province/State','Country/Region',latest_column_key]]
df = df.rename(columns={latest_column_key:"latest_confirmed"})
df.head()

Unnamed: 0,Province/State,Country/Region,latest_confirmed
0,,Afghanistan,63355
1,,Albania,131939
2,,Algeria,125059
3,,Andorra,13510
4,,Angola,30030


## 全世界の最新の感染者数

In [6]:
print("全世界感染者数({}時点):{:,}人".format(latest_date,df["latest_confirmed"].sum()))

全世界感染者数(5/14/21時点):161,894,208人


## 国別の最新の感染者数

In [7]:
country = df.groupby(["Country/Region"],as_index=False).sum("latest_confirmed")
country = country.sort_values("latest_confirmed", ascending=False)

In [8]:
country.head()

Unnamed: 0,Country/Region,latest_confirmed
178,US,32895169
79,India,24372907
23,Brazil,15519525
62,France,5909386
177,Turkey,5095390


In [9]:
import matplotlib.pyplot as plt
%matplotlib notebook
#%matplotlib inline

In [10]:
plt.figure(figsize=(9.5,5))
plt.title("Worst 50")
plt.xticks(rotation="90",fontsize=9)
plt.ylabel("count (1000 people)")
#fig.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))
plt.bar(country.head(50)["Country/Region"],country.head(50)["latest_confirmed"]/1000,label="positive")
plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

## ヒストグラム 

In [11]:
plt.figure(figsize=(10,5))
plt.title("histgram(million)")
#plt.xticks(rotation="90",fontsize=9)
#plt.ylabel("count (1000 people)")
#fig.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))
plt.hist(country["latest_confirmed"]/1000000,bins=20)

<IPython.core.display.Javascript object>

(array([173.,   9.,   5.,   2.,   0.,   0.,   0.,   0.,   0.,   1.,   0.,
          0.,   0.,   0.,   1.,   0.,   0.,   0.,   0.,   1.]),
 array([1.00000000e-06, 1.64475940e+00, 3.28951780e+00, 4.93427620e+00,
        6.57903460e+00, 8.22379300e+00, 9.86855140e+00, 1.15133098e+01,
        1.31580682e+01, 1.48028266e+01, 1.64475850e+01, 1.80923434e+01,
        1.97371018e+01, 2.13818602e+01, 2.30266186e+01, 2.46713770e+01,
        2.63161354e+01, 2.79608938e+01, 2.96056522e+01, 3.12504106e+01,
        3.28951690e+01]),
 <a list of 20 Patch objects>)

## 人口当たりの分析

In [12]:
pop = pd.read_excel("world-data/WPP2019_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.xlsx",header=16)
pop.head(1)

Unnamed: 0,Index,Variant,"Region, subregion, country or area *",Notes,Country code,Type,Parent code,1950,1951,1952,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,1,Estimates,WORLD,,900,World,0,2536431.018,2584034.227,2630861.69,...,7041194.168,7125827.957,7210582.041,7295290.759,7379796.967,7464021.934,7547858.9,7631091.113,7713468.205,7794798.729


In [13]:
pop.iloc[:,-1]

0      7794798.729
1              ...
2      1273304.261
3      6521494.468
4      1057438.163
          ...     
284         62.273
285      37742.157
286         56.772
287          5.795
288     331002.647
Name: 2020, Length: 289, dtype: object

In [14]:
pop_latest = pop.loc[pop["Type"]=="Country/Area"][[pop.columns[2],pop.columns[-1]]]

In [15]:
pop_latest.columns = ["country","population"]

In [16]:
pop_latest.head()

Unnamed: 0,country,population
26,Burundi,11890.781
27,Comoros,869.595
28,Djibouti,988.002
29,Eritrea,3546.427
30,Ethiopia,114963.583


In [17]:
country.head()

Unnamed: 0,Country/Region,latest_confirmed
178,US,32895169
79,India,24372907
23,Brazil,15519525
62,France,5909386
177,Turkey,5095390


### 世界全体での人口10万人あたり陽性者数

In [18]:
country.latest_confirmed.sum()

161894208

In [19]:
pop_latest.population.sum()

7794798.729

In [20]:
print("****世界全体での人口10万人あたり陽性者数:{}****".format(country.latest_confirmed.sum() / (pop_latest.population.sum() / 100)))

****世界全体での人口10万人あたり陽性者数:2076.9517421621676****


### 各国の人口１０万人あたり陽性者数

In [21]:
c_merged = pd.merge(country, pop_latest, left_on="Country/Region", right_on="country", how="left")
c_merged.loc[c_merged["country"].isna()]

Unnamed: 0,Country/Region,latest_confirmed,country,population
0,US,32895169,,
5,Russia,4866641,,
13,Iran,2732152,,
54,Bolivia,327224,,
57,West Bank and Gaza,303270,,
65,Moldova,253567,,
72,Venezuela,212998,,
81,Burma,143035,,
84,"Korea, South",131061,,
90,Kosovo,106243,,


In [22]:
p_merged = pd.merge(pop_latest, country, left_on="country", right_on="Country/Region", how="left")
len(p_merged.loc[p_merged["Country/Region"].isna()])

65

In [23]:
pop_latest.loc[pop_latest["country"].str.contains("Syria")]

Unnamed: 0,country,population
104,Syrian Arab Republic,17500.657


In [24]:
renames = {
    "US":"United States of America",
    "Russia":"Russian Federation",
    "Iran":"Iran (Islamic Republic of)",
    "Bolivia":"Bolivia (Plurinational State of)",
    "Moldova":"Republic of Moldova",
    "Venezuela":"Venezuela (Bolivarian Republic of)",
    "Korea, South":"Republic of Korea",
    "Congo (Kinshasa)":"Democratic Republic of the Congo",
    "Congo (Brazzaville)":"Congo",
    "Vietnam":"Viet Nam",
    "Laos":"Lao People's Democratic Republic",
    "Taiwan*":"China, Taiwan Province of China",
    "Tanzania":"United Republic of Tanzania",
    "Brunei":"Brunei Darussalam",
    "Micronesia":"Micronesia (Fed. States of)",
    "Syria":"Syrian Arab Republic",
}

In [25]:
country_renamed = country.copy()
country_renamed["Country/Region"].replace(renames,inplace=True)

In [26]:
c_merged = pd.merge(country_renamed, pop_latest, left_on="Country/Region", right_on="country", how="left")
c_merged.loc[c_merged["country"].isna()]

Unnamed: 0,Country/Region,latest_confirmed,country,population
57,West Bank and Gaza,303270,,
81,Burma,143035,,
90,Kosovo,106243,,
110,Cote d'Ivoire,46535,,
178,Diamond Princess,712,,
187,MS Zaandam,9,,


In [27]:
c_merged["positives per 1000 population"] = c_merged["latest_confirmed"] / (c_merged["population"])
c_merged["positives per 100,000 population"] = (c_merged["latest_confirmed"] / (c_merged["population"]/100)).astype(float)

In [28]:
c_merged = c_merged.sort_values("positives per 1000 population", ascending=False)

In [29]:
c_merged.loc[c_merged["Country/Region"]=="United States of America"]

Unnamed: 0,Country/Region,latest_confirmed,country,population,positives per 1000 population,"positives per 100,000 population"
0,United States of America,32895169,United States of America,331002.647,99.38038,9938.038048


In [30]:
plt.figure(figsize=(9.5,5))
plt.title("positives per pupulation worst 50")
plt.xticks(rotation="90",fontsize=9)
plt.ylabel("count per 100,000 population")
#fig.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))
plt.bar(c_merged.head(50)["Country/Region"],c_merged.head(50)["positives per 100,000 population"],label="positive")
plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

In [31]:
c_asc = c_merged.sort_values("positives per 1000 population")
plt.figure(figsize=(9.5,5))
plt.title("positives per pupulation best 50")
plt.xticks(rotation="90",fontsize=9)
plt.ylabel("count per 100,000 population")
#fig.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))
plt.bar(c_asc.head(50)["Country/Region"],c_asc.head(50)["positives per 100,000 population"],label="positive")
plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

In [37]:
plt.figure(figsize=(9.5,5))
plt.title("histgram(positives per 100,000 pop)")
plt.hist(c_merged["positives per 100,000 population"],bins=[0,2000,4000,8000,10000,12000,14000,16000,18000])

<IPython.core.display.Javascript object>

(array([103.,  22.,  39.,  14.,   4.,   0.,   3.,   1.]),
 array([    0,  2000,  4000,  8000, 10000, 12000, 14000, 16000, 18000]),
 <a list of 8 Patch objects>)

In [33]:
c_merged.describe()

Unnamed: 0,latest_confirmed,"positives per 100,000 population"
count,192.0,186.0
mean,843199.0,3230.945482
std,3242829.0,3754.951505
min,1.0,0.852108
25%,11263.25,174.594327
50%,94646.5,1561.33109
75%,377201.2,6056.799349
max,32895170.0,17485.27794


In [34]:
c_merged.head()

Unnamed: 0,Country/Region,latest_confirmed,country,population,positives per 1000 population,"positives per 100,000 population"
135,Andorra,13510,Andorra,77.265,174.852779,17485.27794
93,Montenegro,98703,Montenegro,628.062,157.154867,15715.486688
18,Czechia,1651178,Czechia,10708.982,154.186271,15418.627093
156,San Marino,5083,San Marino,33.938,149.773116,14977.311568
66,Slovenia,248520,Slovenia,2078.932,119.54215,11954.214953


In [35]:
c_merged.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 192 entries, 135 to 187
Data columns (total 6 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Country/Region                    192 non-null    object 
 1   latest_confirmed                  192 non-null    int64  
 2   country                           186 non-null    object 
 3   population                        186 non-null    object 
 4   positives per 1000 population     186 non-null    object 
 5   positives per 100,000 population  186 non-null    float64
dtypes: float64(1), int64(1), object(4)
memory usage: 10.5+ KB


In [40]:
plt.figure(figsize=(9.5,5))
plt.title("histgram(positives per 100,000 pop)")
plt.hist(c_merged["positives per 100,000 population"],bins=200)

<IPython.core.display.Javascript object>

(array([26., 21., 12.,  6.,  2.,  4.,  2.,  2.,  1.,  0.,  0.,  1.,  6.,
         1.,  2.,  3.,  2.,  2.,  3.,  1.,  1.,  3.,  2.,  0.,  2.,  2.,
         1.,  0.,  0.,  2.,  1.,  2.,  3.,  0.,  0.,  0.,  1.,  2.,  2.,
         0.,  1.,  1.,  0.,  0.,  0.,  2.,  0.,  0.,  1.,  1.,  0.,  0.,
         2.,  0.,  0.,  0.,  1.,  1.,  0.,  1.,  0.,  0.,  1.,  2.,  0.,
         1.,  0.,  1.,  1.,  2.,  1.,  1.,  1.,  0.,  0.,  1.,  1.,  3.,
         2.,  0.,  1.,  2.,  1.,  1.,  2.,  1.,  1.,  0.,  1.,  3.,  1.,
         0.,  1.,  1.,  2.,  0.,  0.,  1.,  1.,  0.,  0.,  1.,  0.,  1.,
         0.,  0.,  1.,  1.,  0.,  1.,  2.,  0.,  0.,  1.,  0.,  0.,  0.,
         1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,
         0.,  0.,  1.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.