In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### 数据预处理

#### 数据来源

- https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data

#### 栏位说明

- FIPS：仅限美国。联邦信息处理标准代码，用于唯一标识美国境内的县。
- Admin2：县名。仅限美国。
- Province_State：省，州或依存关系名称。
- Country_Region：国家，地区或主权名称。网站上包含的位置名称与美国国务院使用的官方名称相对应。
- 最近更新：MM / DD / YYYY HH：mm：ss（24小时格式，采用UTC）。
- 纬度和经度_：仪表板上的点位置。地图上显示的所有点（澳大利亚除外）均以地理质心为基础，并不代表特定的地址，建筑物或空间位置上比省/州更精细的任何位置。澳大利亚点位于每个州最大城市的中心。
- 已确认：计数包括已确认和可能（已报告）。
- 死亡：计数包括已确认和可能的（已报告）。
- 已恢复：恢复的案例是基于本地媒体报告以及州和本地报告（如果有）的估计，因此可能大大低于真实数字。美国的州级恢复病例来自COVID跟踪项目。
- 活跃 Active：活跃病例=总病例-康复总数-死亡总数。
- 发病率 Incidence_Rate：发病率=每100,000人的病例数。
- 病死率（％） Fatality_ration：病死率（％）=记录的死亡人数/病例数。
- 报告的所有病例，死亡和康复情况均基于初次报告的日期。例外情况在下面的“数据修改”和“（可能的）病例和死亡的追溯报告”小节中说明。

In [3]:
TodaysData_Country = pd.read_csv('./dataset/COVID-19-master/csse_covid_19_data/csse_covid_19_daily_reports/08-25-2020.csv')

In [4]:
del TodaysData_Country['FIPS']
del TodaysData_Country['Admin2']

In [5]:
TodaysData_Country.head()

Unnamed: 0,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incidence_Rate,Case-Fatality_Ratio
0,,Afghanistan,2020-08-26 04:28:11,33.93911,67.709953,38070,1397,28440,8233.0,Afghanistan,97.795074,3.669556
1,,Albania,2020-08-26 04:28:11,41.1533,20.1683,8759,259,4530,3970.0,Albania,304.364445,2.956959
2,,Algeria,2020-08-26 04:28:11,28.0339,1.6596,42228,1456,29587,11185.0,Algeria,96.298736,3.447949
3,,Andorra,2020-08-26 04:28:11,42.5063,1.5218,1060,53,877,130.0,Andorra,1371.901896,5.0
4,,Angola,2020-08-26 04:28:11,-11.2027,17.8739,2283,102,977,1204.0,Angola,6.946332,4.467806


In [6]:
TodaysData_Country.shape

(3950, 12)

#### 数据清理

In [7]:
ColumnToClean = ['Confirmed', 'Deaths', 'Recovered', 'Active']

In [8]:
# filling missing values 
TodaysData_Country[['Country_Region']] = TodaysData_Country[['Country_Region']].fillna('')
TodaysData_Country[ColumnToClean] = TodaysData_Country[ColumnToClean].fillna(0)
TodaysData_Country.loc[TodaysData_Country['Country_Region'] == "United Kingdom", "Country_Region"] = "UK"
TodaysData_Country.head()

Unnamed: 0,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incidence_Rate,Case-Fatality_Ratio
0,,Afghanistan,2020-08-26 04:28:11,33.93911,67.709953,38070,1397,28440,8233.0,Afghanistan,97.795074,3.669556
1,,Albania,2020-08-26 04:28:11,41.1533,20.1683,8759,259,4530,3970.0,Albania,304.364445,2.956959
2,,Algeria,2020-08-26 04:28:11,28.0339,1.6596,42228,1456,29587,11185.0,Algeria,96.298736,3.447949
3,,Andorra,2020-08-26 04:28:11,42.5063,1.5218,1060,53,877,130.0,Andorra,1371.901896,5.0
4,,Angola,2020-08-26 04:28:11,-11.2027,17.8739,2283,102,977,1204.0,Angola,6.946332,4.467806


### 数据可视化与统计

#### 拥有前10死亡的国家

In [9]:
TodaysData_Country_drop=TodaysData_Country.drop(['Province_State','Last_Update', 'Lat','Long_','Incidence_Rate','Case-Fatality_Ratio'], axis=1)

In [10]:
TodaysData_Country_drop

Unnamed: 0,Country_Region,Confirmed,Deaths,Recovered,Active,Combined_Key
0,Afghanistan,38070,1397,28440,8233.0,Afghanistan
1,Albania,8759,259,4530,3970.0,Albania
2,Algeria,42228,1456,29587,11185.0,Algeria
3,Andorra,1060,53,877,130.0,Andorra
4,Angola,2283,102,977,1204.0,Angola
...,...,...,...,...,...,...
3945,West Bank and Gaza,19678,133,13162,6383.0,West Bank and Gaza
3946,Western Sahara,10,1,8,1.0,Western Sahara
3947,Yemen,1924,557,1091,276.0,Yemen
3948,Zambia,11285,282,10400,603.0,Zambia


In [11]:
Country_Region=set(list(TodaysData_Country_drop['Country_Region']))

In [12]:
'统计的国家/地区总数'+str(len(Country_Region))

'统计的国家/地区总数188'

- 合并数据

In [13]:
death=TodaysData_Country_drop['Deaths'].groupby(TodaysData_Country_drop['Country_Region'])
confirmed=TodaysData_Country_drop['Confirmed'].groupby(TodaysData_Country_drop['Country_Region'])
recovered=TodaysData_Country_drop['Recovered'].groupby(TodaysData_Country_drop['Country_Region'])
actived=TodaysData_Country_drop['Active'].groupby(TodaysData_Country_drop['Country_Region'])

In [14]:
death_sum=death.sum()
confirmed_sum=confirmed.sum()
recovered_sum=recovered.sum()
actived_sum=actived.sum()

In [15]:
countries_death={'Country_Region':death_sum.index,'Deaths':death_sum.values,'Confirmed':confirmed_sum.values,'Recovered':recovered_sum.values,'Active':actived_sum.values}

In [16]:
countries_death=pd.DataFrame(countries_death)

In [17]:
top10_countries_death=countries_death.nlargest(10,'Deaths')
top10_countries_death.head(10)

Unnamed: 0,Country_Region,Deaths,Confirmed,Recovered,Active
175,US,178486,5777710,2053699,3545444.0
23,Brazil,116580,3669995,3032551,520864.0
113,Mexico,61450,568621,469206,37965.0
79,India,59357,3224547,2458339,707267.0
174,UK,41535,329821,1551,286735.0
85,Italy,35445,261174,206015,19714.0
62,France,30549,285902,85757,169596.0
158,Spain,28924,412553,150376,233253.0
134,Peru,27813,600438,414577,158048.0
81,Iran,20901,363363,313058,29404.0


In [18]:
top10_countries_confirmed=countries_death.nlargest(10,'Confirmed')
top10_countries_confirmed

Unnamed: 0,Country_Region,Deaths,Confirmed,Recovered,Active
175,US,178486,5777710,2053699,3545444.0
23,Brazil,116580,3669995,3032551,520864.0
79,India,59357,3224547,2458339,707267.0
140,Russia,16524,963655,777960,169171.0
156,South Africa,13308,613017,520381,79328.0
134,Peru,27813,600438,414577,158048.0
113,Mexico,61450,568621,469206,37965.0
37,Colombia,17889,562113,395463,148769.0
158,Spain,28924,412553,150376,233253.0
35,Chile,10958,400985,374463,15564.0


In [19]:
top10_countries_recovered=countries_death.nlargest(10,'Recovered')
top10_countries_recovered

Unnamed: 0,Country_Region,Deaths,Confirmed,Recovered,Active
23,Brazil,116580,3669995,3032551,520864.0
79,India,59357,3224547,2458339,707267.0
175,US,178486,5777710,2053699,3545444.0
140,Russia,16524,963655,777960,169171.0
156,South Africa,13308,613017,520381,79328.0
113,Mexico,61450,568621,469206,37965.0
134,Peru,27813,600438,414577,158048.0
37,Colombia,17889,562113,395463,148769.0
35,Chile,10958,400985,374463,15564.0
81,Iran,20901,363363,313058,29404.0


In [20]:
top10_countries_actived=countries_death.nlargest(10,'Active')

In [21]:
top10_countries_actived

Unnamed: 0,Country_Region,Deaths,Confirmed,Recovered,Active
175,US,178486,5777710,2053699,3545444.0
79,India,59357,3224547,2458339,707267.0
23,Brazil,116580,3669995,3032551,520864.0
174,UK,41535,329821,1551,286735.0
158,Spain,28924,412553,150376,233253.0
62,France,30549,285902,85757,169596.0
140,Russia,16524,963655,777960,169171.0
134,Peru,27813,600438,414577,158048.0
37,Colombia,17889,562113,395463,148769.0
13,Bangladesh,4028,299628,186756,108844.0


#### top指标图

In [41]:
from plotly.graph_objs import Scatter,Layout
import plotly
import plotly.offline as py
import numpy as np
import plotly.graph_objs as go
import plotly.io as pio
#setting offilne
plotly.offline.init_notebook_mode(connected=True)
from plotly.subplots import make_subplots

In [67]:
fig = make_subplots(
    rows=2, cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}],
           [{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=("确诊数报告top10","死亡数top10", "治愈数top10", "活跃数top10")
)

fig.add_trace(go.Bar(name='Confirmed',text='Confirmed', x=top10_countries_confirmed['Country_Region'], y=top10_countries_confirmed['Confirmed']),
              row=1, col=1)


fig.add_trace(go.Bar(name='Deaths',text='Deaths', x=top10_countries_death['Country_Region'], y=top10_countries_death['Deaths']),
              row=1, col=2)

fig.add_trace(go.Bar(name='Recovered', text='Recovered',x=top10_countries_recovered['Country_Region'], y=top10_countries_recovered['Recovered']),
              row=2, col=1)
fig.add_trace(go.Bar(name='Active', text='Active',x=top10_countries_actived['Country_Region'], y=top10_countries_actived['Active']),
              row=2, col=2)

fig.update_layout(title_text="2020-08-25 有关Covid-19世界范围前十国家", showlegend=False,template="ggplot2")

#fig.show()
plotly.offline.iplot(fig)

- top10 死亡数和 bottom10 死亡数国家

In [54]:
countries_death

Unnamed: 0,Country_Region,Deaths,Confirmed,Recovered,Active
0,Afghanistan,1397,38070,28440,8233.0
1,Albania,259,8759,4530,3970.0
2,Algeria,1456,42228,29587,11185.0
3,Andorra,53,1060,877,130.0
4,Angola,102,2283,977,1204.0
...,...,...,...,...,...
183,West Bank and Gaza,133,19678,13162,6383.0
184,Western Sahara,1,10,8,1.0
185,Yemen,557,1924,1091,276.0
186,Zambia,282,11285,10400,603.0


In [55]:
countries_death=countries_death.query("Deaths>0")
bottom10_countries_death=countries_death.nsmallest(10,'Deaths')
bottom10_countries_death

Unnamed: 0,Country_Region,Deaths,Confirmed,Recovered,Active
28,Burundi,1,430,345,84.0
60,Fiji,1,28,23,4.0
101,Liechtenstein,1,102,94,7.0
184,Western Sahara,1,10,8,1.0
104,MS Zaandam,2,9,0,7.0
5,Antigua and Barbuda,3,94,89,2.0
22,Botswana,3,1562,199,1360.0
24,Brunei,3,144,139,2.0
115,Monaco,4,154,116,35.0
132,Papua New Guinea,4,419,232,183.0


In [66]:
fig = make_subplots(
    rows=1, cols=2,
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    subplot_titles=("死亡数top10","死亡数bottom10")
)

fig.add_trace(go.Bar(name='Deaths',text='Deaths', x=top10_countries_death['Country_Region'], y=top10_countries_death['Deaths']),
              row=1, col=1)


fig.add_trace(go.Bar(name='Deaths',text='Deaths', x=bottom10_countries_death['Country_Region'], y=bottom10_countries_death['Deaths']),
              row=1, col=2)

fig.update_layout(title_text="截至2020-08-25 有关Covid-19世界范围最高死亡和最低死亡前10国家", showlegend=False,template="ggplot2")

#fig.show()
plotly.offline.iplot(fig)

### 时序图 

In [26]:
Time_series_covid19_confirmed= pd.read_csv('./dataset/COVID-19-master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')

In [27]:
Time_series_covid19_confirmed.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,8/16/20,8/17/20,8/18/20,8/19/20,8/20/20,8/21/20,8/22/20,8/23/20,8/24/20,8/25/20
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,37596,37599,37599,37599,37856,37894,37953,37999,38054,38070
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,7380,7499,7654,7812,7967,8119,8275,8427,8605,8759
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,38583,39025,39444,39847,40258,40667,41068,41460,41858,42228
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,989,1005,1005,1024,1024,1045,1045,1045,1060,1060
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,1906,1935,1966,2015,2044,2068,2134,2171,2222,2283


In [28]:
del Time_series_covid19_confirmed['Province/State']

In [29]:
Time_series_covid19_confirmed.head()

Unnamed: 0,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,...,8/16/20,8/17/20,8/18/20,8/19/20,8/20/20,8/21/20,8/22/20,8/23/20,8/24/20,8/25/20
0,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,0,...,37596,37599,37599,37599,37856,37894,37953,37999,38054,38070
1,Albania,41.1533,20.1683,0,0,0,0,0,0,0,...,7380,7499,7654,7812,7967,8119,8275,8427,8605,8759
2,Algeria,28.0339,1.6596,0,0,0,0,0,0,0,...,38583,39025,39444,39847,40258,40667,41068,41460,41858,42228
3,Andorra,42.5063,1.5218,0,0,0,0,0,0,0,...,989,1005,1005,1024,1024,1045,1045,1045,1060,1060
4,Angola,-11.2027,17.8739,0,0,0,0,0,0,0,...,1906,1935,1966,2015,2044,2068,2134,2171,2222,2283


In [30]:
Time_series_covid19_confirmed.shape

(266, 220)

In [31]:
Country_Region=set(list(Time_series_covid19_confirmed['Country/Region']))

In [32]:
'国家和地区总数'+str(len(Country_Region))

'国家和地区总数188'

In [33]:
Time_series_covid19_confirmed=Time_series_covid19_confirmed.drop(['Lat','Long'],axis=1)

In [34]:
confirmed=Time_series_covid19_confirmed[:].groupby(Time_series_covid19_confirmed['Country/Region'])
confirmed_sum=confirmed.sum()

- 缺失值检测

In [35]:
confirmed_sum[confirmed_sum.isnull().values==True] 

Unnamed: 0_level_0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,8/16/20,8/17/20,8/18/20,8/19/20,8/20/20,8/21/20,8/22/20,8/23/20,8/24/20,8/25/20
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1


In [36]:
confirmed_sum.head(10)

Unnamed: 0_level_0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,8/16/20,8/17/20,8/18/20,8/19/20,8/20/20,8/21/20,8/22/20,8/23/20,8/24/20,8/25/20
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,0,0,0,0,0,0,0,0,0,0,...,37596,37599,37599,37599,37856,37894,37953,37999,38054,38070
Albania,0,0,0,0,0,0,0,0,0,0,...,7380,7499,7654,7812,7967,8119,8275,8427,8605,8759
Algeria,0,0,0,0,0,0,0,0,0,0,...,38583,39025,39444,39847,40258,40667,41068,41460,41858,42228
Andorra,0,0,0,0,0,0,0,0,0,0,...,989,1005,1005,1024,1024,1045,1045,1045,1060,1060
Angola,0,0,0,0,0,0,0,0,0,0,...,1906,1935,1966,2015,2044,2068,2134,2171,2222,2283
Antigua and Barbuda,0,0,0,0,0,0,0,0,0,0,...,93,93,93,94,94,94,94,94,94,94
Argentina,0,0,0,0,0,0,0,0,0,0,...,294569,299126,305966,312659,320884,329043,336802,342154,350867,359638
Armenia,0,0,0,0,0,0,0,0,0,0,...,41663,41701,41846,42056,42319,42477,42616,42792,42825,42936
Australia,0,0,0,0,4,5,5,6,9,9,...,23558,23773,23989,24236,24407,24602,24811,24915,25053,25204
Austria,0,0,0,0,0,0,0,0,0,0,...,23370,23534,23829,24084,24431,24762,25062,25253,25495,25706


In [37]:
confirmed_top10_timeseries=confirmed_sum.nlargest(10,'8/25/20')
confirmed_top10_timeseries

Unnamed: 0_level_0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,8/16/20,8/17/20,8/18/20,8/19/20,8/20/20,8/21/20,8/22/20,8/23/20,8/24/20,8/25/20
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
US,1,1,2,2,5,5,5,5,5,7,...,5403213,5438325,5482416,5529824,5573847,5622540,5667112,5701679,5739724,5777710
Brazil,0,0,0,0,0,0,0,0,0,0,...,3340197,3359570,3407354,3456652,3501975,3532330,3582362,3605783,3622861,3669995
India,0,0,0,0,0,0,0,0,1,1,...,2647663,2702681,2767253,2836925,2905825,2975701,3044940,3106348,3167323,3224547
Russia,0,0,0,0,0,0,0,0,0,2,...,920719,925558,930276,935066,939833,944671,949531,954328,959016,963655
South Africa,0,0,0,0,0,0,0,0,0,0,...,587345,589886,592144,596060,599940,603338,607045,609773,611450,613017
Peru,0,0,0,0,0,0,0,0,0,0,...,525803,535946,541493,549321,558420,567059,576067,585236,594326,600438
Mexico,0,0,0,0,0,0,0,0,0,0,...,522162,525733,531239,537031,543806,549734,556216,560164,563705,568621
Colombia,0,0,0,0,0,0,0,0,0,0,...,468332,476660,489122,502178,513719,522138,522138,541139,551688,562113
Spain,0,0,0,0,0,0,0,0,0,0,...,342813,359082,364196,370867,377906,386054,386054,386054,405436,412553
Chile,0,0,0,0,0,0,0,0,0,0,...,385946,387502,388855,390037,391849,393769,395708,397665,399568,400985


- 一共统计的时长

In [38]:
total_days=len(confirmed_top10_timeseries.columns)

In [39]:
total_days

217

- 目前确诊数全球top10的趋势图

In [40]:
data=[]
for i in range(len(confirmed_top10_timeseries.index)):
    trace=go.Scatter(
        x = confirmed_top10_timeseries.columns[55:120],
        y = confirmed_top10_timeseries.loc[confirmed_top10_timeseries.index[i]][55:120].values,
        mode = 'lines',
        name = confirmed_top10_timeseries.index[i],
    )
    data.append(trace)

layout=go.Layout(
    showlegend=True,
    template='ggplot2',
    title='全球TOP10确诊数国家趋势变化图'
)
fig=go.Figure(data=data,layout=layout)
py.iplot(fig)

#### 选取美国作为传染指数计算的国家

- SIR模型计算传染系数

SIR模型是最简单的隔室模型之一，它由三个隔室组成，S为易感性数，I为传染性数，R为恢复或死者数

- N: total population 总人口
- S（t）：第t天易感人群
- I（t）：第t天感染的人数
- R（t）：第t天恢复的人数
- β（“β”）：感染者每天预期感染的人数
- D：被感染者拥有并可以传播疾病的天数
- γ（“伽马”）：每天感染恢复的比例（γ= 1 / D）
- R₀：感染者的总感染人数（R₀=β/γ

- 这些系统对初始参数极为敏感。这就是为什么很难对新出现的新疾病暴发进行正确建模的原因。

### 预测至少3个国家的未来趋势变化(周单位)

- OWID 世界卫生组织COVID19

- 数据来源
https://github.com/owid/covid-19-data/tree/master/public/data

In [108]:
owid_covid19 = pd.read_csv('./dataset/OWID/owid-covid-data.csv')
owid_covid19.head(5)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy
0,AFG,Asia,Afghanistan,2019-12-31,0.0,0.0,,0.0,0.0,,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
1,AFG,Asia,Afghanistan,2020-01-01,0.0,0.0,,0.0,0.0,,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
2,AFG,Asia,Afghanistan,2020-01-02,0.0,0.0,,0.0,0.0,,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
3,AFG,Asia,Afghanistan,2020-01-03,0.0,0.0,,0.0,0.0,,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
4,AFG,Asia,Afghanistan,2020-01-04,0.0,0.0,,0.0,0.0,,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83


In [109]:
owid_covid19.columns

Index(['iso_code', 'continent', 'location', 'date', 'total_cases', 'new_cases',
       'new_cases_smoothed', 'total_deaths', 'new_deaths',
       'new_deaths_smoothed', 'total_cases_per_million',
       'new_cases_per_million', 'new_cases_smoothed_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'new_deaths_smoothed_per_million', 'new_tests', 'total_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'tests_per_case', 'positive_rate', 'tests_units', 'stringency_index',
       'population', 'population_density', 'median_age', 'aged_65_older',
       'aged_70_older', 'gdp_per_capita', 'extreme_poverty',
       'cardiovasc_death_rate', 'diabetes_prevalence', 'female_smokers',
       'male_smokers', 'handwashing_facilities', 'hospital_beds_per_thousand',
       'life_expectancy'],
      dtype='object')

- 数据清理

In [110]:
owid_covid19=owid_covid19.drop(['iso_code'],axis=1)

In [111]:
owid_covid19

Unnamed: 0,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,...,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy
0,Asia,Afghanistan,2019-12-31,0.0,0.0,,0.0,0.0,,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
1,Asia,Afghanistan,2020-01-01,0.0,0.0,,0.0,0.0,,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
2,Asia,Afghanistan,2020-01-02,0.0,0.0,,0.0,0.0,,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
3,Asia,Afghanistan,2020-01-03,0.0,0.0,,0.0,0.0,,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
4,Asia,Afghanistan,2020-01-04,0.0,0.0,,0.0,0.0,,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39899,,International,2020-08-23,696.0,,,7.0,,,,...,,,,,,,,,,
39900,,International,2020-08-24,696.0,,,7.0,,,,...,,,,,,,,,,
39901,,International,2020-08-25,696.0,,,7.0,,,,...,,,,,,,,,,
39902,,International,2020-08-26,696.0,,,7.0,,,,...,,,,,,,,,,


#### 世界范围时序变化图

In [116]:
owid_covid19_world=owid_covid19.query("location=='World'")
owid_covid19_world

Unnamed: 0,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,...,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy
39422,,World,2019-12-31,27.0,27.0,,0.0,0.0,,0.003,...,5.355,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58
39423,,World,2020-01-01,27.0,0.0,,0.0,0.0,,0.003,...,5.355,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58
39424,,World,2020-01-02,27.0,0.0,,0.0,0.0,,0.003,...,5.355,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58
39425,,World,2020-01-03,44.0,17.0,,0.0,0.0,,0.006,...,5.355,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58
39426,,World,2020-01-04,44.0,0.0,,0.0,0.0,,0.006,...,5.355,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39658,,World,2020-08-23,23236871.0,266465.0,251633.857,805567.0,5633.0,5657.000,2981.074,...,5.355,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58
39659,,World,2020-08-24,23461368.0,224497.0,250541.286,808889.0,3322.0,5528.429,3009.875,...,5.355,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58
39660,,World,2020-08-25,23681725.0,220357.0,253867.714,813427.0,4538.0,5567.857,3038.145,...,5.355,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58
39661,,World,2020-08-26,23936430.0,254705.0,253838.714,819801.0,6374.0,5523.714,3070.821,...,5.355,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58


In [1]:
fig = go.Figure(data=[
    go.Scatter(x=owid_covid19_world['date'], y=owid_covid19_world['new_cases'],mode='lines',name='新增案例'),
    go.Scatter(x=owid_covid19_world['date'], y=owid_covid19_world['new_deaths'],mode='lines',name='死亡案例'),
])

fig.update_layout(
    title="世界范围新增死亡和新增确诊数量变化趋势图",
    yaxis_title="新增和死亡例",
    xaxis_title="日期",
    showlegend=True,
    template='ggplot2',""
)
#fig.show()
plotly.offline.iplot(fig)

SyntaxError: positional argument follows keyword argument (<ipython-input-1-95b37c36d9b2>, line 11)