# Covid19 Analyzer
数据分析和展示应包括:

1) 15 天中，全球新冠疫情的总体变化趋势;

2) 15 天中，每日新增确诊数累计排名前 10 个国家的每日新增确诊数据的曲线图;

3) 累计确诊数排名前 10 的国家名称及其数量;

4) 用饼图展示各个国家的累计确诊人数的比例(你爬取的所有国家，数据较小的国家可以合并处理);

5) 累计确诊人数占国家总人口比例最高的 10 个国家;

6) 疫苗接种情况(至少接种了一针及以上)，请用地图形式展示;

7) 疫苗接种率(累计疫苗接种人数/国家人数)最低的 10 个国家;

8) 全球 GDP 前十名国家的累计确诊人数箱型图，要有平均值;

9) 死亡率最高的 10 个国家;

10) 其它你希望分析和展示的数据。

以上图形应包括完整的坐标、刻度、标签、图例等，如有必要请配上说明文字，对图中的内容进行解释。

根据以上数据，列出全世界应对新冠疫情最好的 10 个国家，并说明你的理由。

针对全球累计确诊数，利用前 10 天采集到的数据做后 5 天的预测，并与实际数据进行对比。说明你预测的方法，并分析与实际数据的差距和原因。

## 导入数据

In [1]:
import pandas as pd

# Columns that are converted to float after loading.
NUMERIC_COLUMNS = ['confirmed', 'deceased', 'active', 'recovered', 'vaccinated', 'population']

# Load one CSV snapshot per day into `dfs` (index 0 holds day 1).
# NOTE(review): range(1, 2) loads only day 1; widen the stop value once more
# daily files are available.
dfs = []
for i in range(1, 2):
    # Rank rows by cumulative confirmed count, largest first, and renumber the
    # index so that it matches that ranking.
    df = (
        pd.read_csv('./dataFetcher/day' + str(i) + '_data.csv')
        .sort_values('confirmed', ascending=False)
        .reset_index(drop=True)
    )

    # Store the numeric columns as float to simplify later calculations.
    # astype() returns a converted copy instead of modifying in place, so the
    # result has to be assigned back for the conversion to take effect.
    df = df.astype(dict.fromkeys(NUMERIC_COLUMNS, float))

    dfs.append(df)
len_dfs = len(dfs)
print(f'{len_dfs} files imported')

1 files imported


## 计算新增数据

In [2]:
# Day 1 has no earlier snapshot to compare against, so its increase is set to 0.
dfs[0]['new_confirmed'] = 0

for i in range(1, len_dfs):
    # Every daily frame is sorted by 'confirmed' on its own, so the same row
    # position can hold different regions on different days. Look up the
    # previous day's count by region name instead of subtracting by position.
    # Duplicate region names are collapsed to their first row because map()
    # needs a unique lookup index.
    previous_confirmed = (
        dfs[i - 1]
        .drop_duplicates('region')
        .set_index('region')['confirmed']
    )
    # Regions absent from the previous day get NaN.
    dfs[i]['new_confirmed'] = dfs[i]['confirmed'] - dfs[i]['region'].map(previous_confirmed)
    print(dfs[i])

## 计算新增排名

In [3]:
# Build one row per region (in day-1 order) with a 'dayN' column holding that
# day's new confirmed count, plus a 'total' column summing all days.
accu_nc = dfs[0][['region']].copy()
accu_nc['day1'] = dfs[0]['new_confirmed']
for i in range(1, len_dfs):
    # Each daily frame has its own row order, so place the values by region
    # name rather than by row position. Duplicate region names are collapsed
    # to their first row because map() needs a unique lookup index.
    daily_new = (
        dfs[i]
        .drop_duplicates('region')
        .set_index('region')['new_confirmed']
    )
    accu_nc['day' + str(i + 1)] = accu_nc['region'].map(daily_new)

# skipna=False keeps a missing day as a missing total, matching plain addition.
day_columns = ['day' + str(i + 1) for i in range(len_dfs)]
accu_nc['total'] = accu_nc[day_columns].sum(axis=1, skipna=False)

accu_nc = accu_nc.sort_values('total', ascending=False).reset_index(drop=True)
# Row 0 is skipped; presumably it is the 'World' aggregate row (confirm).
print(accu_nc[1:11])

         region  day1  total
1      Djibouti     0      0
2      Barbados     0      0
3          Togo     0      0
4      Tanzania     0      0
5         Haiti     0      0
6         Benin     0      0
7    Seychelles     0      0
8       Lesotho     0      0
9       Somalia     0      0
10  The Bahamas     0      0


## 计算确诊率/死亡率/疫苗接种率

In [4]:
# For every daily frame, add '<column>_rate' = <column> / population for the
# confirmed, deceased and vaccinated counts, then print the frame.
for df in dfs:
    population = df['population']
    for column in ('confirmed', 'deceased', 'vaccinated'):
        df[column + '_rate'] = df[column].div(population)
    print(df)

                   region    confirmed   deceased      active    recovered  \
0                   World  270426226.0  5321864.0  23517119.0  241335916.0   
1           United States   50801455.0   817956.0   9979825.0   40003674.0   
2                   India   34694142.0   475434.0     95913.0   34122795.0   
3                  Brazil   22189867.0   616941.0    158608.0   21414318.0   
4          United Kingdom   10819515.0   146439.0   1182669.0    9490407.0   
..                    ...          ...        ...         ...          ...   
221                 Tonga          1.0        NaN         NaN          1.0   
222            Micronesia          1.0        NaN         NaN          1.0   
223              Kiribati          NaN        NaN         NaN          NaN   
224  Summer Olympics 2020          NaN        NaN         NaN          NaN   
225                 Spain          NaN        NaN         NaN          NaN   

       vaccinated    population  new_confirmed  confirmed_rate 

## 计算全球变化趋势

In [5]:
def _world_daily_values(column):
    """Return a one-row frame with row 0's value of `column` for each day.

    The result has index [0] and columns 'day1', 'day2', ... (one per entry
    in `dfs`). Row 0 is the row with the largest 'confirmed' value after the
    load step's sort; presumably that is the 'World' aggregate (confirm).
    """
    per_day = [
        df.loc[[0], [column]].rename(columns={column: 'day' + str(day)})
        for day, df in enumerate(dfs, start=1)
    ]
    return pd.concat(per_day, axis=1)


world_confirmed = _world_daily_values('confirmed')
print(world_confirmed)

world_deceased = _world_daily_values('deceased')

world_active = _world_daily_values('active')

world_recovered = _world_daily_values('recovered')

world_vaccinated = _world_daily_values('vaccinated')

          day1
0  270426226.0


## 绘图部分

### 15日全球疫情总体变化趋势

In [6]:
import pyecharts.options as opts
from pyecharts.charts import Line
# Day labels ('day1', 'day2', ...) for the x axis.
x_data = world_confirmed.columns.tolist()

# Row 0 of each world_* frame holds the per-day values to plot.
y_confirmed = world_confirmed.loc[0].tolist()
y_deceased = world_deceased.loc[0].tolist()
y_active = world_active.loc[0].tolist()
y_recovered = world_recovered.loc[0].tolist()

# Line chart with one series per metric; the tooltip is switched off.
world_trend = Line()
world_trend.set_global_opts(
    title_opts=opts.TitleOpts(title='15日全球疫情总体变化趋势'),
    tooltip_opts=opts.TooltipOpts(is_show=False),
    xaxis_opts=opts.AxisOpts(type_='category'),
    yaxis_opts=opts.AxisOpts(type_='value'),
)
world_trend.add_xaxis(xaxis_data=x_data)

# Series are added in this fixed order.
trend_series = (
    ('累计确诊', y_confirmed),
    ('现有确诊', y_active),
    ('死亡', y_deceased),
    ('治愈', y_recovered),
)
for series_label, series_values in trend_series:
    world_trend.add_yaxis(series_name=series_label, y_axis=series_values)

world_trend.render_notebook()

### 15日新增确诊排名前10国家新增确诊趋势

In [7]:
# Plot the daily new confirmed counts for rows 1-10 of accu_nc (row 0 is
# skipped; presumably it is the 'World' aggregate row - confirm).

# Only the per-day columns belong on the chart. A full accu_nc row also
# carries the region name and the total, which would not line up with the
# day labels on the x axis.
day_columns = [col for col in accu_nc.columns if col not in ('region', 'total')]

regions = accu_nc['region'].values.tolist()[1:]
# iloc slicing tolerates frames with fewer than 11 rows.
y_new_confirm = accu_nc.iloc[1:11][day_columns].values.tolist()

top10_nc = (
    Line()
    .set_global_opts(
        title_opts=opts.TitleOpts(title='15日新增确诊排名前10国家新增确诊趋势'),
        tooltip_opts=opts.TooltipOpts(is_show=False),
        xaxis_opts=opts.AxisOpts(type_="category"),
        yaxis_opts=opts.AxisOpts(type_='value')
    )
    .add_xaxis(xaxis_data=day_columns)
)
# One series per region; zip stops after the rows collected above.
for region_name, daily_values in zip(regions, y_new_confirm):
    top10_nc.add_yaxis(series_name=region_name, y_axis=daily_values)
top10_nc.render_notebook()

### 各国累计确诊人数占全球确诊人数比例

In [8]:
from pyecharts.charts import Pie

# Pie chart of cumulative confirmed counts from the latest snapshot: rows 1-20
# get their own slice and every row after that is merged into 'others'.
# Row 0 is skipped; presumably it is the 'World' aggregate row (confirm).
latest_confirmed = dfs[-1]['confirmed']

regions = dfs[-1]['region'].values.tolist()[1:21]
regions.append('others')

nc_data = latest_confirmed.values.tolist()[1:21]
# The tail can contain missing values; the built-in sum() would turn the whole
# 'others' slice into NaN, whereas the pandas sum skips missing entries.
# float() converts the NumPy scalar to a plain Python number for the chart.
nc_data.append(float(latest_confirmed.iloc[21:].sum()))

nc_pie = (
    Pie()
    .set_global_opts(
        title_opts=opts.TitleOpts(title='各国累计确诊人数占全球确诊人数比例'),
    )
    .add('', [list(z) for z in zip(regions, nc_data)])
    .set_series_opts(label_opts=opts.LabelOpts(formatter='{b}: {c}'))
)
nc_pie.render_notebook()
