# Covid19 Analyzer
数据分析和展示应包括:

1) 15 天中，全球新冠疫情的总体变化趋势;

2) 15 天中，每日新增确诊数累计排名前 10 个国家的每日新增确诊数据的曲线图;

3) 累计确诊数排名前 10 的国家名称及其数量;

4) 用饼图展示各个国家的累计确诊人数的比例(你爬取的所有国家，数据较小的国家可以合并处理);

5) 累计确诊人数占国家总人口比例最高的 10 个国家;

6) 疫苗接种情况(至少接种了一针及以上)，请用地图形式展示;

7) 疫苗接种率(累计疫苗接种人数/国家人数)最低的 10 个国家;

8) 全球 GDP 前十名国家的累计确诊人数箱型图，要有平均值;

9) 死亡率最高的 10 个国家;

10) 其它你希望分析和展示的数据。

以上图形应包括完整的坐标、刻度、标签、图例等，如有必要请配上说明文字，对图中的内容进行解释。

根据以上数据，列出全世界应对新冠疫情最好的 10 个国家，并说明你的理由。

针对全球累计确诊数，利用前 10 天采集到的数据做后 5 天的预测，并与实际数据进行对比。说明你预测的方法，并分析与实际数据的差距和原因。

## 导入数据

In [15]:
import pandas as pd

# Load one snapshot per day (files 12.5 .. 12.11) into `dfs`, each sorted by
# cumulative confirmed cases in descending order with a fresh 0..n-1 index.
numeric_columns = ['confirmed', 'deceased', 'vaccinated', 'population']

dfs = []
for i in range(5, 12):
    df = pd.read_csv('./dataFetcher/12.' + str(i) + '_data.csv')

    # Store the counts as float to simplify later arithmetic.  `astype`
    # returns a new object, so the result has to be assigned back; casting
    # before sorting also guarantees the sort below is numeric.
    df[numeric_columns] = df[numeric_columns].astype(float)

    df = df.sort_values('confirmed', ascending=False).reset_index(drop=True)
    dfs.append(df)

len_dfs = len(dfs)
print(f'{len_dfs} files imported')

7 files imported


## 计算新增数据

In [16]:
# Daily increase in confirmed cases for every snapshot.
# The first snapshot has no previous day to compare against.
dfs[0]['new_confirmed'] = 0

for i in range(1, len_dfs):
    # Snapshots are sorted by 'confirmed' independently, so the same row
    # position can hold different regions on different days.  Look up the
    # previous day's count by region name instead of relying on row position.
    # Regions missing from the previous day yield NaN.
    prev_confirmed = (
        dfs[i - 1]
        .drop_duplicates('region')  # `map` needs a unique lookup index
        .set_index('region')['confirmed']
    )
    dfs[i]['new_confirmed'] = (
        dfs[i]['confirmed'] - dfs[i]['region'].map(prev_confirmed)
    )
    print(dfs[i])

                   region    confirmed   deceased    vaccinated    population  \
0                   World  266215281.0  5273301.0  4.334032e+09  7.866962e+09   
1           United States   49969856.0   808763.0  2.356987e+08  3.337707e+08   
2                   India   34641561.0   473537.0  8.014174e+08  1.399347e+09   
3                  Brazil   22143091.0   615674.0  1.645897e+08  2.147140e+08   
4          United Kingdom   10464389.0   145605.0  5.109464e+07  6.839459e+07   
..                    ...          ...        ...           ...           ...   
221                 Tonga          1.0        NaN  7.318300e+04  1.074030e+05   
222            Micronesia          1.0        NaN           NaN  1.167420e+05   
223              Kiribati          NaN        NaN  5.949200e+04           NaN   
224  Summer Olympics 2020          NaN        NaN           NaN           NaN   
225                 Spain          NaN        NaN  3.840914e+07  4.678061e+07   

     new_confirmed  
0     

## 计算新增排名

In [17]:
# Build a table of daily new confirmed cases: one row per region, one
# 'day<N>' column per snapshot, plus a 'total' column used for ranking.
#
# Snapshots are sorted independently, so a row position does not identify the
# same region across days.  Each day's column is therefore keyed by region
# name and pandas aligns the columns on that label.
accu_nc = pd.DataFrame({
    'day' + str(i + 1): (
        dfs[i]
        .drop_duplicates('region')  # alignment needs unique labels
        .set_index('region')['new_confirmed']
    )
    for i in range(len_dfs)
})

# skipna=False keeps the total NaN when any day is missing, matching a plain
# column-by-column `+=` accumulation.
accu_nc['total'] = accu_nc.sum(axis=1, skipna=False)

# Turn the region label back into the leading 'region' column.
accu_nc = accu_nc.rename_axis('region').reset_index()
accu_nc = accu_nc.sort_values('total', ascending=False).reset_index(drop=True)

# Row 0 is skipped -- presumably the 'World' aggregate; verify against data.
print(accu_nc[1:11])

            region  day1     day2      day3      day4      day5      day6  \
1    United States     0  35065.0  179469.0  120811.0  152274.0  125009.0   
2          Germany     0  35983.0   39330.0   51592.0   85127.0   82789.0   
3   United Kingdom     0  43285.0   50850.0   45102.0  100640.0   58184.0   
4           France     0  42252.0   11308.0   59019.0   61340.0   56854.0   
5           Russia     0  32136.0       0.0   61848.0   30209.0   30873.0   
6           Turkey     0  19357.0   20033.0   22687.0   20874.0   19696.0   
7           Poland     0  13250.0       0.0   19369.0   56008.0   24988.0   
8      Netherlands     0  23078.0   20965.0   17911.0   37825.0    7545.0   
9          Belgium     0      0.0   41439.0   10878.0   18714.0   16029.0   
10    South Africa     0  11125.0    6381.0   13147.0   19842.0   22388.0   

        day7     total  
1   157838.0  770466.0  
2    25060.0  319881.0  
3        0.0  298061.0  
4        0.0  230773.0  
5    30288.0  185354.0  
6 

## 计算确诊率/死亡率/疫苗接种率

In [18]:
# Add three ratio columns to every snapshot: each count column divided by the
# same row's 'population' value.
rate_sources = {
    'confirmed_rate': 'confirmed',
    'deceased_rate': 'deceased',
    'vaccinated_rate': 'vaccinated',
}

for snapshot in dfs:
    for rate_column, count_column in rate_sources.items():
        snapshot[rate_column] = snapshot[count_column] / snapshot['population']
    print(snapshot)

                   region    confirmed   deceased    vaccinated    population  \
0                   World  265825970.0  5266612.0  4.328317e+09  7.866758e+09   
1           United States   49934791.0   808608.0  2.352980e+08  3.337654e+08   
2                   India   34633255.0   470620.0  7.997455e+08  1.399310e+09   
3                  Brazil   22138247.0   615606.0  1.645436e+08  2.147099e+08   
4          United Kingdom   10421104.0   145551.0  5.106896e+07  6.839362e+07   
..                    ...          ...        ...           ...           ...   
221                 Tonga          1.0        NaN  7.318300e+04  1.073990e+05   
222            Micronesia          1.0        NaN           NaN  1.167380e+05   
223              Kiribati          NaN        NaN  5.949200e+04           NaN   
224  Summer Olympics 2020          NaN        NaN           NaN           NaN   
225                 Spain          NaN        NaN  3.840914e+07  4.678056e+07   

     new_confirmed  confirm

## 计算全球变化趋势

In [21]:
def _world_row_by_day(column):
    """Collect row 0 of *column* from every snapshot into a one-row frame.

    The result keeps row label 0 and has one column per snapshot, named
    'day1', 'day2', ... in snapshot order.  Row 0 is presumably the 'World'
    aggregate (largest 'confirmed' value) -- verify against the data.
    """
    per_day = [
        dfs[n].loc[[0], [column]].rename(columns={column: 'day' + str(n + 1)})
        for n in range(len_dfs)
    ]
    return pd.concat(per_day, axis=1)


world_confirmed = _world_row_by_day('confirmed')
print(world_confirmed)

world_deceased = _world_row_by_day('deceased')

world_vaccinated = _world_row_by_day('vaccinated')

          day1         day2         day3         day4         day5  \
0  265825970.0  266215281.0  266775236.0  267486343.0  268390355.0   

          day6         day7  
0  269106111.0  269512224.0  


## 绘图部分

### 15日全球疫情总体变化趋势

In [None]:
import pyecharts.options as opts
from pyecharts.charts import Line
# Line chart of the row-0 confirmed counts, one point per 'day<N>' column.
x_data = world_confirmed.columns.tolist()  # category labels for the x axis
y_confirmed = world_confirmed.loc[0].tolist()  # values of the row labelled 0

# Same calls as a fluent chain, issued one at a time in the same order.
world_trend = Line()
world_trend.set_global_opts(
    tooltip_opts=opts.TooltipOpts(is_show=False),  # hover tooltip disabled
    xaxis_opts=opts.AxisOpts(type_="category"),
    yaxis_opts=opts.AxisOpts(type_='value'),
)
world_trend.add_xaxis(xaxis_data=x_data)
world_trend.add_yaxis(
    series_name="",
    y_axis=y_confirmed,
)