# 疫情分析-地区数据

In [None]:
import numpy as np
import pandas as pd

# Import matplotlib and its pyplot interface
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import warnings; warnings.filterwarnings(action='once')
# Render figures directly inside the Jupyter notebook
%matplotlib inline

# Let a single cell display the result of every expression statement,
# not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# pyecharts
# from pyecharts.charts import Bar
# from pyecharts import options as opts
# 内置主题类型可查看 pyecharts.globals.ThemeType
# from pyecharts.globals import ThemeType

# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases no longer accept the old names, so use whichever whitegrid
# variant this installation provides (falling back to the default style).
_WHITEGRID_STYLES = ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
plt.style.use(next((s for s in _WHITEGRID_STYLES if s in plt.style.available), 'default'))
sns.set_style("white")
large = 22; med = 16; small = 12
# Global figure defaults. 'axes.titlesize' used to appear twice in this
# literal (large, then med); only the last entry ever took effect, so the
# dead duplicate is removed and the effective value (med) is kept.
params = {'legend.fontsize': med,
          'figure.figsize': (16, 10),
          'axes.labelsize': med,
          'axes.titlesize': med,
          'xtick.labelsize': med,
          'ytick.labelsize': med,
          'figure.titlesize': large,
          # Render the minus sign correctly when a CJK font is active
          'axes.unicode_minus': False,
          # Use a font with Chinese glyphs so labels are not garbled
          'font.sans-serif': ['SimHei']}
plt.rcParams.update(params)
# With the default settings matplotlib charts are not very sharp, so have the
# inline backend render figures in a vector format (SVG) instead
%config InlineBackend.figure_format = 'svg'

# Print the installed library versions
# (the trailing values are presumably the output of an earlier run - verify)
print(mpl.__version__)  #> 3.0.0
print(sns.__version__)  #> 0.9.0

In [None]:
# Prepare Data: read data/csv/DXYArea.csv into a DataFrame
df = pd.read_csv('data/csv/DXYArea.csv', encoding='utf8')
# Show the record stored under row label 0
df.loc[0]
# Filter: keep only rows whose province_confirmedCount exceeds 1000
df = df.loc[df['province_confirmedCount'].gt(1000)]
# Option 1: group by country and aggregate directly
# df = df.groupby('countryName')[['province_confirmedCount']].max()
# Option 2: group by country, then apply a function to each group
def topNotZero(df: pd.DataFrame) -> pd.Series:
    """Return the row of *df* with the largest ``province_confirmedCount``.

    Args:
        df: A non-empty frame containing a ``province_confirmedCount`` column.

    Returns:
        The row with the highest ``province_confirmedCount``, as a Series.
    """
    # .iloc[0] selects the first row by position; plain [0] would look up a
    # column labelled 0 and raise KeyError.
    return df.sort_values(by='province_confirmedCount', ascending=False).iloc[0]
# Reduce every country to the single row selected by topNotZero
df = df.groupby('countryName').apply(topNotZero)

# Colour code: green below 5000 confirmed cases, red otherwise
df['colors'] = ['green' if x < 5000 else 'red' for x in df['province_confirmedCount']]
df.sort_values('province_confirmedCount', ascending=True, inplace=True)
# The apply() result is indexed by countryName. When the returned rows also
# carry a countryName column, a plain reset_index() raises
# "cannot insert countryName, already exists", so the index is dropped in that
# case; otherwise it is moved back into a column. Either way the frame ends up
# with a fresh 0..n-1 index in ascending order of confirmed count.
df.reset_index(drop='countryName' in df.columns, inplace=True)
# df = pd.DataFrame(df.head(10))
df.tail(10)

# Draw plot: one horizontal line per row, placed at the row's index value
ax = plt.figure(figsize=(10, 20), dpi=80).gca()
ax.hlines(
    y=df.index,
    xmin=0,
    xmax=df['province_confirmedCount'],
    color=df['colors'],
    alpha=0.4,
    linewidth=5,
)

# Decorations: axis labels, country names as y ticks, title and dashed grid
ax.set_ylabel('国家')
ax.set_xlabel('人数')
ax.set_yticks(df.index)
ax.set_yticklabels(df['countryName'], fontsize=12)
ax.set_title('全球现存确诊人数', fontdict={'size': 20})
ax.grid(linestyle='--', alpha=0.5)

# Annotate each line with its count, offset to the right of the line end
for row, count in df['province_confirmedCount'].items():
    ax.text(count + 20000, row - 0.4, str(count), ha='center', va='bottom', fontsize=10.5)
plt.show()