In [1]:
import pandas as pd
import numpy as np
from datetime import date, timedelta
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Preprocessing: load the raw DXY export and reduce it to one row per
# (province, calendar day) for mainland/HK/Macau/Taiwan province-level records.
dxy_area = (
    pd.read_csv('DXYArea.csv')
    # Keep rows for China, dropping the country-level aggregate row.
    .loc[lambda raw: (raw.countryName == '中国') & (raw.provinceName != '中国')]
    # Truncate the update timestamp to a plain calendar date.
    .assign(updateTime=lambda frame: pd.to_datetime(frame.updateTime).dt.date)
    # The first row seen for each (province, day) pair is the one that is kept.
    .drop_duplicates(['provinceName', 'updateTime'])
    .reset_index(drop=True)
    [[
        'provinceName',
        'province_confirmedCount',
        'province_curedCount',
        'province_deadCount',
        'updateTime',
    ]]
)

In [3]:
# Short display names (as expected by the pyecharts China map), listed in the
# row order of `current`.
# NOTE(review): the names are assigned purely by row position, so this list
# assumes the province order of the data snapshot the notebook was built
# against -- TODO confirm whenever DXYArea.csv is refreshed.
short_names = [
    '香港', '新疆', '北京', '四川', '甘肃', '上海', '广东', '台湾', '河北', '陕西',
    '山西', '云南', '重庆', '内蒙古', '山东', '浙江', '天津', '辽宁', '福建', '江苏',
    '海南', '澳门', '吉林', '湖北', '江西', '黑龙江', '安徽', '贵州', '湖南', '河南',
    '广西', '宁夏', '青海', '西藏',
]

# First row per province, i.e. the first record encountered for each province.
current = dxy_area.drop_duplicates(['provinceName']).reset_index(drop=True)

# Write through `.loc` on the frame itself. The previous chained form
# (`current['provinceName'][k] = ...`) assigns into an intermediate Series,
# which is not guaranteed to update `current` (and never does under pandas
# Copy-on-Write).
n_named = min(len(short_names), len(current))
current.loc[current.index[:n_named], 'provinceName'] = short_names[:n_named]
current

Unnamed: 0,provinceName,province_confirmedCount,province_curedCount,province_deadCount,updateTime
0,香港,1655,1254,10,2020-07-16
1,新疆,77,73,3,2020-07-16
2,北京,929,752,9,2020-07-16
3,四川,599,590,3,2020-07-16
4,甘肃,167,165,2,2020-07-16
5,上海,732,691,7,2020-07-16
6,广东,1650,1636,8,2020-07-15
7,台湾,451,440,7,2020-07-15
8,河北,349,340,6,2020-07-15
9,陕西,321,315,3,2020-07-15


In [4]:
from pyecharts import Map

# Per-province snapshot as parallel plain-Python lists (names / cumulative confirmed).
province = current['provinceName'].values.tolist()
confirmedCount = current['province_confirmedCount'].values.tolist()

# Diverging palette for the visual map: blues for low counts through to dark red.
range_color = [
    '#313695', '#4575b4', '#74add1', '#abd9e9', '#e0f3f8', '#ffffbf',
    '#fee090', '#fdae61', '#f46d43', '#d73027', '#a50026',
]

# NOTE: the name `map` shadows the Python builtin; it is kept unchanged here.
map = Map("Cumulative Confirmed Cases in China", '', width=1200, height=600)
map.add(
    "Confirmed Cases",
    province,
    confirmedCount,
    visual_range=[0, 2000],
    maptype='china',
    is_visualmap=True,
    visual_range_color=range_color,
    visual_text_color='#000',
    is_map_symbol_show=False,
    is_label_show=True,
)

In [5]:
# Build daily cumulative series: one row per province (in `provinces` order),
# one column per calendar day between the earliest and latest update.
provinces = dxy_area['provinceName'].drop_duplicates().reset_index(drop=True)

# Take the true date range instead of trusting the first/last row of the frame,
# so the result does not depend on how the CSV happens to be sorted.
start_date = dxy_area['updateTime'].min()
end_date = dxy_area['updateTime'].max()
num_dates = (end_date - start_date).days + 1
delta = timedelta(days=1)
dates = [start_date + offset * delta for offset in range(num_dates)]


def _daily_series(column):
    """Return a (n_provinces, num_dates) int array of `column` per province and day.

    A day is taken from the data only when exactly one record exists for that
    (province, day) pair. Any other day repeats the previous day's value, and
    days before a province's first record are 0.
    """
    unique_records = dxy_area.drop_duplicates(['provinceName', 'updateTime'], keep=False)
    wide = unique_records.pivot(index='provinceName', columns='updateTime', values=column)
    # Align to the province order and the full calendar; absent cells become NaN.
    wide = wide.reindex(index=provinces.values, columns=dates)
    # Carry the last known value forward, then zero-fill the leading gap.
    return wide.ffill(axis=1).fillna(0).astype(int).to_numpy(copy=True)


confirmedSeries = _daily_series('province_confirmedCount')
curedSeries = _daily_series('province_curedCount')
deathSeries = _daily_series('province_deadCount')

In [117]:
from pyecharts import Line

# Existing (still active) cases = cumulative confirmed - cured - dead,
# per province (rows) and per day (columns).
existing = confirmedSeries - curedSeries - deathSeries
existing_all = existing.sum(0)  # nationwide total per day
# Row 23 is Hubei in the province ordering used throughout this notebook.
existing_Hubei = existing[23]
existing_nonHubei = existing_all - existing_Hubei

line = Line("Existing Confirmed Cases")
line.add("China", dates, existing_all, is_smooth=True, mark_point=["max"],
         mark_point_symbol="diamond", mark_point_textcolor="#40ff27")
line.add("Hubei", dates, existing_Hubei, is_smooth=True, mark_point=["max"],
         mark_point_symbol="arrow")
# Axis titles are set on the last series; the y-axis label previously read "Exising".
line.add("nonHubei", dates, existing_nonHubei, is_smooth=True, mark_point=["max"],
         xaxis_name="date", yaxis_name="Existing Confirmed Cases",
         yaxis_name_gap=50)

In [7]:
from pyecharts import Pie

# Row positions (into `province` / `confirmedCount`) of each region's members.
NorthEast = [25, 22, 17]  # Northeast (3): Heilongjiang, Jilin, Liaoning
NorthChina = [2, 16, 8, 10, 13]  # North China (5): Beijing, Tianjin, Hebei, Shanxi, Inner Mongolia
CentralChina = [29, 28]  # Central China (3): Henan, Hubei, Hunan -- Hubei left out
EastChina = [14, 19, 26, 5, 15, 24, 18, 7]  # East China (8): Shandong, Jiangsu, Anhui, Shanghai, Zhejiang, Jiangxi, Fujian, Taiwan
SouthChina = [6, 30, 20, 0, 21]  # South China (5): Guangdong, Guangxi, Hainan, Hong Kong, Macau
NorthWest = [9, 4, 31, 32, 1]  # Northwest (5): Shaanxi, Gansu, Ningxia, Qinghai, Xinjiang
SouthWest = [3, 27, 11, 12, 33]  # Southwest (5): Sichuan, Guizhou, Yunnan, Chongqing, Tibet

# Single ordered table driving everything below (label, member rows).
regions_in_order = [
    ('NorthEast', NorthEast),
    ('NorthChina', NorthChina),
    ('CentralChina', CentralChina),
    ('EastChina', EastChina),
    ('SouthChina', SouthChina),
    ('NorthWest', NorthWest),
    ('SouthWest', SouthWest),
]

# Province names per region, in the same order as the table above.
(NorthEast_provinces, NorthChina_provinces, CentralChina_provinces,
 EastChina_provinces, SouthChina_provinces, NorthWest_provinces,
 SouthWest_provinces) = [
    [province[i] for i in members] for _, members in regions_in_order
]

# Cumulative confirmed counts per region member, same order again.
(NorthEast_cases, NorthChina_cases, CentralChina_cases,
 EastChina_cases, SouthChina_cases, NorthWest_cases,
 SouthWest_cases) = [
    [confirmedCount[i] for i in members] for _, members in regions_in_order
]

region = [label for label, _ in regions_in_order]
region_cases = [
    sum(confirmedCount[i] for i in members) for _, members in regions_in_order
]

# Nationwide pie: one slice per region.
pie = Pie("Proportion of Confirmed Cases in China")
pie.add("Cumulative Confirmed Cases", region, region_cases,
        radius=[0, 75], label_text_color=None, is_label_show=True,
        legend_orient="vertical", legend_pos="right")

In [8]:
# Province-level breakdown of cumulative confirmed cases within the NorthEast region.
pie = Pie("Proportion of Confirmed Cases in NorthEast")
pie.add("Cumulative Confirmed Cases", NorthEast_provinces, NorthEast_cases,
        radius=[0, 75], label_text_color=None, is_label_show=True,
        legend_orient="vertical", legend_pos="right")

In [9]:
# Province-level breakdown of cumulative confirmed cases within the NorthChina region.
pie = Pie("Proportion of Confirmed Cases in NorthChina")
pie.add("Cumulative Confirmed Cases", NorthChina_provinces, NorthChina_cases,
        radius=[0, 75], label_text_color=None, is_label_show=True,
        legend_orient="vertical", legend_pos="right")

In [10]:
# Province-level breakdown of cumulative confirmed cases within CentralChina;
# the member list used here does not include Hubei.
pie = Pie("Proportion of Confirmed Cases in CentralChina (except Hubei)")
pie.add("Cumulative Confirmed Cases", CentralChina_provinces, CentralChina_cases,
        radius=[0, 75], label_text_color=None, is_label_show=True,
        legend_orient="vertical", legend_pos="right")

In [11]:
# Province-level breakdown of cumulative confirmed cases within the EastChina region.
pie = Pie("Proportion of Confirmed Cases in EastChina")
pie.add("Cumulative Confirmed Cases", EastChina_provinces, EastChina_cases,
        radius=[0, 75], label_text_color=None, is_label_show=True,
        legend_orient="vertical", legend_pos="right")

In [12]:
# Province-level breakdown of cumulative confirmed cases within the SouthChina region.
pie = Pie("Proportion of Confirmed Cases in SouthChina")
pie.add("Cumulative Confirmed Cases", SouthChina_provinces, SouthChina_cases,
        radius=[0, 75], label_text_color=None, is_label_show=True,
        legend_orient="vertical", legend_pos="right")

In [13]:
# Province-level breakdown of cumulative confirmed cases within the NorthWest region.
pie = Pie("Proportion of Confirmed Cases in NorthWest")
pie.add("Cumulative Confirmed Cases", NorthWest_provinces, NorthWest_cases,
        radius=[0, 75], label_text_color=None, is_label_show=True,
        legend_orient="vertical", legend_pos="right")

In [14]:
# Province-level breakdown of cumulative confirmed cases within the SouthWest region.
pie = Pie("Proportion of Confirmed Cases in SouthWest")
pie.add("Cumulative Confirmed Cases", SouthWest_provinces, SouthWest_cases,
        radius=[0, 75], label_text_color=None, is_label_show=True,
        legend_orient="vertical", legend_pos="right")

In [122]:
from pyecharts import Scatter, Grid

scatter = Scatter("Quadrant Chart of Weekly Growth Rate & Current Confirmed Cases on 2.11")

# Column positions in the daily series. The chart title ties column 20 to 2.11;
# column 13 is seven days earlier.
# NOTE(review): this presumes the series starts on 2020-01-22 -- TODO confirm against `dates`.
day_idx = 20
week_before_idx = 13

for i in range(len(province)):
    if i == 23:
        # Row 23 is Hubei; it is left out of this chart.
        continue
    latest = int(confirmedSeries[i, day_idx])
    baseline = int(confirmedSeries[i, week_before_idx])
    if baseline == 0:
        # Growth rate is undefined with no cases a week earlier; previously this
        # produced an inf/nan point (with the warning suppressed). Skip instead.
        continue
    scatter.add(
        province[i],
        [latest],
        [(latest - baseline) / float(baseline)],
        xaxis_name="Current Confirmed Cases",
        yaxis_name="Weekly Growth Rate",
        legend_top="10%")

# Wrap in a Grid so the plot area starts below the (tall) legend.
g = Grid(width=800, height=600)
g.add(scatter, grid_top="25%")
g

In [16]:
# Auxiliary tables: population per province, and passenger records per province and day.
province_population = pd.read_csv('population_province.csv', sep=',', header=0)
transporation = pd.read_csv('transportation_hubei2province.csv', sep=',', header=0).assign(
    # Reduce the parsed timestamps to plain calendar dates.
    date=lambda frame: pd.to_datetime(frame.date).dt.date
)

In [74]:
# Population per province, aligned with the row order of `province`.
# The first matching row of the population table is used for each name.
population = np.array(
    [
        province_population.loc[
            province_population['provinceName'] == province[i], 'population'
        ].values[0]
        for i in range(provinces.shape[0])
    ],
    dtype=float,
)

In [118]:
# Window over which the daily passenger records are averaged.
start_date = date(2020, 1, 1)
end_date = date(2020, 2, 3)
delta = timedelta(days=1)
num_dates = (end_date - start_date).days + 1

n_provinces = provinces.shape[0]
passengers = np.zeros(n_provinces, dtype=float)  # running sum, turned into a mean below
num_records = np.zeros(n_provinces, dtype=int)   # number of days that contributed

# Walk the window one day at a time; `start_date` is advanced in place and
# ends up one day past `end_date`.
for _ in range(num_dates):
    same_day = transporation['date'] == start_date
    for i in range(n_provinces):
        record = transporation.loc[same_day & (transporation['provinceName'] == current['provinceName'][i])]
        # Only days with exactly one record for the province are counted.
        if record.shape[0] != 1:
            continue
        passengers[i] += record['passenger'].values[0]
        num_records[i] += 1
    start_date += delta

# Sum -> mean. Provinces with no records divide 0 by 0 and become NaN.
passengers /= num_records

In [123]:
validTransporationIdx = ~np.isnan(passengers)
validTransporationIdx[23] = False

sc = Scatter("Relationship between Population, Transportation and Confirmed Cases")
for i in range(len(province)):
    if validTransporationIdx[i]:
        sc.add(
            province[i],
            [population[i]],
            [passengers[i]],
            extra_data=[confirmedSeries[i, 12]],
            is_visualmap=True,
            visual_dimension=2,
            visual_orient="horizontal",
            visual_type="size",
            visual_range=[0, 724],
            visual_text_color="#000",
            legend_top="10%",
            xaxis_name="Annual permanent population (10,000 people)",
            yaxis_name="Passengers from Wuhan per day",
            yaxis_name_gap = 50
        )
        
g = Grid(width=800, height=600)
g.add(sc, grid_top="25%")
g