In [85]:
from datetime import datetime
from collections import Counter

import numpy as np
import io
import requests
import pandas as pd

from matplotlib import pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [86]:
def format_cat(cat):
    """Return a copy of ``cat`` with an added ``timestamp`` column.

    Every ``updateTime`` string is parsed with the format
    ``"%Y-%m-%d %H:%M:%S.%f"`` and converted to the number of seconds
    elapsed since 2020-01-25 00:00:00. Both instants are naive datetimes, so
    ``datetime.timestamp()`` interprets them in the local time zone.

    Parameters
    ----------
    cat : pandas.DataFrame
        Frame with an ``updateTime`` column of strings in the format above.

    Returns
    -------
    pandas.DataFrame
        New frame produced by ``DataFrame.assign``; the input is not modified.

    Raises
    ------
    ValueError
        Raised by ``datetime.strptime`` when a value does not match the format.
    """
    origin = datetime(2020, 1, 25, 0, 0, 0).timestamp()
    seconds = np.zeros(len(cat))
    for row, stamp in enumerate(cat.updateTime):
        parsed = datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S.%f")
        seconds[row] = parsed.timestamp() - origin
    return cat.assign(timestamp=seconds)

In [87]:
def cal_daily_province_result(cat):
    """Build a per-day, per-province table of the most recently reported counts.

    For every day index ``i`` in ``range(day_max)`` and every distinct
    ``provinceName``, the rows with ``timestamp < 86400 * i`` are considered
    and the counts of the row with the largest ``timestamp`` are taken. A
    province with no rows before the cutoff gets zeros.

    Parameters
    ----------
    cat : pandas.DataFrame
        Must provide ``provinceName``, ``timestamp`` (seconds) and the columns
        ``province_confirmedCount``, ``province_suspectedCount``,
        ``province_curedCount`` and ``province_deadCount``.

    Returns
    -------
    pandas.DataFrame
        One row per (day, province) with columns ``day``, ``name``, ``date``,
        ``confirmedCount``, ``suspectedCount``, ``curedCount``, ``deadCount``.
        An empty input yields an empty frame with those columns.
    """
    count_keys = ('confirmedCount', 'suspectedCount', 'curedCount',
                  'deadCount')
    if len(cat) == 0:
        # np.max of an empty column is NaN and int(NaN) raises; return an
        # empty, correctly shaped table instead.
        return pd.DataFrame(columns=["day", "name", "date", *count_keys])

    provinces = list(Counter(cat.provinceName).keys())
    day_max = int(np.ceil(np.max(cat.timestamp) / 86400) + 1)
    # NOTE(review): the date label is offset from 2020-01-23, whereas
    # format_cat measures ``timestamp`` from 2020-01-25 — confirm that this
    # two-day difference is intentional.
    date_origin = datetime(2020, 1, 23, 0, 0, 0).timestamp()

    records = []
    for day in range(day_max):
        cutoff = 86400 * day
        reported = cat[cat.timestamp < cutoff]
        date = datetime.fromtimestamp(cutoff + date_origin)
        for province in provinces:
            rows = reported[reported.provinceName == province]
            record = {"day": day, "name": province, "date": date}
            if len(rows) > 0:
                # Sort once per (day, province); the latest row serves all keys.
                latest = rows.sort_values('timestamp', ascending=False).iloc[0]
                for key in count_keys:
                    record[key] = latest["province_{0}".format(key)]
            else:
                for key in count_keys:
                    record[key] = 0
            records.append(record)
    return pd.DataFrame(records)

In [88]:
def cal_daily_nation_result(result_p):
    """Aggregate the per-province daily table into daily totals.

    For each distinct ``day`` (in order of first appearance) the four count
    columns are summed over all rows (``*_nation``), over rows whose ``name``
    is ``'湖北省'`` (``*_hubei``) and over all remaining rows (``*_nothubei``).

    Parameters
    ----------
    result_p : pandas.DataFrame
        Needs the columns ``day``, ``name``, ``date``, ``confirmedCount``,
        ``suspectedCount``, ``curedCount`` and ``deadCount``.

    Returns
    -------
    pandas.DataFrame
        One row per day with ``day``, ``date`` (taken from the first row of
        that day) and the twelve ``<count>_<region>`` columns.
    """
    days = list(Counter(result_p.day).keys())
    # Slice the table once per day and reuse the slices for every column.
    per_day = [result_p[result_p.day == day] for day in days]

    columns = dict()
    columns['day'] = days
    columns['date'] = [frame.iloc[0].date for frame in per_day]
    for key in ['confirmedCount', 'suspectedCount', 'curedCount', 'deadCount']:
        columns["{0}_nation".format(key)] = [
            np.sum(frame[key]) for frame in per_day
        ]
        columns["{0}_hubei".format(key)] = [
            np.sum(frame[frame['name'] == '湖北省'][key]) for frame in per_day
        ]
        columns["{0}_nothubei".format(key)] = [
            np.sum(frame[frame['name'] != '湖北省'][key]) for frame in per_day
        ]
    return pd.DataFrame(columns)

In [89]:
def sort_province_name(result_p):
    """Return the province names ordered by peak confirmed count, largest first.

    Parameters
    ----------
    result_p : pandas.DataFrame
        Needs the columns ``name`` and ``confirmedCount``.

    Returns
    -------
    numpy.ndarray
        Distinct values of ``name``, sorted by the maximum ``confirmedCount``
        observed for each name in descending order.
    """
    names = list(Counter(result_p.name).keys())
    peaks = np.array(
        [np.max(result_p[result_p.name == name].confirmedCount)
         for name in names],
        dtype=float)
    ranking = pd.DataFrame({"province": names, "confirmedCount": peaks})
    ranking = ranking.sort_values("confirmedCount", ascending=False)
    return ranking.province.values
    

In [90]:
# Download the DXY per-area CSV and derive the per-province and national
# daily tables from it.
url = 'https://raw.githubusercontent.com/BlankerL/DXY-COVID-19-Data/master/csv/DXYArea.csv'
# A timeout keeps the kernel from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as CSV.
response = requests.get(url, timeout=60)
response.raise_for_status()
s = response.content
cat = pd.read_csv(io.StringIO(s.decode('utf-8')))
cat = format_cat(cat)
result_p = cal_daily_province_result(cat)
provinces = sort_province_name(result_p)
result_n = cal_daily_nation_result(result_p)

In [91]:
# Cumulative (solid) and current (dashed) confirmed counts for Hubei, the rest
# of the country and the whole nation, on a log y axis. "Current" is
# confirmed minus cured minus dead.
fig = go.Figure()
for region_key, line_color, region_label in [
        ('hubei', 'firebrick', 'Hubei'),
        ('nothubei', 'royalblue', 'except Hubei'),
        ('nation', 'green', 'Nation'),
]:
    cumulative = result_n['confirmedCount_' + region_key]
    current = (cumulative - result_n['curedCount_' + region_key] -
               result_n['deadCount_' + region_key])
    fig.add_trace(
        go.Scatter(x=result_n.date,
                   y=cumulative,
                   line=dict(color=line_color),
                   name=region_label + ' Cumulative'))
    fig.add_trace(
        go.Scatter(x=result_n.date,
                   y=current,
                   line=dict(color=line_color, dash='dash'),
                   name=region_label + ' Current'))
fig.update_layout(yaxis_type="log",
                  title="Confirmed Counts",
                  xaxis_title="Date",
                  yaxis_title="N")
fig.show()

In [92]:
# Death rate I: deaths as a percentage of cumulative confirmed cases,
# one trace per region.
fig = go.Figure()
for region_key in ('hubei', 'nothubei', 'nation'):
    deaths = result_n['deadCount_' + region_key]
    confirmed = result_n['confirmedCount_' + region_key]
    fig.add_trace(
        go.Scatter(x=result_n.date,
                   y=deaths / confirmed * 100,
                   mode='lines+markers',
                   name=region_key))
fig.update_layout(title="Death rate I",
                  xaxis_title="Date",
                  yaxis_title="Death Rate (%)")
fig.show()

In [93]:
# Death rate II: deaths as a percentage of cured cases, one trace per region.
fig = go.Figure()
for region_key in ('hubei', 'nothubei', 'nation'):
    deaths = result_n['deadCount_' + region_key]
    cured = result_n['curedCount_' + region_key]
    fig.add_trace(
        go.Scatter(x=result_n.date,
                   y=deaths / cured * 100,
                   mode='lines+markers',
                   name=region_key))
fig.update_layout(title="Death rate II",
                  xaxis_title="Date",
                  yaxis_title="Death Rate (%)")
fig.show()

In [94]:
# Day-over-day increase of cumulative confirmed counts per region.
# The differences are taken on plain numpy arrays for every region, so no
# pandas index alignment is involved. The dates are sliced positionally to
# drop the first day, which has no predecessor.
fig = go.Figure()
for region_key in ('hubei', 'nothubei', 'nation'):
    totals = result_n['confirmedCount_' + region_key].values
    fig.add_trace(
        go.Scatter(x=result_n.date.iloc[1:],
                   y=totals[1:] - totals[:-1],
                   mode='lines+markers',
                   name=region_key))
# On a log axis, zero or negative increases are not drawn.
fig.update_layout(title="Daily Confirmed Counts Increase",
                  xaxis_title="Date",
                  yaxis_title="N",
                  yaxis_type="log")
fig.show()

In [95]:
# Cumulative confirmed count of each province as a fraction of its own peak.
# Only provinces whose peak lies strictly between 200 and 10000 are drawn.
fig = go.Figure()
for province in provinces:
    result = result_p[result_p['name'] == province]
    peak = np.max(result.confirmedCount)
    if 10000 > peak > 200:
        fig.add_trace(
            go.Scatter(x=result.date,
                       y=result.confirmedCount / peak,
                       mode='lines+markers',
                       name=province))
fig.update_layout(title="Cumulative Confirmed Fraction",
                  xaxis_title="Date",
                  yaxis_title="p(N)")
fig.update_yaxes(range=[0.7, 1])
# Zoom to the last 15 days. The window comes from the table itself rather than
# from whatever `result` the loop left behind, and is clamped so that fewer
# than 15 days of data does not raise an IndexError.
plot_dates = result_p['date'].drop_duplicates().sort_values()
if len(plot_dates) > 0:
    fig.update_xaxes(range=[plot_dates.iloc[-min(15, len(plot_dates))],
                            plot_dates.iloc[-1]])
fig.show()

In [102]:
# Current confirmed cases (confirmed minus cured minus dead) per province,
# restricted to provinces whose peak cumulative count is strictly between
# 200 and 10000.
fig = go.Figure()
for province in provinces:
    result = result_p.loc[result_p['name'] == province]
    peak_confirmed = np.max(result.confirmedCount)
    if not (10000 > peak_confirmed > 200):
        continue
    active_cases = result.confirmedCount - result.curedCount - result.deadCount
    fig.add_trace(
        go.Scatter(x=result.date,
                   y=active_cases,
                   mode='lines+markers',
                   name=province))
fig.update_layout(title="Current Confirmed Counts",
                  xaxis_title="Date",
                  yaxis_title="N")
fig.show()

In [97]:
# Current confirmed cases (confirmed minus cured minus dead) of each province
# as a fraction of that province's own peak. Only provinces whose peak
# cumulative count lies strictly between 200 and 10000 are drawn.
fig = go.Figure()
for province in provinces:
    result = result_p[result_p['name'] == province]
    if 10000 > np.max(result.confirmedCount) > 200:
        active_cases = (result.confirmedCount - result.curedCount -
                        result.deadCount)
        fig.add_trace(
            go.Scatter(x=result.date,
                       y=active_cases / np.max(active_cases),
                       mode='lines+markers',
                       name=province))
# The plotted quantity is a normalized fraction, so it is labeled as one
# instead of reusing the raw-count title and "N" axis label.
fig.update_layout(title="Current Confirmed Fraction",
                  xaxis_title="Date",
                  yaxis_title="p(N)")
fig.update_yaxes(range=[0.4, 1])
# Zoom to the last 15 days. The window comes from the table itself rather than
# from whatever `result` the loop left behind, and is clamped so that fewer
# than 15 days of data does not raise an IndexError.
plot_dates = result_p['date'].drop_duplicates().sort_values()
if len(plot_dates) > 0:
    fig.update_xaxes(range=[plot_dates.iloc[-min(15, len(plot_dates))],
                            plot_dates.iloc[-1]])
fig.show()

In [98]:
# Cured rate I: cured cases as a percentage of cumulative confirmed cases per
# province (peak cumulative count strictly between 200 and 10000).
fig = go.Figure()
for province in provinces:
    result = result_p[result_p['name'] == province]
    if 10000 > np.max(result.confirmedCount) > 200:
        fig.add_trace(
            go.Scatter(x=result.date,
                       # Scaled by 100 so the values match the "%" axis label,
                       # as the death-rate figures do.
                       y=result.curedCount / result.confirmedCount * 100,
                       mode='lines+markers',
                       name=province))
fig.update_layout(title="Cured Rate I", xaxis_title="Date", yaxis_title="%")
fig.show()

In [99]:
# Day-over-day increase of confirmed counts per province, in two panels:
# row 1 holds provinces whose peak exceeds the 90th percentile of all
# confirmedCount values in result_p, row 2 those above the 80th percentile.
# Provinces with a peak of 10000 or more are excluded from both.
fig = make_subplots(rows=2, cols=1, subplot_titles=("> 90%", "> 80%"))
# The percentiles do not depend on the province, so compute them once.
threshold_90 = np.percentile(result_p.confirmedCount, 90)
threshold_80 = np.percentile(result_p.confirmedCount, 80)
for province in provinces:
    result = result_p[result_p['name'] == province]
    peak = np.max(result.confirmedCount)
    if 10000 > peak > threshold_90:
        subplot_row = 1
    elif 10000 > peak > threshold_80:
        subplot_row = 2
    else:
        continue
    counts = result.confirmedCount.values
    # Positional slice: `result` keeps result_p's row labels, so a label-based
    # .loc[1:] would keep the first date for most provinces and shift every
    # increase one day early.
    fig.add_trace(go.Scatter(x=result.date.iloc[1:],
                             y=counts[1:] - counts[:-1],
                             mode='lines+markers',
                             name=province),
                  row=subplot_row,
                  col=1)
fig.update_layout(title="Daily confirmed increase")
# With a log axis the range is given in log10 units.
fig.update_yaxes(range=[0.4, 2.2])
fig.update_xaxes(title="Date", row=1, col=1)
fig.update_xaxes(title="Date", row=2, col=1)
fig.update_yaxes(title="N", row=1, col=1, type='log')
fig.update_yaxes(title="N", row=2, col=1, type='log')
fig.show()

In [100]:
# Show the daily rows of the per-province table for Shanghai ("上海市").
result = result_p.loc[result_p['name'] == "上海市"]
result

Unnamed: 0,day,name,date,confirmedCount,suspectedCount,curedCount,deadCount
7,0,上海市,2020-01-23,0,0,0,0
39,1,上海市,2020-01-24,0,0,0,0
71,2,上海市,2020-01-25,0,0,0,0
103,3,上海市,2020-01-26,53,0,3,1
135,4,上海市,2020-01-27,66,0,4,1
167,5,上海市,2020-01-28,96,0,5,1
199,6,上海市,2020-01-29,112,0,5,1
231,7,上海市,2020-01-30,135,0,9,1
263,8,上海市,2020-01-31,169,0,10,1
295,9,上海市,2020-02-01,182,0,10,1
