In [1]:
import json
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from math import log

with open("./api_token.txt") as f:
    mapbox_access_token = f.read()

In [50]:
with open("./all.json") as f:
    data = json.load(f) 

In [122]:
df = pd.DataFrame(data=data)
df["Date"] = pd.to_datetime(df.Date)
nonzero_cases = df[(df["Cases"] > 0) & (df["Status"] == "confirmed")]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Province,Date,Cases,Status
Lat,Lon,Country,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
19.1959,109.745,China,Hainan,2020-03-13 00:00:00+00:00,168,confirmed
22.1667,113.55,China,Macau,2020-03-13 00:00:00+00:00,10,confirmed
22.3,114.2,China,Hong Kong,2020-03-13 00:00:00+00:00,134,confirmed
23.3417,113.424,China,Guangdong,2020-03-13 00:00:00+00:00,1356,confirmed
23.8298,108.788,China,Guangxi,2020-03-13 00:00:00+00:00,252,confirmed
24.974,101.487,China,Yunnan,2020-03-13 00:00:00+00:00,174,confirmed
26.0789,117.987,China,Fujian,2020-03-13 00:00:00+00:00,296,confirmed
26.8154,106.875,China,Guizhou,2020-03-13 00:00:00+00:00,146,confirmed
27.6104,111.709,China,Hunan,2020-03-13 00:00:00+00:00,1018,confirmed
27.614,115.722,China,Jiangxi,2020-03-13 00:00:00+00:00,935,confirmed


In [139]:
MARKER_SIZE_MAX = 100
LOG_SHIFT = 20
# What is the max number of cases in the entire dataset?
HIGHEST_CASE_COUNT = nonzero_cases[(nonzero_cases['Date'] == "03/13/2020")].groupby(["Lat","Lon","Country"]).sum().max()
# Normalize the data point
def normalize(x,maxval):
    def normal(y):
        return log((y+LOG_SHIFT)/LOG_SHIFT,10)
    return MARKER_SIZE_MAX*normal(x) / normal(maxval)

def case_list_for_date(cur_date):
    grouped_cases = nonzero_cases[nonzero_cases['Date'] == cur_date].groupby(["Lat","Lon","Country"])
    cur_cases = grouped_cases.sum().to_dict()["Cases"]
    return cur_cases.items()

def get_map_for_date(cur_date):
    cur_cases = case_list_for_date(cur_date)
    lat_list = list(map(lambda x: x[0][0], cur_cases))
    lon_list = list(map(lambda x: x[0][1], cur_cases))
    name_list = list(map(lambda x: x[0][2], cur_cases))
    cases_list = list(map(lambda x: x[1], cur_cases))
    text = list(map(lambda x: f"{x[1]}" if x[1]>100 else "", cur_cases))
    size_list = [normalize(c,HIGHEST_CASE_COUNT) for c in cases_list]

    fig = go.Figure(go.Scattermapbox(
            lat=lat_list,
            lon=lon_list,
            mode='markers+lines',
            marker=go.scattermapbox.Marker(
                size=size_list,
                sizemin=1,
                colorscale="Bluered",
                color=size_list,
                cmin=0,
                cmax=MARKER_SIZE_MAX,
                symbol="circle",
                showscale=True,
                colorbar=dict(title="# of Cases", tickvals=[25,50,75,99], ticktext=["50", "250", "1000","4000"])
            ),
            text=text
        ))


    fig.update_layout(
        margin = {"l": 20,"r":20,"t":50,"b":20},
        title=f"Cumulative Number of Coronavirus Cases on {cur_date}",
        autosize=False,
        width=1600,
        height=900,
        hovermode='closest',
        mapbox=dict(
            accesstoken=mapbox_access_token,
            bearing=0,
            center=dict(
                lat=39.82,
                lon=0.57
            ),
            pitch=0,
            zoom=1.5
        )
    )

    return fig

In [140]:
get_map_for_date("03-13-2020")

In [None]:
date_list = pd.date_range(start='2020-01-24', end='2020-3-14', tz="UTC")
for (frame, date) in enumerate(date_list):
    print(date)
    figure = get_map_for_date(date.strftime('%Y/%m/%d'))
    figure.write_image(f"covid_imgs/img-{frame:03d}.png")

2020-01-24 00:00:00+00:00
[4.996996444695609]
2020-01-25 00:00:00+00:00
[2.6619095042151066, 7.076808932954485]
2020-01-26 00:00:00+00:00
[4.996996444695609, 8.951701059981758]
2020-01-27 00:00:00+00:00
[2.6619095042151066, 2.6619095042151066, 2.6619095042151066, 7.076808932954485, 10.658456510396194]
2020-01-28 00:00:00+00:00
[4.996996444695609, 4.996996444695609, 4.996996444695609, 8.951701059981758, 12.224765418138455]
2020-01-29 00:00:00+00:00
[7.076808932954485, 7.076808932954485, 7.076808932954485, 10.658456510396194, 13.671994932364832]
2020-01-30 00:00:00+00:00
[8.951701059981758, 8.951701059981758, 8.951701059981758, 12.224765418138455, 15.01697891109911]
2020-01-31 00:00:00+00:00
[10.658456510396194, 10.658456510396194, 10.658456510396194, 13.671994932364832, 16.27321604089236]
2020-02-01 00:00:00+00:00
[12.224765418138455, 12.224765418138455, 12.224765418138455, 2.6619095042151066, 16.27321604089236, 17.451696551646908]


In [130]:
print(HIGHEST_CASE_COUNT)

Cases    67786
dtype: int64
