In [2]:
import json
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from math import log

with open("./api_token.txt") as f:
    mapbox_access_token = f.read()

In [3]:
with open("./all.json") as f:
    data = json.load(f) 

In [4]:
df = pd.DataFrame(data=data)
df["Date"] = pd.to_datetime(df.Date)
nonzero_cases = df[(df["Cases"] > 0) & (df["Status"] == "confirmed")]

In [86]:
nonzero_cases[(nonzero_cases['Date'] == "03/13/2020")].sort_values(by="Cases").groupby("Country").agg({"Lat":"first","Lon":"first","Cases":"sum"})

Unnamed: 0_level_0,Lat,Lon,Cases
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,33.0000,65.0000,7
Albania,41.1533,20.1683,33
Algeria,28.0339,1.6596,26
Andorra,42.5063,1.5218,1
Antigua and Barbuda,17.0608,-61.7964,1
...,...,...,...
US,44.2405,-114.4790,2179
Ukraine,48.3794,31.1656,3
United Arab Emirates,24.0000,54.0000,85
United Kingdom,36.1408,-5.3536,801


In [107]:
MARKER_SIZE_MAX = 100
LOG_SHIFT = 20
wm = lambda x: np.average(x, weights=df.loc[x.index, "Cases"])
# What is the max number of cases in the entire dataset?
HIGHEST_CASE_COUNT = nonzero_cases[(nonzero_cases['Date'] == "03/13/2020")].groupby("Country").agg({"Cases":"sum"}).max()
# Normalize the data point
def normalize(x,maxval):
    def normal(y):
        return log((y+LOG_SHIFT)/LOG_SHIFT,10)
    return MARKER_SIZE_MAX*normal(x) / normal(maxval)

def case_list_for_date(cur_date):
    grouped_cases = nonzero_cases[nonzero_cases['Date'] == cur_date].groupby("Country").agg({"Lat":wm,"Lon":wm,"Cases":"sum"})
    cur_cases = grouped_cases.to_dict()
#     cur_cases["Lat"]["US"] = 39.50
#     cur_cases["Lon"]["US"] = 39.50
    return cur_cases

def get_map_for_date(cur_date):
    cur_cases = case_list_for_date(cur_date)
    lat_list = list(cur_cases["Lat"].values())
    lon_list = list(cur_cases["Lon"].values())
    name_list = list(cur_cases["Cases"].keys())
    cases_list = list(cur_cases["Cases"].values())
    text = list(map(lambda x: f"{x[1]}" if x[1]>1000 else "", cur_cases["Cases"].items()))
    size_list = [normalize(c,HIGHEST_CASE_COUNT) for c in cases_list]

    fig = go.Figure(go.Scattermapbox(
            lat=lat_list,
            lon=lon_list,
            mode='markers+text',
            marker=go.scattermapbox.Marker(
                size=size_list,
                sizemin=1,
                colorscale="Bluered",
                color=size_list,
                cmin=0,
                opacity=0.3,
                cmax=MARKER_SIZE_MAX,
                symbol="circle",
                showscale=True,
                colorbar=dict(title="# of Cases", tickvals=[25,50,75,99], ticktext=["100", "1,000", "10,000","70,000"])
            ),
            text=text
        ))


    fig.update_layout(
        margin = {"l": 20,"r":20,"t":50,"b":20},
        title=f"Cumulative Number of Coronavirus Cases on {cur_date}",
        autosize=False,
        width=2000,
        height=1000,
        hovermode='closest',
        mapbox=dict(
            accesstoken=mapbox_access_token,
            bearing=0,
            center=dict(
                lat=39.82,
                lon=0.57
            ),
            pitch=0,
            zoom=2
        )
    )

    return fig

In [108]:
get_map_for_date("03-13-2020")

In [9]:
date_list = pd.date_range(start='2020-01-24', end='2020-3-14', tz="UTC")
for (frame, date) in enumerate(date_list):
    print(date)
    figure = get_map_for_date(date.strftime('%Y/%m/%d'))
    figure.write_image(f"imgs/img-{frame:03d}.png")

2020-01-24 00:00:00+00:00
2020-01-25 00:00:00+00:00
2020-01-26 00:00:00+00:00
2020-01-27 00:00:00+00:00
2020-01-28 00:00:00+00:00
2020-01-29 00:00:00+00:00
2020-01-30 00:00:00+00:00
2020-01-31 00:00:00+00:00
2020-02-01 00:00:00+00:00
2020-02-02 00:00:00+00:00
2020-02-03 00:00:00+00:00
2020-02-04 00:00:00+00:00
2020-02-05 00:00:00+00:00
2020-02-06 00:00:00+00:00
2020-02-07 00:00:00+00:00
2020-02-08 00:00:00+00:00
2020-02-09 00:00:00+00:00
2020-02-10 00:00:00+00:00
2020-02-11 00:00:00+00:00
2020-02-12 00:00:00+00:00
2020-02-13 00:00:00+00:00
2020-02-14 00:00:00+00:00
2020-02-15 00:00:00+00:00
2020-02-16 00:00:00+00:00
2020-02-17 00:00:00+00:00
2020-02-18 00:00:00+00:00
2020-02-19 00:00:00+00:00
2020-02-20 00:00:00+00:00
2020-02-21 00:00:00+00:00
2020-02-22 00:00:00+00:00
2020-02-23 00:00:00+00:00
2020-02-24 00:00:00+00:00
2020-02-25 00:00:00+00:00
2020-02-26 00:00:00+00:00
2020-02-27 00:00:00+00:00
2020-02-28 00:00:00+00:00
2020-02-29 00:00:00+00:00
2020-03-01 00:00:00+00:00
2020-03-02 0

In [130]:
print(HIGHEST_CASE_COUNT)

Cases    67786
dtype: int64
