In [5]:
import json
from math import log
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# Read the Mapbox access token from a local file. Surrounding whitespace is
# stripped because a trailing newline in the file would otherwise become part
# of the token string passed to the map layout.
with open("./api_token.txt") as f:
    mapbox_access_token = f.read().strip()

In [6]:
# Load the raw case records. The encoding is pinned to UTF-8 so the result does
# not depend on the platform's default text encoding.
with open("./all.json", encoding="utf-8") as f:
    data = json.load(f)

In [7]:
# Build the working frame from `data` and convert the Date column to datetimes.
df = pd.DataFrame(data=data)
df["Date"] = pd.to_datetime(df["Date"])
# Keep only rows with a positive case count whose Status is "confirmed".
nonzero_cases = df.loc[df["Cases"].gt(0) & df["Status"].eq("confirmed")]

In [8]:
# Upper end of the marker scale: normalize() returns this value for a count
# equal to its `maxval`, and the map uses it as the colour-scale maximum.
MARKER_SIZE_MAX = 100
# Offset added to a case count before the logarithm is taken in normalize().
LOG_SHIFT = 20


def wm(x):
    """Return the mean of `x` weighted by the "Cases" values of the same rows.

    `x` is a column slice handed over by a groupby aggregation; its index is
    used to look up the matching case counts in `df`.
    """
    return np.average(x, weights=df.loc[x.index, "Cases"])


# What is the max number of cases in the entire dataset?
# Sum the cases per (date, country) and take the largest total across ALL dates
# instead of one hard-coded day, so no date in the data can exceed the scale.
# Selecting the "Cases" column before aggregating makes the result a plain
# scalar rather than a one-element Series, which math.log() can take directly.
HIGHEST_CASE_COUNT = nonzero_cases.groupby(["Date", "Country"])["Cases"].sum().max()
# Normalize the data point
def normalize(x,maxval):
    """Scale a case count logarithmically relative to `maxval`.

    Both values are passed through log10((v + LOG_SHIFT) / LOG_SHIFT); the
    result is MARKER_SIZE_MAX times the ratio of the two. A count of 0 maps to
    0 and a count equal to `maxval` maps to MARKER_SIZE_MAX.
    """
    def shifted_log10(count):
        ratio = (count + LOG_SHIFT) / LOG_SHIFT
        return log(ratio, 10)

    scaled = MARKER_SIZE_MAX * shifted_log10(x)
    return scaled / shifted_log10(maxval)

def case_list_for_date(cur_date):
    """Aggregate the confirmed, non-zero case rows of one date per country.

    Parameters
    ----------
    cur_date : value comparable with the "Date" column (the notebook passes
        date strings).

    Returns
    -------
    dict
        ``{"Lat": {country: value}, "Lon": {country: value},
        "Cases": {country: total}}`` where Lat/Lon are means weighted by each
        row's case count and Cases is the per-country sum.
    """
    day_cases = nonzero_cases[nonzero_cases["Date"] == cur_date]

    # The weighting helper is local so the result does not depend on whatever
    # the module-level name `wm` happens to be bound to in the running kernel.
    def case_weighted_mean(values):
        return np.average(values, weights=day_cases.loc[values.index, "Cases"])

    grouped_cases = day_cases.groupby("Country").agg(
        {"Lat": case_weighted_mean, "Lon": case_weighted_mean, "Cases": "sum"}
    )
    # NOTE: the data files at least one "Diamond Princess" row under Country
    # "US" with coordinates Lat 35.4437 / Lon 139.638, which pulls the weighted
    # US position away from the mainland. No correction is applied here.
    return grouped_cases.to_dict()

def get_map_for_date(cur_date):
    """Build a Mapbox scatter map of per-country case totals for one date.

    Each country returned by case_list_for_date() becomes one circle placed at
    its Lat/Lon; marker size and colour both use the value produced by
    normalize() against HIGHEST_CASE_COUNT.

    Parameters
    ----------
    cur_date : date value forwarded to case_list_for_date() and interpolated
        verbatim into the figure title.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    cur_cases = case_list_for_date(cur_date)
    lat_list = list(cur_cases["Lat"].values())
    lon_list = list(cur_cases["Lon"].values())
    cases_list = list(cur_cases["Cases"].values())
    # Label every marker with its country name; countries without a positive
    # total get an empty label.
    text = [f"{country}" if total > 0 else "" for country, total in cur_cases["Cases"].items()]
    size_list = [normalize(c,HIGHEST_CASE_COUNT) for c in cases_list]

    fig = go.Figure(go.Scattermapbox(
            lat=lat_list,
            lon=lon_list,
            mode='markers+text',
            marker=go.scattermapbox.Marker(
                size=size_list,
                sizemin=1,
                colorscale="Bluered",
                color=size_list,
                cmin=0,
                opacity=0.3,
                cmax=MARKER_SIZE_MAX,
                symbol="circle",
                showscale=True,
                # Tick positions and labels are hard-coded rather than derived
                # from normalize(); re-check them whenever LOG_SHIFT or
                # HIGHEST_CASE_COUNT changes.
                colorbar=dict(title="# of Cases", tickvals=[25,50,75,99], ticktext=["100", "1,000", "10,000","70,000"])
            ),
            text=text
        ))

    fig.update_layout(
        margin = {"l": 20,"r":20,"t":50,"b":20},
        title=f"Cumulative Number of Coronavirus Cases on {cur_date}",
        autosize=False,
        width=2000,
        height=1000,
        hovermode='closest',
        mapbox=dict(
            accesstoken=mapbox_access_token,
            bearing=0,
            center=dict(
                lat=39.82,
                lon=0.57
            ),
            pitch=0,
            zoom=2
        )
    )

    return fig

In [11]:
# Render the map for a single date as the cell's output.
# NOTE(review): "03-05-2020" is presumably 5 March 2020 (month first) — confirm.
get_map_for_date(cur_date="03-05-2020")

In [109]:
# Export one PNG per day, numbered by frame index so the files sort in
# chronological order. The output directory is created up front because
# write_image() does not create missing directories.
frames_dir = Path("imgs")
frames_dir.mkdir(parents=True, exist_ok=True)

date_list = pd.date_range(start='2020-01-24', end='2020-3-14', tz="UTC")
for (frame, date) in enumerate(date_list):
    print(date)
    figure = get_map_for_date(date.strftime('%Y/%m/%d'))
    figure.write_image(str(frames_dir / f"img-{frame:03d}.png"))

2020-01-24 00:00:00+00:00
2020-01-25 00:00:00+00:00
2020-01-26 00:00:00+00:00
2020-01-27 00:00:00+00:00
2020-01-28 00:00:00+00:00
2020-01-29 00:00:00+00:00
2020-01-30 00:00:00+00:00
2020-01-31 00:00:00+00:00
2020-02-01 00:00:00+00:00
2020-02-02 00:00:00+00:00
2020-02-03 00:00:00+00:00
2020-02-04 00:00:00+00:00
2020-02-05 00:00:00+00:00
2020-02-06 00:00:00+00:00
2020-02-07 00:00:00+00:00
2020-02-08 00:00:00+00:00
2020-02-09 00:00:00+00:00
2020-02-10 00:00:00+00:00
2020-02-11 00:00:00+00:00
2020-02-12 00:00:00+00:00
2020-02-13 00:00:00+00:00
2020-02-14 00:00:00+00:00
2020-02-15 00:00:00+00:00
2020-02-16 00:00:00+00:00
2020-02-17 00:00:00+00:00
2020-02-18 00:00:00+00:00
2020-02-19 00:00:00+00:00
2020-02-20 00:00:00+00:00
2020-02-21 00:00:00+00:00
2020-02-22 00:00:00+00:00
2020-02-23 00:00:00+00:00
2020-02-24 00:00:00+00:00
2020-02-25 00:00:00+00:00
2020-02-26 00:00:00+00:00
2020-02-27 00:00:00+00:00
2020-02-28 00:00:00+00:00
2020-02-29 00:00:00+00:00
2020-03-01 00:00:00+00:00
2020-03-02 0

In [37]:
# Debugging aid: for every column slice handed over by the aggregation, print
# the slice next to the case counts of the same rows on 5 March 2020.
# It has its own name so that running this cell does not overwrite the `wm`
# weighted-mean aggregator defined earlier in the notebook.
def print_slice_with_cases(x):
    print(x,nonzero_cases[nonzero_cases['Date'] == "03/05/2020"].loc[x.index, "Cases"])

# The helper returns None, so the Lat/Lon columns of the result are empty; only
# the printed output and the summed "Cases" column are of interest here.
nonzero_cases[(nonzero_cases['Date'] == "03/05/2020") & (nonzero_cases["Country"] == "US")].groupby("Country").agg({"Lat":print_slice_with_cases,"Lon":print_slice_with_cases,"Cases":"sum"})

5451     35.4437
14811    37.6017
15851    41.8882
16215    40.6546
16475    37.3541
16527    36.0796
16579    29.5693
16631    47.1981
16683    30.7690
16735    35.9179
16787    40.7128
16891    42.3601
17047    40.9263
17099    29.7752
17151    37.7749
17203    37.8534
17255    33.7879
17307    42.1767
17359    33.2918
17411    35.8032
17463    41.1220
17515    43.9088
17567    27.9904
17619    39.0916
17671    37.5630
17723    38.5780
17775    45.7750
17827    33.8034
17879    45.5470
17931    48.0330
17983    40.7450
18035    38.4747
18087    32.7157
18139    36.5761
18191    34.0522
18243    47.6062
18295    41.7377
Name: Lat, dtype: float64 5451     45
14811     1
15851     2
16215     1
16475    20
16527     1
16579     1
16631     1
16683     1
16735     1
16787     4
16891     1
17047     2
17099     2
17151     2
17203     1
17255     3
17307     1
17359     1
17411     1
17463    18
17515     2
17567     2
17619     2
17671     2
17723     1
17775     1
17827     2
17879    

Unnamed: 0_level_0,Lat,Lon,Cases
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
US,,,217


In [55]:
# Show the row at position 80 among the confirmed, non-zero rows for 5 March 2020.
nonzero_cases.loc[nonzero_cases["Date"] == "03/05/2020"].iloc[80]

Country                            US
Province             Diamond Princess
Lat                           35.4437
Lon                           139.638
Date        2020-03-05 00:00:00+00:00
Cases                              45
Status                      confirmed
Name: 5451, dtype: object