In [1]:
import json
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from math import log

with open("./api_token.txt") as f:
    mapbox_access_token = f.read()

In [50]:
with open("./all.json") as f:
    data = json.load(f) 

In [51]:
df = pd.DataFrame(data=data)
df["Date"] = pd.to_datetime(df.Date)
nonzero_cases = df[(df["Cases"] > 0)]

In [70]:
nonzero_cases[(nonzero_cases['Date'] == "03/13/2020") & (nonzero_cases['Status'] == "confirmed")].groupby(["Country"]).sum().to_dict()

{'Lat': {'Afghanistan': 33.0,
  'Albania': 41.1533,
  'Algeria': 28.0339,
  'Andorra': 42.5063,
  'Antigua and Barbuda': 17.0608,
  'Argentina': -38.4161,
  'Armenia': 40.0691,
  'Aruba': 12.5211,
  'Australia': -255.9695,
  'Austria': 47.5162,
  'Azerbaijan': 40.1431,
  'Bahrain': 26.0275,
  'Bangladesh': 23.685,
  'Belarus': 53.7098,
  'Belgium': 50.8333,
  'Bhutan': 27.5142,
  'Bolivia': -16.2902,
  'Bosnia and Herzegovina': 43.9159,
  'Brazil': -14.235,
  'Brunei': 4.5353,
  'Bulgaria': 42.7339,
  'Burkina Faso': 12.2383,
  'Cambodia': 11.55,
  'Cameroon': 3.848,
  'Canada': 398.3247,
  'Cayman Islands': 19.3133,
  'Chile': -35.6751,
  'China': 1083.3366999999998,
  'Colombia': 4.5709,
  'Congo (Kinshasa)': -4.0383,
  'Costa Rica': 9.7489,
  "Cote d'Ivoire": 7.54,
  'Croatia': 45.1,
  'Cruise Ship': 35.4437,
  'Cuba': 22.0,
  'Cyprus': 35.1264,
  'Czechia': 49.8175,
  'Denmark': 118.1565,
  'Dominican Republic': 18.7357,
  'Ecuador': -1.8312,
  'Egypt': 26.0,
  'Estonia': 58.5953,


In [20]:
MARKER_SIZE_MAX = 100
LOG_SHIFT = 20
# What is the max number of cases in the entire dataset?
HIGHEST_CASE_COUNT = nonzero_cases.groupby(["Lat","Lon","Province"]).sum().max()
# Normalize the data point
def normalize(x,maxval):
    def normal(y):
        return log((y+LOG_SHIFT)/LOG_SHIFT,10)
    return MARKER_SIZE_MAX*normal(x) / normal(maxval)

def case_list_for_date(cur_date):
    grouped_cases = nonzero_cases[nonzero_cases['Date'] < cur_date].groupby(["Lat","Lon","Province"])
    cur_cases = grouped_cases.sum().to_dict()["Cases"]
    return cur_cases.items()

def get_map_for_date(cur_date):
    cur_cases = case_list_for_date(cur_date)
    lat_list = list(map(lambda x: x[0][0], cur_cases))
    lon_list = list(map(lambda x: x[0][1], cur_cases))
    name_list = list(map(lambda x: x[0][2], cur_cases))
    cases_list = list(map(lambda x: x[1], cur_cases))
    text = list(map(lambda x: f"{x[1]}" if x[1]>100 else "", cur_cases))
    size_list = [normalize(c,HIGHEST_CASE_COUNT) for c in cases_list]

    fig = go.Figure(go.Scattermapbox(
            lat=lat_list,
            lon=lon_list,
            mode='markers+text',
            marker=go.scattermapbox.Marker(
                size=size_list,
                sizemin=1,
                colorscale="Bluered",
                color=size_list,
                cmin=0,
                cmax=MARKER_SIZE_MAX,
                symbol="circle",
                showscale=True,
                colorbar=dict(title="# of Cases", tickvals=[25,50,75,99], ticktext=["50", "250", "1000","4000"])
            ),
            text=text
        ))


    fig.update_layout(
        margin = {"l": 20,"r":20,"t":50,"b":20},
        title=f"Cumulative Number of Coronavirus Cases on {cur_date}",
        autosize=False,
        width=1600,
        height=900,
        hovermode='closest',
        mapbox=dict(
            accesstoken=mapbox_access_token,
            bearing=0,
            center=dict(
                lat=39.82,
                lon=-98.57
            ),
            pitch=0,
            zoom=4
        )
    )

    return fig

In [22]:
get_map_for_date("03-14-2020")

In [None]:
date_list = pd.date_range(start='2020-01-24', end='2020-3-14', tz="UTC")
for (frame, date) in enumerate(date_list):
    print(date)
    figure = get_map_for_date(date.strftime('%Y/%m/%d'))
    figure.write_image(f"covid_imgs/img-{frame:03d}.png")

2020-01-24 00:00:00+00:00
[4.996996444695609]
2020-01-25 00:00:00+00:00
[2.6619095042151066, 7.076808932954485]
2020-01-26 00:00:00+00:00
[4.996996444695609, 8.951701059981758]
2020-01-27 00:00:00+00:00
[2.6619095042151066, 2.6619095042151066, 2.6619095042151066, 7.076808932954485, 10.658456510396194]
2020-01-28 00:00:00+00:00
[4.996996444695609, 4.996996444695609, 4.996996444695609, 8.951701059981758, 12.224765418138455]
2020-01-29 00:00:00+00:00
[7.076808932954485, 7.076808932954485, 7.076808932954485, 10.658456510396194, 13.671994932364832]
2020-01-30 00:00:00+00:00
[8.951701059981758, 8.951701059981758, 8.951701059981758, 12.224765418138455, 15.01697891109911]
2020-01-31 00:00:00+00:00
[10.658456510396194, 10.658456510396194, 10.658456510396194, 13.671994932364832, 16.27321604089236]
2020-02-01 00:00:00+00:00
[12.224765418138455, 12.224765418138455, 12.224765418138455, 2.6619095042151066, 16.27321604089236, 17.451696551646908]


In [29]:
normal_inverse(20)

1999999999999999999980

NameError: name 'normal_inverse' is not defined