In [4]:
import numpy as np 
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [102]:
# Read the per-location case/death table from a local CSV copy of the tracker data
# (one confirmed-cases column and one deaths column per date).
# NOTE(review): the path below is absolute and machine-specific; anyone else running the
# notebook has to point it at their own copy of the file.
url = "/Users/Snapshot/Desktop/python/Coron/novel-coronavirus - data_adm1.csv"
# missing counts are treated as zero
df = pd.read_csv(url).fillna(0)
# swap "-" for "/" in the headers so the date part reads dd/mm/yyyy
df.columns = df.columns.str.replace("-", "/")

In [105]:
# show the first few rows to sanity-check the load and the renamed date columns
df.head()


Unnamed: 0,country,location_id,location,latitude,longitude,confirmedcases_10/01/2020,deaths_10/01/2020,confirmedcases_11/01/2020,deaths_11/01/2020,confirmedcases_12/01/2020,...,confirmedcases_10/02/2020,deaths_10/02/2020,confirmedcases_11/02/2020,deaths_11/02/2020,confirmedcases_12/02/2020,deaths_12/02/2020,confirmedcases_13/02/2020,deaths_13/02/2020,confirmedcases_14/02/2020,deaths_14/02/2020
0,Australia,470,New South Wales,-32.33,146.77,0.0,0.0,0.0,0.0,0.0,...,4.0,0.0,4.0,0.0,4.0,0.0,4.0,0.0,4,0.0
1,Australia,473,Queensland,-22.9,144.7,0.0,0.0,0.0,0.0,0.0,...,5.0,0.0,5.0,0.0,5.0,0.0,5.0,0.0,5,0.0
2,Australia,474,South Australia,-29.49,135.95,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,2.0,0.0,2.0,0.0,2.0,0.0,2,0.0
3,Australia,476,Victoria,-36.98,144.64,0.0,0.0,0.0,0.0,0.0,...,4.0,0.0,4.0,0.0,4.0,0.0,4.0,0.0,4,0.0
4,Belgium,600,Brussels,50.836,4.3753,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1,0.0


In [106]:
# Build a country -> alpha-3 code lookup by joining the distinct countries in `df` against a
# second CSV of country codes. The map traces use these codes as their `locations`.
geo_data_cols = ["country"]
c_codes_df = pd.read_csv(
    "country_code_mapping.csv", usecols=["country", "alpha-3_code"], index_col="country"
)
geo_data_df = (
    df[geo_data_cols]
    .drop_duplicates()
    .join(c_codes_df, how="left", on="country")
    .set_index("country")
)

# Every distinct dd/mm/yyyy string found in the column headers, in header order.
date_labels = df.columns.str.extract(r"(\d{2}/\d{2}/\d{4})", expand=False)
dates_list = date_labels.dropna().unique().to_list()


def _metric_name(col):
    """Strip the date from a dated column header, leaving only the metric name."""
    if col.startswith("deaths"):
        return "deaths"
    return "confirmed_cases"


# how the per-location rows are collapsed into one row per country
country_totals_spec = {"confirmed_cases": "sum", "deaths": "sum", "alpha-3_code": "first"}

# One dataframe per date: country, summed confirmed cases and deaths, and the alpha-3 code.
cases_by_date = {}
for date in dates_list:
    # the columns whose header contains this date, renamed and indexed by country
    cases_df = df.filter(like=date, axis=1).rename(columns=_metric_name)
    cases_df = cases_df.astype("uint32").set_index(df["country"])

    date_df = geo_data_df.join(cases_df).groupby("country").agg(country_totals_spec)

    # countries with no confirmed cases on this date are left out of the frame
    has_cases = date_df["confirmed_cases"] > 0
    date_df = date_df[has_cases].reset_index()

    cases_by_date[date] = date_df

In [107]:
# Shared animation options used by the play/stop buttons and the slider steps
def frame_args(duration):
    """Return the options dict passed as the second "args" entry of an "animate" call.

    Args:
        duration: value used for both the frame duration and the transition duration
            (Plotly interprets these as milliseconds).

    Returns:
        A new dict with the keys "frame", "mode", "fromcurrent" and "transition".
    """
    frame_opts = {"duration": duration}
    transition_opts = {"duration": duration, "easing": "linear"}
    return dict(
        frame=frame_opts,
        mode="immediate",
        fromcurrent=True,
        transition=transition_opts,
    )

In [108]:
# Two stacked panels: a geo panel on top (80% of the height) and an xy panel below (20%).
subplot_specs = [[{"type": "scattergeo"}], [{"type": "xy"}]]
fig = make_subplots(rows=2, cols=1, specs=subplot_specs, row_heights=[0.8, 0.2])

# Animation controls; both buttons call Plotly's "animate" method.
play_button = {
    "label": "&#9654;",  # play symbol
    "method": "animate",
    "args": [None, frame_args(250)],
}
stop_button = {
    "label": "&#9724;",  # stop symbol
    "method": "animate",
    "args": [[None], frame_args(0)],
}

# set up the geo data, the slider (its steps are filled in later), the buttons, and the title
fig.layout.geo = {"showcountries": True}
fig.layout.sliders = [{"active": 0, "steps": []}]
fig.layout.updatemenus = [
    {
        "type": "buttons",
        "buttons": [play_button, stop_button],
        "showactive": False,
        "direction": "left",
    }
]
fig.layout.title = {"text": "Novel Coronavirus Case Tracker", "x": 0.5}

In [109]:
# accumulators filled in by the frame-building loop
frames, steps = [], []

# The colour bar runs from 1 case up to the largest per-country confirmed count on the last
# date in `dates_list`.
latest_df = cases_by_date[dates_list[-1]]
max_confirmed_cases = latest_df["confirmed_cases"].max()

# Case counts vary by orders of magnitude, so the colour scale works on log1p(count)...
low_tick, high_tick = np.log1p(1), np.log1p(max_confirmed_cases)
log_tick_values = np.geomspace(low_tick, high_tick, num=6)

# ...while the tick /labels/ show the real counts (i.e. not log(n_cases)).
tick_counts = np.expm1(log_tick_values).astype(int)
# pin the top label to the true maximum; truncating to int can otherwise leave it at max - 1
tick_counts[-1] = max_confirmed_cases
visual_tick_values = ["{:,}".format(val) for val in tick_counts]


def _daily_totals(marker):
    """Sum every `df` column whose header contains `marker`; one total per column, as a list."""
    return df.filter(like=marker).astype("uint32").agg("sum").to_list()


# line chart data: one total per date column, summed over all rows of `df`
confirmed_cases_totals = _daily_totals("confirmedcases")
deaths_totals = _daily_totals("deaths")

# Build one animation frame per date: a choropleth of per-country confirmed cases plus the
# running totals for the two lines in the lower panel. The first date also seeds the figure.
for i, (date, data) in enumerate(cases_by_date.items(), start=1):
    # Derive the plotting columns on a copy held under its own name. Rebinding `df` here would
    # discard the raw table that the earlier cells read (so they could not be re-run), and
    # writing columns into `data` would mutate the frames cached in `cases_by_date`.
    frame_df = data.assign(
        # the z-scale (for calculating the colour for each country) needs to be logarithmic
        confirmed_cases_log=np.log1p(data["confirmed_cases"]),
        # hover text: date, country, and the two counts with thousands separators
        text=(
            date
            + "<br>"
            + data["country"]
            + "<br>Confirmed cases: "
            + data["confirmed_cases"].apply("{:,}".format)
            + "<br>Deaths: "
            + data["deaths"].apply("{:,}".format)
        ),
    )

    # create the choropleth chart; zmin/zmax and the tick arrays are the same for every
    # frame so the colour bar keeps one range and one set of labels throughout the animation
    choro_trace = go.Choropleth(
        locations=frame_df["alpha-3_code"],
        z=frame_df["confirmed_cases_log"],
        zmax=high_tick,
        zmin=low_tick,
        colorscale="reds",
        colorbar={
            "ticks": "outside",
            "ticktext": visual_tick_values,
            "tickmode": "array",
            "tickvals": log_tick_values,
            "title": {"text": "<b>Confirmed Cases</b>"},
            "len": 0.8,
            "y": 1,
            "yanchor": "top",
        },
        hovertemplate=frame_df["text"],
        name="",
        showlegend=False,
    )

    # a single point cannot be drawn as a line, so the first frame uses markers
    line_mode = "markers" if i == 1 else "lines"

    # create the confirmed cases trace (totals up to and including this date)
    confirmed_cases_trace = go.Scatter(
        x=dates_list,
        y=confirmed_cases_totals[:i],
        mode=line_mode,
        name="Total Confirmed Cases",
        line={"color": "Red"},
        hovertemplate="%{x}<br>Total confirmed cases: %{y:,}<extra></extra>",
    )

    # create the deaths trace (totals up to and including this date)
    deaths_trace = go.Scatter(
        x=dates_list,
        y=deaths_totals[:i],
        mode=line_mode,
        name="Total Deaths",
        line={"color": "Black"},
        hovertemplate="%{x}<br>Total deaths: %{y:,}<extra></extra>",
    )

    if i == 1:
        # the first frame is what the figure initially shows...
        fig.add_trace(choro_trace, row=1, col=1)
        fig.add_traces([confirmed_cases_trace, deaths_trace], rows=[2, 2], cols=[1, 1])
    # ...and every frame (the first included) is appended to the `frames` list and slider
    frames.append(dict(data=[choro_trace, confirmed_cases_trace, deaths_trace], name=date))

    steps.append(
        {"args": [[date], frame_args(0)], "label": date, "method": "animate"}
    )

# Fix the axis ranges up front so they do not rescale while the animation plays.
# NOTE(review): the x range is given as positions 0..len-1, which presumably relies on the
# date strings being treated as categories - confirm.
fig.update_xaxes(range=[0, len(dates_list)-1], visible=False)
# The lines plot totals summed over every row of the table, which are at least as large as
# the biggest single-country count (`max_confirmed_cases`). Sizing the axis from the largest
# plotted total keeps the confirmed-cases line inside the panel.
fig.update_yaxes(range=[0, max(confirmed_cases_totals, default=0)])
fig.frames = frames
fig.layout.sliders[0].steps = steps
# the map takes the top 80% of the figure, leaving the bottom strip for the line chart
fig.layout.geo.domain = {"x": [0,1], "y": [0.2, 1]}
fig.update_layout(height=650, legend={"x": 0.05, "y": 0.175, "yanchor": "top", "bgcolor": "rgba(0, 0, 0, 0)"})
fig