In [17]:
import pandas as pd
from pathlib import Path
import os
from IPython.display import display

# viz libs
import altair as alt
import plotly.express as px

## Data

First up, I'm loading the time-series data and converting it into tidy format.

- datetime is mm/dd/yr e.g `1/26/20` in the csv files. Should be: `%m/%d/%y`

In [58]:
path = Path("COVID-19") / "csse_covid_19_data" / "csse_covid_19_time_series"
data = dict()

for f in [f for f in os.listdir(path) if f.endswith(".csv")]:
    name = f[21:].split(".")[0]
    print(f"Processing {name}: {f}")

    # read csv
    d = pd.read_csv(path / f)
    
    # convert date columns into one date col with values
    name_cols = d.columns[:4] # the first four
    d = pd.melt(d, name_cols, var_name="Date", value_name="Num")
    d["datetime"] = pd.to_datetime(d.Date) # datetime format

    # now add a type
    d["Type"] = name

    data[name] = d # add to dict

# combine the three dataframes into one
df = pd.concat(data.values(), ignore_index=True)
df.rename(columns={"Country/Region": "Country"}, inplace=True)
df.sample(1000)

Processing Deaths: time_series_19-covid-Deaths.csv
Processing Recovered: time_series_19-covid-Recovered.csv
Processing Confirmed: time_series_19-covid-Confirmed.csv


Unnamed: 0,Province/State,Country,Lat,Long,Date,Num,datetime,Type
9041,Jilin,Mainland China,43.66610,126.1923,2/14/20,25,2020-02-14,Recovered
14246,Guangdong,Mainland China,23.34170,113.4244,2/10/20,1159,2020-02-10,Confirmed
5419,,UK,55.00000,-3.0000,2/29/20,0,2020-02-29,Deaths
10268,,Qatar,25.35480,51.1839,2/22/20,0,2020-02-22,Recovered
10405,Western Australia,Australia,-31.95050,115.8605,2/23/20,0,2020-02-23,Recovered
5286,,Belgium,50.83330,4.0000,2/28/20,0,2020-02-28,Deaths
17037,"Portland, OR",US,45.50510,-122.6750,2/29/20,1,2020-02-29,Confirmed
6297,,Israel,31.00000,35.0000,1/25/20,0,2020-01-25,Recovered
6200,"San Mateo, CA",US,37.56300,-122.3255,1/24/20,0,2020-01-24,Recovered
15074,Tasmania,Australia,-41.45450,145.9707,2/15/20,0,2020-02-15,Confirmed


In [62]:
fig = px.scatter_geo(df, lat="Lat", lon="Long", color="Type",
size="Num", size_max=50,
animation_frame="Date", projection="natural earth")
fig

In [None]:
fig = px.scatter_geo(df, locations="iso_alpha", color="continent",
                     hover_name="country", size="pop",
                     animation_frame="year",
                     projection="natural earth")
fig.show()

In [41]:
#px.line(df.sample(1000), x="Date", y="Num", color="Country/Region")
px.scatter(df.tail(5000), x="Date", y="Num", color="Country")
