In [9]:
import json
import pandas as pd

def _weekly_summary(country, daily):
    """Collapse one country's records into one row per week.

    Parameters
    ----------
    country : str
        Label written into the ``country`` column of the result.
    daily : pandas.DataFrame
        Records with at least a ``date`` column (anything
        ``pd.to_datetime`` accepts) and a numeric ``confirmed`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``week`` (0-based, counted from the week of the earliest
        record), ``confirmed`` (maximum within the week), ``new`` (mean
        row-to-row change in ``confirmed`` within the week) and
        ``country``. Only weeks where both ``confirmed`` and ``new``
        exceed 10 are kept.
    """
    # Work on a copy and put rows in chronological order so that diff()
    # really is "change since the previous record". The stable sort leaves
    # input that is already ordered untouched.
    daily = daily.assign(date=pd.to_datetime(daily['date']))
    daily = daily.sort_values('date', kind='mergesort')

    # Anchor every date to the Monday of its week and count whole weeks from
    # the first Monday. Within a single year this equals "week-of-year minus
    # the smallest week-of-year", but it does not wrap around at New Year
    # the way the calendar week number (52/53 -> 1) does.
    day = daily['date'].dt.normalize()
    monday = day - pd.to_timedelta(day.dt.weekday, unit='D')

    daily = daily.assign(
        week=(monday - monday.min()).dt.days // 7,
        new=daily['confirmed'].diff().fillna(0),  # first row has no predecessor
    )

    weekly = (
        daily.groupby('week')
        .agg(confirmed=('confirmed', 'max'), new=('new', 'mean'))
        .reset_index()
    )
    weekly['country'] = country

    # Drop weeks with very small counts; the surviving values are all
    # strictly positive.
    return weekly[(weekly['confirmed'] > 10) & (weekly['new'] > 10)]


# Only the JSON parsing needs the file handle; everything else runs after the
# file has been closed. JSON text is UTF-8, so do not rely on the platform
# default encoding.
with open("../data/timeseries.json", encoding="utf-8") as fp:
    raw_data = json.load(fp)

# raw_data maps each country name to its list of records.
dfs = [
    _weekly_summary(country, pd.DataFrame(records))
    for country, records in raw_data.items()
]

# One long frame with a `country` column. The per-country row labels are kept
# as-is, so index values repeat across countries.
raw_dfs = pd.concat(dfs)

In [10]:
# Show the combined per-country weekly frame (pandas elides the middle rows).
raw_dfs

Unnamed: 0,week,confirmed,new,country
9,9,120,11.428571,Afghanistan
10,10,170,50.000000,Afghanistan
9,9,212,17.571429,Albania
10,10,223,11.000000,Albania
8,8,201,21.857143,Algeria
...,...,...,...,...
9,9,144,14.428571,Uzbekistan
10,10,135,16.000000,Venezuela
9,9,188,10.714286,Vietnam
10,10,203,15.000000,Vietnam


In [11]:
import altair as alt

In [21]:
# A country is plotted once its largest weekly `confirmed` value exceeds
# MIN_TOTAL_CONFIRMED; countries in ALWAYS_INCLUDE are plotted regardless.
MIN_TOTAL_CONFIRMED = 3000
ALWAYS_INCLUDE = ['Cuba']

totals = raw_dfs.groupby('country').agg(total=('confirmed', 'max'))
countries = list(totals.index[totals['total'] > MIN_TOTAL_CONFIRMED]) + ALWAYS_INCLUDE
data = raw_dfs[raw_dfs['country'].isin(countries)]

# Encodings shared by every layer: `new` against `confirmed`, both on log
# scales, one colour per country.
base = alt.Chart(data).encode(
    x=alt.X('confirmed', scale=alt.Scale(type='log'), title='confirmed (weekly max)'),
    y=alt.Y('new', scale=alt.Scale(type='log'), title='new (weekly mean change)'),
    color='country',
)

# Line layer. Without an `order` channel a line mark connects its points by
# x value; ordering by `week` makes each country's path follow time even if
# `confirmed` ever goes down from one week to the next.
chart = base.mark_line().encode(
    tooltip='country',
    order='week',
)

# Point layer marking the individual weekly observations.
dots = base.mark_point()

# Text layer: writes the country name next to every plotted point.
text = base.mark_text(align='left').encode(
    text='country',
    tooltip='country',
)

(chart + text + dots).properties(width=600, height=600).interactive()