In [None]:
%run _prepare.ipynb

# Feature Discovery
* Which other interesting features, apart from `new_cases`, does the data contain?
* How can we quickly try different categorical variables?

In [None]:
# Sum new cases and new deaths over all rows of each (location, continent)
# pair, then plot one point per location coloured by continent.
plot_df = (
    raw_data
    .groupby(["location", "continent"])[["new_cases", "new_deaths"]]
    .sum()
    .reset_index()
)
px.scatter(
    plot_df,
    x="new_cases",
    y="new_deaths",
    color="continent",
    hover_name="location",
    height=600,
    width=1000,
)

In [None]:
# Same per-location totals as a scatter plot, this time with box plots on
# both margins to show the distribution of each axis per continent.
plot_df = (
    raw_data
    .groupby(["location", "continent"])[["new_cases", "new_deaths"]]
    .sum()
    .reset_index()
)
px.scatter(
    plot_df,
    x="new_cases",
    y="new_deaths",
    color="continent",
    hover_name="location",
    marginal_x="box",
    marginal_y="box",
    height=600,
    width=1000,
)

## Turn Chart into an interactive dashboard

In [None]:
# Keep only rows that have a continent and whose date (compared as text)
# equals the maximum date found in raw_data.
most_recent = raw_data.loc[
    raw_data.continent.notna()
    & (raw_data.date.astype(str) == raw_data.date.max())
]

def create_scatter_plot(x="new_cases",y="new_deaths"):
    """Scatter two columns of ``most_recent`` against each other, one point per location.

    Parameters
    ----------
    x, y : str
        Column names of ``most_recent`` to sum per (location, continent)
        pair and place on the horizontal / vertical axis.

    Returns
    -------
    The figure produced by ``px.scatter``, coloured by continent.
    """
    per_location = most_recent.groupby(["location", "continent"])[[x, y]].sum()
    plot_df = per_location.reset_index()
    return px.scatter(
        plot_df,
        x=x,
        y=y,
        color="continent",
        hover_name="location",
        height=600,
        width=1000,
    )

In [None]:
# Offer every numeric column of raw_data as a choice for either axis and
# redraw the scatter plot whenever a dropdown changes.
numerical_columns = raw_data.select_dtypes(np.number).columns.tolist()
x = widgets.Dropdown(
    options=numerical_columns,
    value="new_cases",
)
y = widgets.Dropdown(
    options=numerical_columns,
    value="new_deaths",
)
interact(create_scatter_plot, x=x, y=y)

#  📝Make the time configurable within the dashboard

## Option 1: Using the dashboard

In [None]:
def create_scatter_plot(x, y, date):
    """Scatter ``x`` against ``y`` per location for one selected day.

    Parameters
    ----------
    x, y : str
        Column names of ``raw_data`` to sum per (location, continent) pair
        and place on the horizontal / vertical axis.
    date : datetime.date, datetime.datetime, str or None
        Day to display. Objects with ``strftime`` are formatted as
        ``YYYY-MM-DD``; strings are used as given. ``None`` (for example the
        value of a date picker that has been cleared) yields no figure.

    Returns
    -------
    The figure produced by ``px.scatter``, or ``None`` when no date is selected.
    """
    # Without this guard ``date.strftime`` raises AttributeError as soon as
    # the date widget is emptied.
    if date is None:
        return None

    selected_day = date if isinstance(date, str) else date.strftime("%Y-%m-%d")
    day_rows = raw_data.loc[raw_data.date.astype(str) == selected_day]
    plot_df = day_rows.groupby(["location", "continent"])[[x, y]].sum().reset_index()
    return px.scatter(
        plot_df,
        x=x,
        y=y,
        color="continent",
        hover_name="location",
        height=600,
        width=1000,
    )

In [None]:
from datetime import datetime

# Parse the maximum value of raw_data.date (a "YYYY-MM-DD" string is
# expected by the format below) and keep only its calendar-date part.
today = datetime.strptime(
    raw_data.date.max(),
    "%Y-%m-%d",
).date()

In [None]:
# Dashboard controls: one dropdown per axis (all numeric columns of
# raw_data) plus a date picker that starts at `today`.
numerical_columns = raw_data.select_dtypes(np.number).columns.tolist()
x = widgets.Dropdown(
    options=numerical_columns,
    value="new_cases",
)
y = widgets.Dropdown(
    options=numerical_columns,
    value="new_deaths",
)
date = widgets.DatePicker(value=today)
interact(create_scatter_plot, x=x, y=y, date=date)

## Option 2: Using plotly

In [None]:
def create_scatter_plot(x, y):
    """Scatter ``x`` against ``y`` using sums per (date, location, continent).

    Reads the module-level ``most_recent`` frame, sums the two requested
    columns for every (date, location, continent) combination and returns
    the ``px.scatter`` figure coloured by continent.

    NOTE(review): none of the cells visible after this definition call it;
    they build their frame from ``countries`` instead - confirm whether this
    helper is still needed.
    """
    grouped = most_recent.groupby(["date", "location", "continent"])[[x, y]].sum()
    # Move "location" out of the index first, then the remaining levels,
    # which keeps the resulting column order unchanged.
    plot_df = grouped.reset_index(level=1).reset_index()
    return px.scatter(
        plot_df,
        x=x,
        y=y,
        color="continent",
        hover_name="location",
        height=600,
        width=1000,
    )

In [None]:
from datetime import datetime

# Reference day for the animation: the maximum raw_data.date value, parsed
# from its "YYYY-MM-DD" text form into a date object.
today = datetime.strptime(
    raw_data.date.max(),
    "%Y-%m-%d",
).date()

In [None]:

# Columns shown on the two axes of the animated scatter plot.
x = "new_cases_per_million"
y = "new_deaths_per_million"

# One row per (date, location, continent) with the summed axis columns.
plot_df = (
    countries
    .groupby(["date", "location", "continent"])[[x, y]]
    .sum()
    .reset_index()
)

# `today` is a plain datetime.date, which pandas datetime arithmetic does not
# accept as a scalar operand. Normalise both sides to datetime64 so the
# subtraction yields a timedelta column regardless of how `date` is stored
# (text, date objects or datetime64).
days_since_today = plot_df["date"].astype("datetime64[ns]") - np.datetime64(today)
plot_df = plot_df.assign(days_past=days_since_today.dt.days)

In [None]:
# Animated scatter: one frame per `days_past` value, axes fixed to constant
# ranges so the points stay comparable between frames.
fig = px.scatter(
    data_frame=plot_df,
    x=x,
    y=y,
    color="continent",
    hover_name="location",
    animation_frame="days_past",
    range_x=[0, 250000],
    range_y=[0, 10000],
)
fig.show()

# Small Multiples

In [None]:
# Lift Altair's row limit for chart data so the charts below can be built
# from the full plot_df frame.
alt.data_transformers.disable_max_rows()

In [None]:
# Five locations with the highest total_cases on each continent, taken from
# the latest snapshot. Sorting once and taking the head of every group
# replaces groupby.apply, which pandas deprecates when the applied function
# also receives the grouping column.
top_5_countries_per_continent = (
    most_recent
    .sort_values("total_cases", ascending=False)
    .groupby("continent")
    .head(5)
    .location
)

# Restrict the full history to those locations; the "date" column is dropped
# and the charts below facet on "year_month" instead.
plot_df = (
    countries
    .loc[countries.location.isin(list(top_5_countries_per_continent))]
    .drop(columns="date")
)

In [None]:
# Display the filtered frame as the cell output to inspect it before plotting.
plot_df

In [None]:
# Small multiples with plotly: one row of panels per year_month value and
# one column per continent.
px.scatter(
    plot_df,
    x="new_cases_per_million",
    y="new_deaths_per_million",
    facet_row="year_month",
    facet_col="continent",
)

In [None]:
# Show the rows of `countries` whose location is Italy.
countries.query("location=='Italy'")

In [None]:
# Small multiples with Altair: a grid of 100x100 panels, one column per
# continent and one row per year_month, plotting mean new cases against mean
# new deaths (both per million).
# NOTE(review): the tooltip lists the raw per-million fields without an
# aggregate; in Vega-Lite unaggregated encoding fields act as group-by keys,
# so the means may be taken over very small groups - confirm this is intended.
alt.Chart(plot_df, height=100, width=100).mark_circle().encode(
    x=alt.X(field="new_cases_per_million", aggregate="mean", type="quantitative"),
    # The y channel is defined with alt.Y (it was previously built with alt.X).
    y=alt.Y(field="new_deaths_per_million", aggregate="mean", type="quantitative"),
    color="continent:N",
    tooltip=["location", "new_cases_per_million", "new_deaths_per_million"],
    column="continent:N",
    row="year_month:O",
)

In [None]:
# Small multiples with Altair, additionally sizing each circle by mean life
# expectancy: one column per continent, one row per year_month.
# NOTE(review): the tooltip lists raw fields without an aggregate; in
# Vega-Lite unaggregated encoding fields act as group-by keys, so the means
# may be taken over very small groups - confirm this is intended.
alt.Chart(plot_df, height=100, width=100).mark_circle().encode(
    x=alt.X(field="new_cases_per_million", aggregate="mean", type="quantitative"),
    # The y channel is defined with alt.Y (it was previously built with alt.X).
    y=alt.Y(field="new_deaths_per_million", aggregate="mean", type="quantitative"),
    color="continent:N",
    tooltip=["location", "new_cases_per_million", "new_deaths_per_million", "life_expectancy"],
    size=alt.Size("life_expectancy", aggregate="mean", type="quantitative"),
    column="continent:N",
    row="year_month:O",
)

In [None]:
# Altair small multiples sized by mean life expectancy (continent columns,
# year_month rows).
# NOTE(review): this cell has the same content as the chart cell directly
# before it - confirm whether both are needed.
alt.Chart(plot_df, height=100, width=100).mark_circle().encode(
    x=alt.X(field="new_cases_per_million", aggregate="mean", type="quantitative"),
    # The y channel is defined with alt.Y (it was previously built with alt.X).
    y=alt.Y(field="new_deaths_per_million", aggregate="mean", type="quantitative"),
    color="continent:N",
    tooltip=["location", "new_cases_per_million", "new_deaths_per_million", "life_expectancy"],
    size=alt.Size("life_expectancy", aggregate="mean", type="quantitative"),
    column="continent:N",
    row="year_month:O",
)

# Parallel Coordinates 

In [None]:
# Indicators drawn as the parallel axes.
cols = [
    "total_cases_per_million",
    "total_deaths_per_million",
    "people_vaccinated_per_hundred",
    "life_expectancy",
    "human_development_index",
]

# Latest snapshot restricted to complete rows. Parallel-coordinate lines can
# only be coloured by numeric values, so each continent gets an integer code.
df = (
    most_recent[["continent", "location"] + cols]
    .dropna()
    .assign(continent_id=lambda frame: frame["continent"].astype("category").cat.codes)
)

fig = px.parallel_coordinates(
    df,
    # Pin the axes explicitly so the helper colour column is not drawn as one.
    dimensions=cols,
    color="continent_id",
    # `labels` must map column names to display names (a list is ignored).
    labels={
        **{col: col.replace("_", " ") for col in cols},
        "continent_id": "continent",
    },
)
fig

In [None]:
import plotly.express as px

# Two textual True/False flags per row of the latest snapshot.
# NOTE(review): the first label reads "incidence < 50" while the test is
# total_cases_per_million < 5000, and the second label speaks of vaccinated
# people while the column counts total vaccinations - confirm labels and
# thresholds match what is meant.
low_case_flag = (most_recent.total_cases_per_million < 5000).astype(str).rename("incidence < 50")
high_vaccination_flag = (most_recent.total_vaccinations_per_hundred > 70).astype(str).rename("70% vaccinated")

# Attach both flags (aligned on the row index) to the identifying columns.
df = (
    most_recent[["continent", "location", "total_cases"]]
    .join(low_case_flag)
    .join(high_vaccination_flag)
)

fig = px.parallel_categories(df)
fig.show()