# Detailed CA data

This notebook pulls in detailed case and death data for Canada using the [ViriHealth](https://virihealth.com) website.

This notebook requires `lxml`.

In [None]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import pycountry
import datetime
import pygsheets
import pytz

In [None]:
# papermill parameters
# Folder the CSV export is written into. The export cell builds the target
# path by plain string concatenation (output_folder + file name), so this
# value must end with a path separator.
output_folder = "../output/"

In [None]:
# Authorize pygsheets using the service-account credentials supplied through
# the GSHEET_API_CREDENTIALS environment variable.
gc = pygsheets.authorize(service_account_env_var="GSHEET_API_CREDENTIALS")

# Open the source spreadsheet by its URL.
sh = gc.open_by_url(
    "https://docs.google.com/spreadsheets/d/1C59nxtgcnwGyo6lgypsgN18duxmwWigjeVdKY58t0mU"
)

# Read the "Det" and "Dth" worksheets into DataFrames (in that order); they
# feed the case and death aggregations in the following cells.
cases, deaths = (
    sh.worksheet_by_title(tab).get_as_df() for tab in ("Det", "Dth")
)

In [None]:
# Collapse the case rows to a single row per (Date, Health Region, Prov)
# combination, summing the remaining columns within each group. The grouping
# keys stay as regular columns (as_index=False) so they can be renamed and
# merged on later.
case_keys = ["Date", "Health Region", "Prov"]
cases = cases.groupby(case_keys, as_index=False).sum()

In [None]:

# Count the death records per (Announced, Region, Prov) combination. After
# count(), every non-key column holds the number of non-null entries in the
# group; the "#" column is the one kept as the death tally.
death_keys = ["Announced", "Region", "Prov"]
death_counts = deaths.groupby(death_keys, as_index=False).count()

# Keep only the keys plus the tally, and align the column names with the
# case table so the two frames can be merged on Date / Region / Province.
agg_deaths = death_counts.filter(["Announced", "Region", "Prov", "#"]).rename(
    columns={"Announced": "Date", "Prov": "Province", "#": "Deaths"}
)

In [None]:
# Give the case table the same key column names as agg_deaths
# (Region / Province) and label the "Total" column as "Cases".
case_column_names = {
    "Health Region": "Region",
    "Prov": "Province",
    "Total": "Cases",
}
cases = cases.rename(columns=case_column_names)

In [None]:
# Combine cases and deaths into one table. The outer join keeps
# date/province/region combinations that appear in only one of the two
# inputs; validate="one_to_one" makes pandas raise if the merge keys are
# duplicated on either side.
merge_keys = ["Date", "Province", "Region"]
province_level_df = pd.merge(
    cases,
    agg_deaths,
    how="outer",
    on=merge_keys,
    validate="one_to_one",
)

In [None]:
# Insert the year after each "-" in the raw date strings and parse the result
# as day-year-abbreviated month ("%d-%Y-%b").
# NOTE(review): the year 2020 is hard-coded and the raw strings are presumably
# of the form "<day>-<month abbreviation>" -- confirm against the sheet.
dates_with_year = province_level_df["Date"].str.replace("-", "-2020-")
province_level_df["Date"] = pd.to_datetime(dates_with_year, format="%d-%Y-%b")

# Flag the rows that belong to the most recent date present in the data.
latest_date = province_level_df["Date"].max()
province_level_df["Last_Reported_Flag"] = province_level_df["Date"] == latest_date

### Replacements for ISO-3166-2 compliance

In [None]:
def _province_name(code):
    """Return the pycountry subdivision name for a Canadian province code."""
    return pycountry.subdivisions.get(code="CA-" + code).name


# Constant country-level columns.
province_level_df["State/Region"] = "Canada"
province_level_df["ISO3166_1"] = "CA"

# Keep the raw province code before "Province" is overwritten with the name.
# NOTE(review): this column keeps the raw "RC" code, while the name lookup
# below maps "RC" to "ON" -- confirm that is intended.
province_level_df["ISO3166_2"] = province_level_df["Province"]

# Replace "RC" with "ON" in the codes, then swap each code for the name of
# the matching "CA-<code>" subdivision.
normalized_codes = province_level_df["Province"].str.replace("RC", "ON")
province_level_df["Province"] = normalized_codes.apply(_province_name)



### Add time stamp for last update

In [None]:
# Stamp every row with the time this notebook ran, in UTC.
# datetime.datetime.utcnow() is deprecated since Python 3.12, so take an
# aware UTC "now" instead and strip the tzinfo again: the stored value stays
# a naive UTC timestamp, exactly as before, and the CSV output is unchanged.
province_level_df["Last_Updated_Date"] = datetime.datetime.now(
    datetime.timezone.utc
).replace(tzinfo=None)



# Export to CSV

In [None]:
# Write the selected columns, in this order, to VH_CAN_DETAILED.csv inside
# output_folder. A header row is written and the DataFrame index is left out.
export_columns = [
    "Date",
    "Province",
    "Region",
    "Cases",
    "Deaths",
    "ISO3166_1",
    "ISO3166_2",
    "Last_Reported_Flag",
    "Last_Updated_Date",
]
export_path = output_folder + "VH_CAN_DETAILED.csv"
province_level_df.to_csv(
    path_or_buf=export_path,
    columns=export_columns,
    header=True,
    index=False,
)