# Collecting and Filtering Canadian Covid-19 Metrics
* Covid-19 data sourced from [The Government of Canada](https://health-infobase.canada.ca/covid-19/epidemiological-summary-covid-19-cases.html)
* Daily numbers for British Columbia are extracted from the national dataset and exported from here. This entire notebook will be reduced to a function.

In [13]:
import pandas as pd
import numpy as np
import re
import string

In [33]:
# Download the Government of Canada Covid-19 CSV into a DataFrame.
# Reads straight from the remote URL, so this cell needs network access.
df = pd.read_csv('https://health-infobase.canada.ca/src/data/covidLive/covid19-download.csv')

In [15]:
# Show the column labels of the downloaded dataset.
df.columns

Index(['pruid', 'prname', 'prnameFR', 'date', 'numconf', 'numprob',
       'numdeaths', 'numtotal', 'numtested', 'numrecover', 'percentrecover',
       'ratetested', 'numtoday', 'percentoday', 'ratetotal', 'ratedeaths',
       'numdeathstoday', 'percentdeath', 'numtestedtoday', 'numrecoveredtoday',
       'percentactive', 'numactive', 'rateactive', 'numtotal_last14',
       'ratetotal_last14', 'numdeaths_last14', 'ratedeaths_last14',
       'numtotal_last7', 'ratetotal_last7', 'numdeaths_last7',
       'ratedeaths_last7', 'avgtotal_last7', 'avgincidence_last7',
       'avgdeaths_last7', 'avgratedeaths_last7'],
      dtype='object')

In [17]:
# Sum every numeric column per province/territory name.
# numeric_only=True keeps the text columns ('prnameFR', 'date') out of the
# result; on pandas >= 2.0 the default would try to aggregate them as well.
# NOTE(review): columns such as 'numtotal' look like running totals, so their
# sum across dates is not a meaningful count - confirm before using.
df.groupby('prname').sum(numeric_only=True)

Unnamed: 0_level_0,pruid,numconf,numprob,numdeaths,numtotal,numtested,numrecover,percentrecover,ratetested,numtoday,...,numdeaths_last14,ratedeaths_last14,numtotal_last7,ratetotal_last7,numdeaths_last7,ratedeaths_last7,avgtotal_last7,avgincidence_last7,avgdeaths_last7,avgratedeaths_last7
prname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alberta,12432,3149052,0,47141.0,3145187,145124400.0,2586348.0,18216.29,33199239.0,46865,...,5879.0,134.43,300454.0,6873.3,3152.0,72.08,42923.0,981.84,446.0,10.38
British Columbia,16107,1478222,0,44499.0,1478222,65965150.0,1145685.0,18742.59,13007447.0,25467,...,4176.0,82.68,170256.0,3357.27,2179.0,43.27,24342.0,479.65,306.0,6.23
Canada,273,29932717,3282,1821879.0,29935973,1054627000.0,22903678.0,16030.36,28056605.0,330503,...,153959.0,409.61,2204606.0,5865.1,78655.0,209.25,314945.0,837.87,11244.0,30.05
Manitoba,11822,417157,1799,5882.0,418954,26449390.0,212666.0,16089.49,19313653.0,13544,...,2321.0,169.43,87089.0,6359.4,1392.0,101.63,12440.0,908.37,180.0,14.42
New Brunswick,3341,45701,51,462.0,45752,10844560.0,39441.0,20255.35,13960076.0,430,...,84.0,10.85,2701.0,347.8,42.0,5.46,376.0,49.8,0.0,0.84
Newfoundland and Labrador,2570,63899,69,748.0,63968,6302375.0,57144.0,21136.21,12084112.0,319,...,56.0,10.68,2119.0,406.26,28.0,5.32,292.0,58.03,0.0,0.8
Northwest Territories,15677,1389,0,0.0,1389,750246.0,1215.0,21151.87,16736846.0,15,...,0.0,0.0,102.0,227.46,0.0,0.0,14.0,32.5,0.0,0.0
Nova Scotia,3084,239482,45,13037.0,239527,15755460.0,209221.0,19626.12,16219426.0,1179,...,910.0,93.56,7982.0,821.65,455.0,46.81,1131.0,117.1,50.0,6.52
Nunavut,15934,593,0,0.0,593,364821.0,4.0,3.37,9407448.0,130,...,0.0,0.0,553.0,1426.02,0.0,0.0,75.0,203.74,0.0,0.0
Ontario,9555,9664323,0,567547.0,9664323,534401900.0,8135896.0,18379.43,36686930.0,103912,...,47073.0,323.1,694615.0,4768.6,24001.0,164.68,99239.0,681.16,3428.0,23.45


In [18]:
# Keep only the rows reported for British Columbia.
df_bc = df.loc[df['prname'] == 'British Columbia']

In [19]:
# Last British Columbia row (date and new cases). Not rendered by the
# notebook: only the final expression of a cell is displayed.
df_bc.loc[:, ['date', 'numtoday']].tail(1)
# British Columbia rows where zero new cases were reported.
df_bc.loc[df_bc['numtoday'] == 0]

Unnamed: 0,pruid,prname,prnameFR,date,numconf,numprob,numdeaths,numtotal,numtested,numrecover,...,numdeaths_last14,ratedeaths_last14,numtotal_last7,ratetotal_last7,numdeaths_last7,ratedeaths_last7,avgtotal_last7,avgincidence_last7,avgdeaths_last7,avgratedeaths_last7
13,59,British Columbia,Colombie-Britannique,2020-02-24,6,0,0.0,6,,,...,,,,,,,,,,
19,59,British Columbia,Colombie-Britannique,2020-02-26,7,0,0.0,7,,,...,,,,,,,,,,
22,59,British Columbia,Colombie-Britannique,2020-02-27,7,0,0.0,7,,,...,,,,,,,,,,
25,59,British Columbia,Colombie-Britannique,2020-02-29,7,0,0.0,7,,,...,,,,,,,,,,
40,59,British Columbia,Colombie-Britannique,2020-03-06,21,0,0.0,21,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3372,59,British Columbia,Colombie-Britannique,2020-10-18,11189,0,251.0,11189,543791.0,9387.0,...,13.0,0.26,1004.0,19.80,6.0,0.12,143.0,2.83,1.0,0.02
3462,59,British Columbia,Colombie-Britannique,2020-10-24,12554,0,256.0,12554,581804.0,10247.0,...,11.0,0.22,1365.0,26.92,5.0,0.10,195.0,3.85,1.0,0.01
3477,59,British Columbia,Colombie-Britannique,2020-10-25,12554,0,256.0,12554,581804.0,10247.0,...,11.0,0.22,1365.0,26.92,5.0,0.10,195.0,3.85,1.0,0.01
3882,59,British Columbia,Colombie-Britannique,2020-11-21,25474,0,331.0,25474,735429.0,17477.0,...,55.0,1.08,3835.0,75.62,41.0,0.81,548.0,10.80,6.0,0.12


In [40]:
# Build the British Columbia export: pick the metrics of interest, collapse
# them to one summed row per date, then restore 'date' as a column in
# chronological order. Note that sum() yields 0.0 for dates where the source
# value is missing.
bc_df = (
    df_bc[[
        'prname', 'date', 'numconf', 'numdeaths', 'numtotal',
        'numtoday', 'percentoday', 'ratetotal', 'ratedeaths',
        'numdeathstoday', 'numactive', 'numtotal_last14',
        'ratetotal_last14', 'numdeaths_last14', 'ratedeaths_last14',
        'numtotal_last7', 'ratetotal_last7', 'numdeaths_last7',
        'ratedeaths_last7', 'avgtotal_last7', 'avgincidence_last7',
        'avgdeaths_last7',
    ]]
    .groupby('date')
    .sum()
    .reset_index()
    .sort_values(by='date')
)

# Narrow the frame to the daily metrics that get exported.
bc_df = bc_df.loc[:, ['date', 'numtoday', 'numtotal_last7', 'numdeathstoday', 'numdeaths_last7', 'numactive']]
# Preview call; not rendered because it is not the last expression of the cell.
bc_df.head()
# Persist the filtered frame next to the notebook.
bc_df.to_pickle('bc_covid_data.pkl')

In [37]:
# Display the exported metrics indexed by date, most recent date first.
bc_df.sort_values(by='date', ascending=False).set_index('date')

Unnamed: 0_level_0,numtoday,numtotal_last7,numdeathstoday,numdeaths_last7,numactive
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-11-22,0,3176.0,0.0,41.0,7666.0
2020-11-21,0,3835.0,0.0,41.0,7666.0
2020-11-20,516,4489.0,10.0,41.0,7666.0
2020-11-19,536,4590.0,1.0,33.0,7431.0
2020-11-18,761,4648.0,10.0,36.0,7188.0
...,...,...,...,...,...
2020-02-24,0,0.0,0.0,0.0,6.0
2020-02-21,1,0.0,0.0,0.0,6.0
2020-02-16,1,0.0,0.0,0.0,5.0
2020-02-08,3,0.0,0.0,0.0,4.0


In [52]:
def get_covid_data(df_name='df_bc_covid', globe=True, url=None):
    """Load the Covid-19 CSV and return British Columbia's daily metrics.

    Args:
        df_name: Name of the module-level variable that receives the result
            when ``globe`` is true.
        globe: When true, also publish the result as a module-level variable
            called ``df_name``. When false, nothing outside the function is
            modified.
        url: Path, URL or file-like object accepted by ``pd.read_csv``.
            Defaults to the module-level ``bc_cov19_url``.
            NOTE(review): ``bc_cov19_url`` is not defined in the visible part
            of this notebook - presumably the same CSV URL used to load
            ``df``; confirm it is set before calling without ``url``.

    Returns:
        DataFrame indexed by ``date`` with the columns ``numtoday``,
        ``numtotal_last7``, ``numdeathstoday``, ``numdeaths_last7`` and
        ``numactive``, restricted to rows whose ``prname`` is
        'British Columbia'.
    """
    source = bc_cov19_url if url is None else url
    national = pd.read_csv(source)

    wanted = ['date', 'numtoday', 'numtotal_last7', 'numdeathstoday', 'numdeaths_last7', 'numactive']
    is_bc = national['prname'] == 'British Columbia'
    bc_daily = national.loc[is_bc, wanted].set_index('date')

    if globe:
        # A `global` statement applies to the whole function even when it is
        # written inside an `if`, so it cannot be made conditional. Writing to
        # globals() at runtime is what actually honours the flag (and the
        # requested variable name).
        globals()[df_name] = bc_daily
    return bc_daily

In [54]:
# Display the British Columbia metrics, most recent date first.
# Relies on the module-level df_bc_covid, which get_covid_data() sets as a
# side effect. NOTE(review): no call to get_covid_data() is visible in this
# notebook - confirm it is run before this cell.
df_bc_covid.sort_index(ascending=False)

Unnamed: 0_level_0,numtoday,numtotal_last7,numdeathstoday,numdeaths_last7,numactive
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-11-22,0,3176.0,0.0,41.0,7666.0
2020-11-21,0,3835.0,0.0,41.0,7666.0
2020-11-20,516,4489.0,10.0,41.0,7666.0
2020-11-19,536,4590.0,1.0,33.0,7431.0
2020-11-18,761,4648.0,10.0,36.0,7188.0
...,...,...,...,...,...
2020-02-24,0,,0.0,,6.0
2020-02-21,1,,0.0,,6.0
2020-02-16,1,,0.0,,5.0
2020-02-08,3,,0.0,,4.0
