# Collecting and Filtering Canadian Covid-19 Metrics
* Covid-19 data is sourced from [The Government of Canada](https://health-infobase.canada.ca/covid-19/epidemiological-summary-covid-19-cases.html)
* Daily numbers for British Columbia are extracted from that data set and exported here. This entire notebook will eventually be reduced to a single function.

In [8]:
import pandas as pd
import numpy as np
import re
import string

In [22]:
# Download the Covid-19 CSV export published by the Government of Canada
# into a DataFrame. This performs a network request every time the cell runs.
df = pd.read_csv('https://health-infobase.canada.ca/src/data/covidLive/covid19-download.csv')

In [23]:
# Inspect the column names available in the downloaded data set.
df.columns

Index(['pruid', 'prname', 'prnameFR', 'date', 'numconf', 'numprob',
       'numdeaths', 'numtotal', 'numtested', 'numrecover', 'percentrecover',
       'ratetested', 'numtoday', 'percentoday', 'ratetotal', 'ratedeaths',
       'numdeathstoday', 'percentdeath', 'numtestedtoday', 'numrecoveredtoday',
       'percentactive', 'numactive', 'rateactive', 'numtotal_last14',
       'ratetotal_last14', 'numdeaths_last14', 'ratedeaths_last14',
       'numtotal_last7', 'ratetotal_last7', 'numdeaths_last7',
       'ratedeaths_last7', 'avgtotal_last7', 'avgincidence_last7',
       'avgdeaths_last7', 'avgratedeaths_last7'],
      dtype='object')

In [24]:
# Keep only the rows whose province name is British Columbia.
df_bc = df.loc[df['prname'] == 'British Columbia']

In [25]:
# Display the British Columbia rows ordered from the highest to the lowest
# `numtoday` value. The sorted frame is only displayed; `df_bc` is unchanged.
df_bc.sort_values(by='numtoday',ascending=False)

Unnamed: 0,pruid,prname,prnameFR,date,numconf,numprob,numdeaths,numtotal,numtested,numrecover,...,numdeaths_last14,ratedeaths_last14,numtotal_last7,ratetotal_last7,numdeaths_last7,ratedeaths_last7,avgtotal_last7,avgincidence_last7,avgdeaths_last7,avgratedeaths_last7
3492,59,British Columbia,Colombie-Britannique,2020-10-26,13371,0,259.0,13371,581804.0,10734.0,...,14.0,0.28,1684.0,33.21,6.0,0.12,241.0,4.74,1.0,0.02
3837,59,British Columbia,Colombie-Britannique,2020-11-18,24422,0,320.0,24422,714714.0,16914.0,...,47.0,0.93,4648.0,91.65,36.0,0.71,664.0,13.09,5.0,0.10
3822,59,British Columbia,Colombie-Britannique,2020-11-17,23661,0,310.0,23661,709169.0,16469.0,...,38.0,0.75,4422.0,87.20,26.0,0.51,632.0,12.46,4.0,0.07
3882,59,British Columbia,Colombie-Britannique,2020-11-21,26187,0,331.0,26187,735429.0,17477.0,...,55.0,1.08,4548.0,89.68,41.0,0.81,650.0,12.81,6.0,0.12
3792,59,British Columbia,Colombie-Britannique,2020-11-15,22298,0,290.0,22298,690920.0,14901.0,...,27.0,0.53,4046.0,79.78,14.0,0.28,578.0,11.40,2.0,0.04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
852,59,British Columbia,Colombie-Britannique,2020-05-03,2171,0,114.0,2171,81061.0,1376.0,...,36.0,0.71,223.0,4.40,14.0,0.28,32.0,0.63,2.0,0.04
1572,59,British Columbia,Colombie-Britannique,2020-06-20,2790,0,168.0,2790,151626.0,2444.0,...,1.0,0.02,81.0,1.60,0.0,0.00,12.0,0.23,0.0,0.00
3357,59,British Columbia,Colombie-Britannique,2020-10-17,11189,0,251.0,11189,543791.0,9387.0,...,13.0,0.26,1004.0,19.80,6.0,0.12,143.0,2.83,1.0,0.02
3372,59,British Columbia,Colombie-Britannique,2020-10-18,11189,0,251.0,11189,543791.0,9387.0,...,13.0,0.26,1004.0,19.80,6.0,0.12,143.0,2.83,1.0,0.02


In [26]:
# NOTE(review): `tail(1)` is the latest report only if the rows are in
# chronological order, as they appear to be in the source CSV - confirm.
print('Most recent date recorded:', df_bc['date'].tail(1).values)
# Print the value next to its label; previously only the label was printed.
print('Number of cases announced today:', df_bc['numtoday'].tail(1).values)
# Days with 0 reported cases - Would include beginning of pandemic when testing limited, as well as weekends and holidays when reports were not published.
df_bc[df_bc.numtoday == 0]

Most recent date recorded: ['2020-11-23']
Number of cases announced today 


Unnamed: 0,pruid,prname,prnameFR,date,numconf,numprob,numdeaths,numtotal,numtested,numrecover,...,numdeaths_last14,ratedeaths_last14,numtotal_last7,ratetotal_last7,numdeaths_last7,ratedeaths_last7,avgtotal_last7,avgincidence_last7,avgdeaths_last7,avgratedeaths_last7
13,59,British Columbia,Colombie-Britannique,2020-02-24,6,0,0.0,6,,,...,,,,,,,,,,
19,59,British Columbia,Colombie-Britannique,2020-02-26,7,0,0.0,7,,,...,,,,,,,,,,
22,59,British Columbia,Colombie-Britannique,2020-02-27,7,0,0.0,7,,,...,,,,,,,,,,
25,59,British Columbia,Colombie-Britannique,2020-02-29,7,0,0.0,7,,,...,,,,,,,,,,
40,59,British Columbia,Colombie-Britannique,2020-03-06,21,0,0.0,21,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3282,59,British Columbia,Colombie-Britannique,2020-10-12,10185,0,245.0,10185,501496.0,8502.0,...,12.0,0.24,446.0,8.79,3.0,0.06,64.0,1.26,0.0,0.01
3357,59,British Columbia,Colombie-Britannique,2020-10-17,11189,0,251.0,11189,543791.0,9387.0,...,13.0,0.26,1004.0,19.80,6.0,0.12,143.0,2.83,1.0,0.02
3372,59,British Columbia,Colombie-Britannique,2020-10-18,11189,0,251.0,11189,543791.0,9387.0,...,13.0,0.26,1004.0,19.80,6.0,0.12,143.0,2.83,1.0,0.02
3462,59,British Columbia,Colombie-Britannique,2020-10-24,12554,0,256.0,12554,581804.0,10247.0,...,11.0,0.22,1365.0,26.92,5.0,0.10,195.0,3.85,1.0,0.01


In [31]:
# Select the columns kept for the British Columbia export.
bc_df = df_bc[['prname', 'date', 'numconf', 'numdeaths', 'numtotal',
       'numtoday', 'percentoday', 'ratetotal', 'ratedeaths',
       'numdeathstoday', 'numactive', 'numtotal_last14',
       'ratetotal_last14', 'numdeaths_last14', 'ratedeaths_last14',
       'numtotal_last7', 'ratetotal_last7', 'numdeaths_last7',
       'ratedeaths_last7', 'avgtotal_last7', 'avgincidence_last7',
       'avgdeaths_last7']]

# Index by report date, newest first. `set_index` already replaces the old
# positional index and `sort_index` fixes the final order, so the former
# reset_index / sort_values / drop('index') round trip was redundant.
bc_df = bc_df.set_index('date').sort_index(ascending=False)
# Optional narrower column subset (disabled):
#bc_df = bc_df[['numtoday','numtotal_last7','numdeathstoday','numdeaths_last7','numactive']]
# Persist the result as a pickle under the user's home directory.
bc_df.to_pickle('~/data_bootcamp/data-science-final-project/data/bc_covid_data.sav')
# Preview the 30 most recent days.
bc_df.head(30)

Unnamed: 0_level_0,prname,numconf,numdeaths,numtotal,numtoday,percentoday,ratetotal,ratedeaths,numdeathstoday,numactive,...,ratetotal_last14,numdeaths_last14,ratedeaths_last14,numtotal_last7,ratetotal_last7,numdeaths_last7,ratedeaths_last7,avgtotal_last7,avgincidence_last7,avgdeaths_last7
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-11-23,British Columbia,27407,348.0,27407,594,2.22,540.43,6.86,17.0,7990.0,...,171.41,67.0,1.32,4463.0,88.0,49.0,0.97,638.0,12.57,7.0
2020-11-22,British Columbia,26813,331.0,26813,626,2.39,528.72,6.53,0.0,9005.0,...,168.81,55.0,1.08,4515.0,89.03,41.0,0.81,645.0,12.72,6.0
2020-11-21,British Columbia,26187,331.0,26187,713,2.8,516.37,6.53,0.0,8379.0,...,167.04,55.0,1.08,4548.0,89.68,41.0,0.81,650.0,12.81,6.0
2020-11-20,British Columbia,25474,331.0,25474,516,2.07,502.31,6.53,10.0,7666.0,...,164.16,56.0,1.1,4489.0,88.52,41.0,0.81,641.0,12.65,6.0
2020-11-19,British Columbia,24958,321.0,24958,536,2.19,492.14,6.33,1.0,7431.0,...,165.6,48.0,0.95,4590.0,90.51,33.0,0.65,656.0,12.93,5.0
2020-11-18,British Columbia,24422,320.0,24422,761,3.22,481.57,6.31,10.0,7188.0,...,163.41,47.0,0.93,4648.0,91.65,36.0,0.71,664.0,13.09,5.0
2020-11-17,British Columbia,23661,310.0,23661,717,3.13,466.56,6.11,11.0,6882.0,...,155.01,38.0,0.75,4422.0,87.2,26.0,0.51,632.0,12.46,4.0
2020-11-16,British Columbia,22944,299.0,22944,646,2.9,452.43,5.9,9.0,6558.0,...,146.77,30.0,0.59,4230.0,83.41,18.0,0.35,604.0,11.92,3.0
2020-11-15,British Columbia,22298,290.0,22298,659,3.05,439.69,5.72,0.0,7107.0,...,141.5,27.0,0.53,4046.0,79.78,14.0,0.28,578.0,11.4,2.0
2020-11-14,British Columbia,21639,290.0,21639,654,3.12,426.69,5.72,0.0,6448.0,...,136.18,27.0,0.53,3923.0,77.36,14.0,0.28,560.0,11.05,2.0


In [117]:
def get_covid_data(df_name: str = 'df_bc_covid', globe: bool = True, url=None) -> pd.DataFrame:
    """Load the Covid-19 CSV and return the British Columbia rows indexed by date.

    Args:
        df_name: Name of the module-level variable that receives the result
            when ``globe`` is true.
        globe: When true, also publish the result as a module global named
            ``df_name``. When false, module globals are left untouched.
        url: Anything ``pandas.read_csv`` accepts (URL, path or file-like
            object). When omitted, the module-level ``bc_cov19_url`` is used;
            that name must be defined elsewhere in the notebook.

    Returns:
        The rows whose ``prname`` equals 'British Columbia', indexed by the
        ``date`` column, with every missing value replaced by 0.
    """
    source = bc_cov19_url if url is None else url
    frame = pd.read_csv(source)
    frame = frame[frame.prname == 'British Columbia']
    frame = frame.set_index('date').fillna(0)
    if globe:
        # A `global` statement applies to the whole function even when it is
        # written inside an `if`, so it cannot be made conditional. Writing
        # through globals() honours both `globe` and `df_name`.
        globals()[df_name] = frame
    return frame

In [118]:
# Fetch the British Columbia data and display it. With the default arguments
# the call also binds the result to the global `df_bc_covid`.
get_covid_data()

Unnamed: 0_level_0,pruid,prname,prnameFR,numconf,numprob,numdeaths,numtotal,numtested,numrecover,percentrecover,...,numdeaths_last14,ratedeaths_last14,numtotal_last7,ratetotal_last7,numdeaths_last7,ratedeaths_last7,avgtotal_last7,avgincidence_last7,avgdeaths_last7,avgratedeaths_last7
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-31,59,British Columbia,Colombie-Britannique,1,0,0.0,1,0.0,0.0,0.00,...,0.0,0.00,0.0,0.00,0.0,0.00,0.0,0.00,0.0,0.00
2020-02-08,59,British Columbia,Colombie-Britannique,4,0,0.0,4,0.0,0.0,0.00,...,0.0,0.00,0.0,0.00,0.0,0.00,0.0,0.00,0.0,0.00
2020-02-16,59,British Columbia,Colombie-Britannique,5,0,0.0,5,0.0,0.0,0.00,...,0.0,0.00,0.0,0.00,0.0,0.00,0.0,0.00,0.0,0.00
2020-02-21,59,British Columbia,Colombie-Britannique,6,0,0.0,6,0.0,0.0,0.00,...,0.0,0.00,0.0,0.00,0.0,0.00,0.0,0.00,0.0,0.00
2020-02-24,59,British Columbia,Colombie-Britannique,6,0,0.0,6,0.0,0.0,0.00,...,0.0,0.00,0.0,0.00,0.0,0.00,0.0,0.00,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-11-18,59,British Columbia,Colombie-Britannique,24422,0,320.0,24422,714714.0,16914.0,69.26,...,47.0,0.93,4648.0,91.65,36.0,0.71,664.0,13.09,5.0,0.10
2020-11-19,59,British Columbia,Colombie-Britannique,24958,0,321.0,24958,721552.0,17206.0,68.94,...,48.0,0.95,4590.0,90.51,33.0,0.65,656.0,12.93,5.0,0.09
2020-11-20,59,British Columbia,Colombie-Britannique,25474,0,331.0,25474,727259.0,17477.0,68.61,...,56.0,1.10,4489.0,88.52,41.0,0.81,641.0,12.65,6.0,0.12
2020-11-21,59,British Columbia,Colombie-Britannique,25474,0,331.0,25474,735429.0,17477.0,68.61,...,55.0,1.08,3835.0,75.62,41.0,0.81,548.0,10.80,6.0,0.12
