# Examination of Alberta's COVID-19 Trends

# Sources and References

Isha Berry's GitHub Repository: 
https://github.com/ishaberry/Covid19Canada

Interactive Dashboard:
https://art-bd.shinyapps.io/covid19canada/

For More Information:
https://ishaberry.com/

Bruno Gonçalves' Epidemic Modeling Series:

Part 1: https://medium.com/data-for-science/epidemic-modeling-101-or-why-your-covid19-exponential-fits-are-wrong-97aa50c55f8

Part 2: https://medium.com/data-for-science/epidemic-modeling-102-all-covid-19-models-are-wrong-but-some-are-useful-c81202cc6ee9

Part 3: https://medium.com/data-for-science/epidemic-modeling-103-adding-confidence-intervals-and-stochastic-effects-to-your-covid-19-models-be618b995d6b

In [0]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

Links to Raw Data:

In [0]:
# Root of the raw CSV files in the ishaberry/Covid19Canada GitHub repository
# (master branch). Every dataset URL below is built from this single prefix, so
# pointing the notebook at a fork, another branch, or a pinned commit only
# requires editing this one line.
BASE_URL = 'https://raw.githubusercontent.com/ishaberry/Covid19Canada/master'

# Canada-Wide Data
cases_url = BASE_URL + '/cases.csv'
mortality_url = BASE_URL + '/mortality.csv'
recovered_url = BASE_URL + '/recovered_cumulative.csv'
testing_url = BASE_URL + '/testing_cumulative.csv'

# Timeseries Data by Province
cases_ts_prov_url = BASE_URL + '/timeseries_prov/cases_timeseries_prov.csv'
active_ts_prov_url = BASE_URL + '/timeseries_prov/active_timeseries_prov.csv'
mortality_ts_prov_url = BASE_URL + '/timeseries_prov/mortality_timeseries_prov.csv'
recovered_ts_prov_url = BASE_URL + '/timeseries_prov/recovered_timeseries_prov.csv'
testing_ts_prov_url = BASE_URL + '/timeseries_prov/testing_timeseries_prov.csv'

# Timeseries Data by Health Region
cases_ts_hr_url = BASE_URL + '/timeseries_hr/cases_timeseries_hr.csv'
mortality_ts_hr_url = BASE_URL + '/timeseries_hr/mortality_timeseries_hr.csv'

Load Data into Pandas Data Frames:

In [0]:
# Each group below downloads its CSVs in the listed order and binds one
# DataFrame per URL to the matching name on the left.

# Canada-Wide Data
cases, mortality, recovered, testing = map(
    pd.read_csv,
    (cases_url, mortality_url, recovered_url, testing_url),
)

# Timeseries Data by Province
(
    cases_ts_prov,
    active_ts_prov,
    mortality_ts_prov,
    recovered_ts_prov,
    testing_ts_prov,
) = map(
    pd.read_csv,
    (
        cases_ts_prov_url,
        active_ts_prov_url,
        mortality_ts_prov_url,
        recovered_ts_prov_url,
        testing_ts_prov_url,
    ),
)

# Timeseries Data by Health Region
cases_ts_hr, mortality_ts_hr = map(
    pd.read_csv,
    (cases_ts_hr_url, mortality_ts_hr_url),
)

Canada-Wide Cases Data

In [7]:
# Peek at Data: print (rows, columns), then preview the first five records
print('({}, {})'.format(*cases.shape))
cases.head(5)

(72352, 16)


Unnamed: 0,case_id,provincial_case_id,age,sex,health_region,province,country,date_report,report_week,travel_yn,travel_history_country,locally_acquired,case_source,additional_info,additional_source,method_note
0,1,1,50-59,Male,Toronto,Ontario,Canada,25-01-2020,19-01-2020,1,China,,(1) https://news.ontario.ca/mohltc/en/2020/01/...,,,0.0
1,2,2,50-59,Female,Toronto,Ontario,Canada,27-01-2020,26-01-2020,1,China,,(1) https://news.ontario.ca/mohltc/en/2020/01/...,Travel and Close Contact,,0.0
2,3,1,40-49,Male,Vancouver Coastal,BC,Canada,28-01-2020,26-01-2020,1,China,,https://news.gov.bc.ca/releases/2020HLTH0015-0...,,,
3,4,3,20-29,Female,Middlesex-London,Ontario,Canada,31-01-2020,26-01-2020,1,China,,(1) https://news.ontario.ca/mohltc/en/2020/01/...,,,0.0
4,5,2,50-59,Female,Vancouver Coastal,BC,Canada,04-02-2020,02-02-2020,0,,Close Contact,https://news.gov.bc.ca/releases/2020HLTH0023-0...,The individual had close contact with family v...,,


Canada Daily Cases By Province

In [31]:
# Peek at Time Series Data: print (rows, columns), then preview the first five records
print('({}, {})'.format(*cases_ts_prov.shape))
cases_ts_prov.head(5)

(1526, 4)


Unnamed: 0,province,date_report,cases,cumulative_cases
0,Alberta,25-01-2020,0,0
1,Alberta,26-01-2020,0,0
2,Alberta,27-01-2020,0,0
3,Alberta,28-01-2020,0,0
4,Alberta,29-01-2020,0,0


In [0]:
# Convert 'date_report' values to datetime.
# The raw values are day-first strings (e.g. 25-01-2020), hence dayfirst=True.
cases_ts_prov['date_report'] = cases_ts_prov['date_report'].pipe(
    pd.to_datetime, dayfirst=True
)

In [36]:
# Daily Cases Scatter Plot: one colored trace per province.
# The plotly update_* methods return the figure, so the styling is chained.
fig = (
    px.scatter(cases_ts_prov, x='date_report', y='cases', color='province')
    .update_layout(
        title='Canada Daily Cases By Province',
        yaxis_title='Number of Cases',
        xaxis_title='Date',
    )
    .update_traces(mode='lines+markers')  # join each province's points with a line
)
fig.show()

In [37]:
# Cumulative Cases Scatter Plot: running total per province, one trace each.
# The plotly update_* methods return the figure, so the styling is chained.
fig = (
    px.scatter(cases_ts_prov, x='date_report', y='cumulative_cases', color='province')
    .update_layout(
        title='Canada Cumulative Cases by Province',
        yaxis_title='Number of Cases',
        xaxis_title='Date',
    )
    .update_traces(mode='lines+markers')  # join each province's points with a line
)
fig.show()

Alberta Case Data

In [38]:
# Create AB-Only Timeseries Data Frame
# Step 1: parse 'date_report' (day-first) on the health-region frame itself.
cases_ts_hr['date_report'] = cases_ts_hr['date_report'].pipe(
    pd.to_datetime, dayfirst=True
)
# Step 2: keep only the rows whose province is Alberta.
cases_ab_ts_hr = cases_ts_hr.loc[cases_ts_hr['province'].eq('Alberta')]
print('({}, {})'.format(*cases_ab_ts_hr.shape))
cases_ab_ts_hr.head(5)

(654, 5)


Unnamed: 0,province,health_region,date_report,cases,cumulative_cases
0,Alberta,Calgary,2020-01-25,0,0
1,Alberta,Calgary,2020-01-26,0,0
2,Alberta,Calgary,2020-01-27,0,0
3,Alberta,Calgary,2020-01-28,0,0
4,Alberta,Calgary,2020-01-29,0,0


In [44]:
# Alberta Daily Cases by Region Bar Plot: bars colored by health region.
# The plotly update_* methods return the figure, so the styling is chained.
fig = (
    px.bar(cases_ab_ts_hr, x='date_report', y='cases', color='health_region')
    .update_layout(
        title='Alberta Daily Cases by Region',
        yaxis_title='Number of Cases',
        xaxis_title='Date',
    )
)
# Alternative view: `mode` is a scatter-trace option, so to get lines+markers
# swap px.bar for px.scatter above and then uncomment the line below.
# fig.update_traces(mode='lines+markers')
fig.show()

In [41]:
# Alberta Cumulative Cases by Region Scatter Plot: one trace per health region.
# The plotly update_* methods return the figure, so the styling is chained.
fig = (
    px.scatter(
        cases_ab_ts_hr,
        x='date_report',
        y='cumulative_cases',
        color='health_region',
    )
    .update_layout(
        title='Alberta Cumulative Cases by Region',
        yaxis_title='Number of Cases',
        xaxis_title='Date',
    )
    .update_traces(mode='lines+markers')  # join each region's points with a line
)
fig.show()

Target Outline:

- Canada Daily Cases by Province
- Canada Cumulative Cases by Province
- Canada Case Age Demographics
- Canada Mortality
- Canada Recovered
- Canada Testing


- Alberta Daily Cases by Health Region
- Alberta Cumulative Cases by Region
- Alberta Case Age Demographics
- Alberta Mortality
- Alberta Recovered
- Alberta Testing


- Cases/Active/Recovered/Mortality
- SI/SIR Models

In [42]:
# Look at Missing Values
# Print the size of the Alberta slice of the line list, then show the fraction
# of nulls in each column, largest first.
print("Alberta Cases Data Shape:", cases.loc[cases['province'].eq('Alberta')].shape)
print("\nMissing Values:")
(
    cases.loc[cases['province'].eq('Alberta')]
    .isnull()
    .mean()
    .sort_values(ascending=False)
)

Alberta Cases Data Shape: (6345, 16)

Missing Values:


locally_acquired          0.997794
travel_history_country    0.996217
additional_source         0.983609
additional_info           0.880851
method_note               0.523247
case_source               0.000000
travel_yn                 0.000000
report_week               0.000000
date_report               0.000000
country                   0.000000
province                  0.000000
health_region             0.000000
sex                       0.000000
age                       0.000000
provincial_case_id        0.000000
case_id                   0.000000
dtype: float64

In [0]:
# # 99.8% of 'locally_acquired' values are null

# print(cases[cases['province'] == 'Alberta']['locally_acquired'].unique().tolist())
# print(cases[cases['province'] == 'Alberta']['locally_acquired'].value_counts())

In [0]:
# # 99.6% of 'travel_history_country' values are null

# print(cases[cases['province'] == 'Alberta']['travel_history_country'].unique().tolist())
# print(cases[cases['province'] == 'Alberta']['travel_history_country'].value_counts())

In [0]:
# # 98.4% of 'additional_source' values are null

# print(cases[cases['province'] == 'Alberta']['additional_source'].unique().tolist())
# print(cases[cases['province'] == 'Alberta']['additional_source'].value_counts())

In [0]:
# # 88.1% of 'additional_info' values are null

# print(cases[cases['province'] == 'Alberta']['additional_info'].unique().tolist())
# print(cases[cases['province'] == 'Alberta']['additional_info'].value_counts())

In [0]:
# # 52.3% of 'method_note' values are null

# print(cases[cases['province'] == 'Alberta']['method_note'].unique().tolist())
# print(cases[cases['province'] == 'Alberta']['method_note'].value_counts())

In [0]:
# Build the Alberta-only case list and drop the columns not needed here:
#   - identifiers made redundant by the filter: Canada-wide case id, province, country
#   - the five columns that contain missing values for Alberta: locally_acquired,
#     travel_history_country, additional_source, additional_info, method_note
#   - case_source
cases_ab = cases.loc[cases['province'].eq('Alberta')].drop(
    columns=[
        'case_id',
        'province',
        'country',
        'locally_acquired',
        'travel_history_country',
        'additional_source',
        'additional_info',
        'method_note',
        'case_source',
    ]
)
print('({}, {})'.format(*cases_ab.shape))
cases_ab.head(5)

(6300, 7)


Unnamed: 0,provincial_case_id,age,sex,health_region,date_report,report_week,travel_yn
46,1,50-59,Female,Calgary,05-03-2020,01-03-2020,1
53,2,40-49,Male,Edmonton,06-03-2020,01-03-2020,1
62,3,60-69,Male,Edmonton,08-03-2020,08-03-2020,1
63,4,30-39,Female,Calgary,08-03-2020,08-03-2020,0
74,5,70-79,Female,Edmonton,09-03-2020,08-03-2020,1


In [0]:
# fig = px.histogram(
#     cases_ab, 
#     x = 'date_report', 
#     color = 'health_region'
#   )
# fig.update_layout(
#     title = 'Daily Cases', 
#     yaxis_title = 'Number of Cases',
#     xaxis_title = 'Date'
#   )
# fig.show()

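# # Why is 06-03-2020 at the end of the x-axis??????
# # Likely answer: 'date_report' in cases_ab is still a dd-mm-yyyy string, so Plotly
# # treats the x-axis as categorical and orders the dates by first appearance instead
# # of chronologically. Converting the column with pd.to_datetime(..., dayfirst=True),
# # as the next cell does, should restore date order -- TODO confirm.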

In [0]:
# cases_ab_dt = cases_ab
# cases_ab_dt['date_report'] = pd.to_datetime(
#     cases_ab_dt['date_report'],
#     dayfirst = True
#     )
# cases_ab_sorted = cases_ab_dt.sort_values(by=['date_report', 'provincial_case_id'])

# fig = px.histogram(
#     cases_ab_sorted, 
#     x = 'date_report', 
#     color = 'health_region'
#   )
# fig.update_layout(
#     title = 'Daily Cases', 
#     yaxis_title = 'Number of Cases',
#     xaxis_title = 'Date'
#   )
# fig.show()

In [0]:
C