In [2]:
import pandas as pd
import plotly.express as px  # (version 4.7.0)
import plotly.graph_objects as go

import dash  # (version 1.12.0) pip install dash
import dash_table
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

from datetime import date
import datetime

In [3]:
data = pd.read_csv('../data/percent_positive_by_agegrp.csv')
print(data['age_category'].unique())
data.head()

['0to13' '14to17' '18to24' '25to64' '65+']


Unnamed: 0,DATE,age_category,percent_positive_7d_avg
0,01-May-20,0to13,0.034
1,02-May-20,0to13,0.035
2,03-May-20,0to13,0.036
3,04-May-20,0to13,0.0359
4,05-May-20,0to13,0.0355


## Web Scraping

In [4]:
import requests
from bs4 import BeautifulSoup
import re

In [5]:
def get_updated_date(url='https://globalnews.ca/news/6859636/ontario-coronavirus-timeline/'):
    '''
    return updated date from website
    '''
    page = requests.get(url)

    soup = BeautifulSoup(page.content, 'html.parser')
    return soup.find_all('span', text=re.compile("^Updated"))[0].text

In [6]:
def get_news_table(url = 'https://globalnews.ca/news/6859636/ontario-coronavirus-timeline/'):
    '''
    Get the news data from URL
    '''
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    ps = soup.find_all('p')
    events = [ps[i].text.replace(u'\xa0', u' ') for i in range(6,len(ps)) if ps[i].find('strong')]
    events = events[::-1]
    dates = [e.split(':',1)[0] for e in events]
    descriptions = [e.split(':',1)[1].strip()+'\n' for e in events]
    news = pd.DataFrame({'Date':dates, 'Description':descriptions})
    
    return news

In [7]:
import numpy as np
from sklearn.model_selection import KFold
X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
y = np.array([1, 2, 3, 4])
kf = KFold(n_splits=2)
print('first:',kf.get_n_splits(X))

print(kf)

for train_index, test_index in kf.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]


first: 2
KFold(n_splits=2, random_state=None, shuffle=False)
TRAIN: [2 3] TEST: [0 1]
TRAIN: [0 1] TEST: [2 3]


In [8]:
news_df = get_news_table()

## Data Exploration

Things to investigate:
- distribution of positives by age category
- time series comparison
- 

In [9]:
data.groupby(['age_category']).describe()

Unnamed: 0_level_0,percent_positive_7d_avg,percent_positive_7d_avg,percent_positive_7d_avg,percent_positive_7d_avg,percent_positive_7d_avg,percent_positive_7d_avg,percent_positive_7d_avg,percent_positive_7d_avg
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
age_category,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
0to13,252.0,0.029365,0.030076,0.0051,0.009275,0.02305,0.039025,0.1767
14to17,252.0,0.036362,0.030307,0.0067,0.014275,0.0228,0.0571,0.1513
18to24,252.0,0.031844,0.021971,0.0048,0.010375,0.03165,0.05055,0.0969
25to64,252.0,0.024693,0.017879,0.0042,0.007775,0.02045,0.039325,0.0684
65+,252.0,0.020341,0.01606,0.0023,0.0047,0.0163,0.033125,0.0557


In [10]:
data['DATE'] = pd.to_datetime(data['DATE'])
data

Unnamed: 0,DATE,age_category,percent_positive_7d_avg
0,2020-05-01,0to13,0.0340
1,2020-05-02,0to13,0.0350
2,2020-05-03,0to13,0.0360
3,2020-05-04,0to13,0.0359
4,2020-05-05,0to13,0.0355
...,...,...,...
1255,2021-01-03,65+,0.0532
1256,2021-01-04,65+,0.0526
1257,2021-01-05,65+,0.0537
1258,2021-01-06,65+,0.0549


In [11]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.dates as mdates
myFmt = mdates.DateFormatter('%d')


cats = data['age_category'].unique()

fig, axs = plt.subplots(3, 2, figsize=(20, 30))

for i, cat in enumerate(cats):
    a = i//2
    b = i%2
    
    x = pd.to_datetime(data[data['age_category'] == cat]['DATE']).sort_values().values
    y = data[data['age_category'] == cat]['percent_positive_7d_avg'].values
    
    axs[a, b].plot(x, y)
    axs[a, b].set_title(cat)
    
    xt_idx = list(range(0,len(x),1))
    #print(xt_idx)
    #axs[a, b].xticks(xt_idx, [x[k] for k in xt_idx])
    
    #print('xticks:',xt_idx)
    #print('x labels:',[x[k] for k in xt_idx])
    
   # axs[a, b].xaxis.set_major_formatter(myFmt)
    
    #axs[a, b].set_xticks(range(len(x))) 
    #axs[a, b].set_xticklabels([str(x[k])[:10] for k in xt_idx], fontsize=12)

#     xticks(np.arange(12), calendar.month_name[1:13], rotation=20)
    
# for ax in axs.flat:
#     ax.set(xlabel='x-label', ylabel='y-label')

# # Hide x labels and tick labels for top plots and y ticks for right plots.
# for ax in axs.flat:
#     ax.label_outer()


In [12]:
# fig, ax = plt.subplots(1)
# ax.hist(np.random.randn(100))
# ax.set_xticks(range(9))

In [13]:
# data cleaning/exploration

In [14]:

odata = pd.read_csv('../data/on_cases_by_region.csv')

# clean on_cases by region
def clean_region(data_input):
    data = data_input.copy()
    cols = ['FILE_DATE', 'PHU_NAME', 'ACTIVE_CASES',
       'RESOLVED_CASES', 'DEATHS']
    data = data[cols]
    
    # fix FILE_DATE
    data['FILE_DATE'] = [str(datetime.datetime.strptime(str(s), '%Y%m%d').date()) for s in data['FILE_DATE']]
    data = data.rename({'FILE_DATE': "DATE"}, axis='columns')
    data = data.sort_values(['PHU_NAME', 'DATE'])
    return data

def daily_rec_deaths(data):
    '''
    calculate daily deaths and recoveries from data
    returns:
        dataframe with daily resolved and daily deaths
    '''
    
    df = data.copy()
    
    regions = data.PHU_NAME.unique()
    daily_cnts = pd.DataFrame({})
    for r in regions:
        tmp = df[df.PHU_NAME == r]
        tmp['DAILY_DEATHS'] = tmp['DEATHS'] - tmp['DEATHS'].shift(1)
        tmp['DAILY_RECOVERED'] = tmp['RESOLVED_CASES'] - tmp['RESOLVED_CASES'].shift(1)
        daily_cnts = pd.concat([daily_cnts, tmp], axis=0)
    
    return daily_cnts

odata = clean_region(odata)
odata = daily_rec_deaths(odata)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [15]:
odata.columns

Index(['DATE', 'PHU_NAME', 'ACTIVE_CASES', 'RESOLVED_CASES', 'DEATHS',
       'DAILY_DEATHS', 'DAILY_RECOVERED'],
      dtype='object')

In [16]:
data['age_category'].unique()

array(['0to13', '14to17', '18to24', '25to64', '65+'], dtype=object)

In [17]:
odata['PHU_NAME'].unique()

array(['ALGOMA DISTRICT', 'BRANT COUNTY', 'CHATHAM-KENT',
       'CITY OF HAMILTON', 'CITY OF OTTAWA', 'DURHAM REGION',
       'EASTERN ONTARIO', 'GREY BRUCE', 'HALDIMAND-NORFOLK',
       'HALIBURTON, KAWARTHA, PINE RIDGE', 'HALTON REGION',
       'HASTINGS & PRINCE EDWARD COUNTIES', 'HURON PERTH',
       'KINGSTON, FRONTENAC, LENNOX & ADDINGTON', 'LAMBTON COUNTY',
       'LEEDS, GRENVILLE AND LANARK DISTRICT', 'MIDDLESEX-LONDON',
       'NIAGARA REGION', 'NORTH BAY PARRY SOUND DISTRICT', 'NORTHWESTERN',
       'OXFORD ELGIN-ST.THOMAS', 'PEEL REGION',
       'PETERBOROUGH COUNTY-CITY', 'PORCUPINE',
       'RENFREW COUNTY AND DISTRICT', 'SIMCOE MUSKOKA DISTRICT',
       'SUDBURY AND DISTRICT', 'THUNDER BAY DISTRICT', 'TIMISKAMING',
       'TORONTO', 'WATERLOO REGION', 'WELLINGTON-DUFFERIN-GUELPH',
       'WINDSOR-ESSEX COUNTY', 'YORK REGION'], dtype=object)

In [21]:
# add daily deaths
# add daily recovered


odata.head()

Unnamed: 0,DATE,PHU_NAME,ACTIVE_CASES,RESOLVED_CASES,DEATHS,DAILY_DEATHS,DAILY_RECOVERED
466,2020-04-01,ALGOMA DISTRICT,4,1,0,,
500,2020-04-02,ALGOMA DISTRICT,4,1,0,0.0,0.0
534,2020-04-03,ALGOMA DISTRICT,5,1,0,0.0,0.0
568,2020-04-04,ALGOMA DISTRICT,6,1,0,0.0,0.0
602,2020-04-05,ALGOMA DISTRICT,5,3,0,0.0,2.0


In [30]:
orpop = pd.read_csv('../data/ontario_region_population.csv')
orpop['Population'] = [int(s.replace(',','')) for s in orpop['Population']]
a = odata.merge(right=orpop, how='left', left_on='PHU_NAME', right_on='new_region')[['PHU_NAME','new_region','Population']].drop_duplicates()
a['Population'] = a['Population'].fillna(a['Population'].mean())

In [33]:
a

Unnamed: 0,PHU_NAME,new_region,Population
0,ALGOMA DISTRICT,ALGOMA DISTRICT,114094.0
298,BRANT COUNTY,BRANT COUNTY,134808.0
596,CHATHAM-KENT,CHATHAM-KENT,102042.0
894,CITY OF HAMILTON,CITY OF HAMILTON,536917.0
1192,CITY OF OTTAWA,CITY OF OTTAWA,934243.0
1490,DURHAM REGION,DURHAM REGION,645862.0
1788,EASTERN ONTARIO,,5926925000.0
2086,GREY BRUCE,GREY BRUCE,6814794000.0
2384,HALDIMAND-NORFOLK,HALDIMAND-NORFOLK,109787.0
2682,"HALIBURTON, KAWARTHA, PINE RIDGE","HALIBURTON, KAWARTHA, PINE RIDGE",7542318000.0


In [66]:
external_stylesheets = [
    'https://codepen.io/chriddyp/pen/bWLwgP.css',
    {
        'href': 'https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/css/bootstrap.min.css',
        'rel': 'stylesheet',
        'integrity': 'sha384-MCw98/SFnGE8fJT3GXwEOngsV7Zt27NXFoaoApmYm81iuXoPkFOJwJ8ERdknLPMO',
        'crossorigin': 'anonymous'
    }
]

all_regions = odata['PHU_NAME'].unique()


app = JupyterDash(__name__, external_stylesheets = external_stylesheets)
#app = dash.Dash(__name__, external_stylesheets = external_stylesheets)
my_options = [{"label":col, "value":col} for col in data['age_category'].unique()]
my_options.insert(0,{'label':'All', "value": 'All'})

region_options = [{"label": name, "value": name} for name in odata['PHU_NAME'].unique()]
region_options.insert(0,{"label":"All", "value":"All"})

app.layout = html.Div([

    html.H1("Ontario COVID-19 Dashboard", style={'text-align': 'center', 'font-size':'50px'}),
    
    # Row 0 - cool data and news
    html.Div([
        
        html.Div([
            html.H3('Description'),
            html.P('This is a test paragraph')
        ], style={'padding-left':"40px"}, className="six columns bg-white text-dark"),
        
        
        html.Div([
            #html.Br(),
            html.H3('News Updates', className="bg-secondary text-white"),
            dash_table.DataTable(
                id='news_table',
                columns=[{"name": i, "id": i} for i in news_df.columns],
                data=news_df.to_dict('records'),
                style_table={},
                fixed_rows={'headers': True},
                style_cell={
                    'maxWidth':'500px', 'textAlign': 'left', 'font-family':'sans-serif'
                },
                style_data={
                    'whiteSpace':'normal', 'height':'auto', 'font-size':'14px'
                },
                style_header={
                    'fontWeight': 'bold',
                    'font-size':'16px'
                },
            ),
            html.Div([
                dcc.Link(
                    id='news_link', 
                    children=['Source: Global News Canada'], 
                    href='https://globalnews.ca/news/6859636/ontario-coronavirus-timeline/',
                    target='_blank',
                    style={'color':'white'}
                )
            ], style={'font-size':'14px'})

        ], style={}, className="six columns text-dark"),
        

        

    ], className="row"),
    html.Br(),
   
    # ROW 1 - Headers, dropdowns, etc
    html.Div([
        html.Div([
            html.H3('Select Age Group(s)', style={'padding-left':"40px"}),
            dcc.Dropdown(id="age_group",
                options=my_options,
                multi=True,
                value=[my_options[0]['value']],
                style={"width":"100%", "display":"inline-block", "padding-left":"40px"},
                className='text-dark'
            )
        ], className="three columns"),

        html.Div([
            html.H3('Select Date Range'),
            dcc.DatePickerRange(
                id='date-picker-range',
                start_date=date(2020, 1, 1),
                end_date=date(2020, 12, 1),
                end_date_placeholder_text='Select a date!',
                style = {'width': "100%", "display":"inline-block"}
            )
        ], className="three columns"),
        html.Div([
            html.H3('Select Region(s)', style={'padding-left':"0px"}),
            dcc.Dropdown(id="region_select",
                options=region_options,
                multi=True,
                value=[region_options[0]['value']],
                style={"width":"100%", "display":"inline-block", "padding-left":"40px"},
                className='text-dark'
            )
        ], className="three columns"),
        html.Div([
            html.H3('Options', style={'padding-left':"0px"}),
            dcc.Checklist(
                id="per100k",
                options=[
                    {'label': 'Count per 100,000 people (not including daily resolved or fatal cases)', 'value': '100k'},
                ],
                value=[],
                style={'font-size':'12px'}
            )  
        ], className="three columns"),
    ], className="row"),
    
    
    
    html.Div(id='select_ref', children=[], style={"padding-left":"40px"}),
    # ROW 2
    #html.Br(),
        html.Div([
        html.Div([
            #html.H3('Plot 1'),
            html.Br(),
            dcc.Graph(id='placeholder', figure={}
                        )
        ], style={"padding-left":"40px"}, className="six columns"),
        
        html.Div([
            dcc.Tabs([
                dcc.Tab(label='Active Cases', children=[
                    dcc.Graph(
                        id='bar1',
                        figure={}
                    )
                ]),
                dcc.Tab(label='Resolved Cases', children=[
                    dcc.Graph(
                        id='bar2',
                        figure={}
                    )
                ]),
                dcc.Tab(label='Fatal Cases', children=[
                    dcc.Graph(
                        id='bar3',
                        figure={}
                    )
                ]),
            ])
        ], style={}, className='six columns text-dark')

#         html.Div([
#             html.Br(),
#             #html.H3('Plot 2'),
#             dcc.Graph(id='graph2', figure= px.line(
#                             data_frame = odata,
#                             x = 'DATE',
#                             y = 'ACTIVE_CASES',
#                             color = 'PHU_NAME',
#                             title="Active Cases vs Date",
#                             labels = {
#                                 'DATE': 'Date',
#                                 'ACTIVE_CASES': "Number of Active Cases"
#                             }
#                         )
#                      )
#         ], className="six columns"),
    ], className="row"),
    
    # ROW 2.5 - testing and active
    html.Div([
        html.Div([
            #html.H3('Plot 1'),
            html.Br(),
            dcc.Graph(id='graph1', figure=px.line(
                            data_frame = odata,
                            x = 'DATE',
                            y = 'RESOLVED_CASES',
                            color = 'PHU_NAME',
                            title="Active Cases vs Date",
                            labels = {
                                'DATE': 'Date',
                                'RESOLVED_CASES': "Number of Resolved Cases"
                            }
                        ))
        ], style={"padding-left":"40px"}, className="six columns"),
        
        html.Div([
            dcc.Tabs([
                dcc.Tab(label='Active Cases', children=[
                    dcc.Graph(
                        id='graph2',
                        figure={}
                    )
                ]),
                dcc.Tab(label='Resolved Cases', children=[
                    dcc.Graph(
                        id='graph3',
                        figure={}
                    )
                ]),
                dcc.Tab(label='Fatal Cases', children=[
                    dcc.Graph(
                        id='graph4',
                        figure={}
                    )
                ]),
            ])
        ], style={}, className='six columns text-dark')

#         html.Div([
#             html.Br(),
#             #html.H3('Plot 2'),
#             dcc.Graph(id='graph2', figure= px.line(
#                             data_frame = odata,
#                             x = 'DATE',
#                             y = 'ACTIVE_CASES',
#                             color = 'PHU_NAME',
#                             title="Active Cases vs Date",
#                             labels = {
#                                 'DATE': 'Date',
#                                 'ACTIVE_CASES': "Number of Active Cases"
#                             }
#                         )
#                      )
#         ], className="six columns"),
    ], className="row"),
    
    
    # Row 3 - Recovered and Deaths
#     html.Div([
#         html.Div([
#             html.Br(),
#             #html.H3('Resolved Cases'),
#             dcc.Graph(id='graph3', figure=px.line(
#                             data_frame = odata,
#                             x = 'DATE',
#                             y = 'RESOLVED_CASES',
#                             color = 'PHU_NAME',
#                             title="Active Cases vs Date",
#                             labels = {
#                                 'DATE': 'Date',
#                                 'RESOLVED_CASES': "Number of Resolved Cases"
#                             }
#                         ))
#         ], style={"padding-left":"40px"}, className="six columns"),

#         html.Div([
#             html.Br(),
#             #html.H3('New Fatal Cases by Day'),
#             dcc.Graph(id='graph4', figure= {})
#         ], className="six columns"),
#     ], className="row"),
    
    

    
    # Row 4 - Deaths by day
    html.Div([
        
        html.Div([
            html.Br(),
            #html.H3('Daily Recovered'),
            dcc.Graph(id='graph6', figure={})
        ], style={"padding-left":"40px"}, className="six columns"),

    
        html.Div([
            html.Br(),
            #html.H3('Fatalities'),
            dcc.Graph(id='graph5', figure={})
        ], className="six columns"),
    ], className="row"),
    html.Br(),


    
    
], className='bg-secondary text-white')

# @app.callback(
#     [Output(component_id='select_ref', component_property='children'),
#      Output(component_id='graph1', component_property='figure')],
#     [Input(component_id='age_group', component_property='value')]
# )
# # 1. callback selects value, 
# # 2. value is sent to function (defined after callback)
# # 3. function returned values go into the output of callback



@app.callback(
    [Output(component_id='select_ref', component_property='children'),
     Output(component_id='graph1', component_property='figure')],
    [Input(component_id='age_group', component_property='value'),
     Input(component_id='date-picker-range', component_property='start_date'),
     Input(component_id='date-picker-range', component_property='end_date')]
)

def update_graph_date(option_slctd, start, end):
    select_ref = "The age group chosen by user: {}".format(option_slctd)
    
    if 'All' in option_slctd:
        option_slctd = ['0to13', '14to17', '18to24', '25to64', '65+']

    
    sdt = pd.to_datetime(start)
    edt = pd.to_datetime(end)
    
    df = data[(data['age_category'].isin(option_slctd)) & (data['DATE'] >= sdt) & (data['DATE'] <= edt)] #.groupby('DATE').sum().reset_index()
    df = df.sort_values('DATE')
    print(df.head())

    if df.shape[0] == 0:
        fig = px.line()
    else:
        fig = px.line(
            data_frame = df,
            x = 'DATE',
            y = 'percent_positive_7d_avg',
            color = 'age_category',
            title="Percent Positive",
            labels = {
                'DATE': 'Date',
                'percent_positive_7d_avg': "Percent Positive (7 Day Average)"
            }
        )


    fig.update_xaxes(tickangle=90, nticks=20)
    #fig.update_layout(width=int(700))
    
    return select_ref, fig


# add callback for another graph
@app.callback([Output(component_id='graph2', component_property='figure'),
              Output(component_id='graph3', component_property='figure'),
              Output(component_id='graph4', component_property='figure'),
              Output(component_id='graph5', component_property='figure'),
              Output(component_id='graph6', component_property='figure')],
             [Input('region_select', 'value'),
             Input('date-picker-range', 'start_date'),
             Input('date-picker-range', 'end_date'),
             Input('per100k', 'value')])

def graph2(regions, start, end, is_100k):
    '''
    function for working with the graph on the right at the top
    '''
    
    if 'All' in regions:
        regions = all_regions
    elif len(regions) == 0:
        return {}, {}, {}, {}, {}
    
    start = str(pd.to_datetime(start).date())
    end = str(pd.to_datetime(end).date())
    
    df = odata
    df = odata[odata['PHU_NAME'].isin(regions) & (odata['DATE'] >= start) & (odata['DATE'] <= end)]
    
    
    fig1_title = "Active Cases"
    fig1_yaxis = "Number of Active Cases"
    
    fig2_title = "Resolved Cases"
    fig2_yaxis = "Cumulative Number of Resolved Cases"
        
    fig3_title = "Fatal Cases"
    fig3_yaxis = "Cumulative Number of Fatal Cases"
    
    if len(is_100k) > 0:
        # list is not empty, so divide all counts by 100k
        df['ACTIVE_CASES'] = df["ACTIVE_CASES"]/100000
        df['RESOLVED_CASES'] = df["RESOLVED_CASES"]/100000
        df['DEATHS'] = df["DEATHS"]/100000
        fig1_title += ' Per 100,000'
        fig1_yaxis += ' Per 100,000'
        fig2_title += ' Per 100,000'
        fig2_yaxis += ' Per 100,000'
        fig3_title += ' Per 100,000'
        fig3_yaxis += ' Per 100,000'
        
        
    fig1 = px.line(
        data_frame = df,
        x = 'DATE',
        y = 'ACTIVE_CASES',
        color = 'PHU_NAME',
        title=fig1_title,
        labels = {
            'DATE': 'Date',
            'ACTIVE_CASES': fig1_yaxis,
            "PHU_NAME": "Region"
        }
    )
    fig2 = px.line(
        data_frame = df,
        x = 'DATE',
        y = 'RESOLVED_CASES',
        color = 'PHU_NAME',
        title=fig2_title,
        labels = {
            'DATE': 'Date',
            'RESOLVED_CASES': fig2_yaxis,
            "PHU_NAME": "Region"
        }
    )
    fig3 = px.line(
        data_frame = df,
        x = 'DATE',
        y = 'DEATHS',
        color = 'PHU_NAME',
        title=fig3_title,
        labels = {
            'DATE': 'Date',
            'DEATHS': fig3_yaxis,
            "PHU_NAME": "Region"
        }
    )
    
    fig4 = px.line(
        data_frame = df,
        x = 'DATE',
        y = 'DAILY_DEATHS',
        color = 'PHU_NAME',
        title="Daily Fatal Cases",
        labels = {
            'DATE': 'Date',
            'DAILY_DEATHS': "Daily Number of Fatal Cases",
            "PHU_NAME": "Region"
        }
    )
    
    fig5 = px.line(
        data_frame = df,
        x = 'DATE',
        y = 'DAILY_RECOVERED',
        color = 'PHU_NAME',
        title="Daily Resolved Cases",
        labels = {
            'DATE': 'Date',
            'DAILY_RECOVERED': "Daily Number of Recovered Cases",
            "PHU_NAME": "Region"
        }
    )
    
    
    
    return fig1, fig2, fig3, fig4, fig5


@app.callback([Output(component_id='bar1', component_property='figure'),
              Output(component_id='bar2', component_property='figure'),
              Output(component_id='bar3', component_property='figure')],
             [Input('region_select', 'value')])

def build_bars(regions):
    '''
    build 3 bar charts: active, resolved, deaths for region cases
    '''
    
    # fig = px.bar(wide_df, x="nation", y=["gold", "silver", "bronze"], title="Wide-Form Input")
    
    if 'All' in regions:
        regions = all_regions
    elif len(regions) == 0:
        return {}, {}, {}
    
    maxdate = odata['DATE'].max()
    
    df = odata[odata['PHU_NAME'].isin(regions) & (odata['DATE'] == str(maxdate))]
    
    fig1 = px.bar(df, x="PHU_NAME", y="ACTIVE_CASES", color = "PHU_NAME", title="Active Cases as of "+maxdate)
    
    fig2 = px.bar(df, x="PHU_NAME", y="RESOLVED_CASES", color = "PHU_NAME", title="Resolved Cases as of "+maxdate)
    fig3 = px.bar(df, x="PHU_NAME", y="DEATHS", color = "PHU_NAME", title="Fatal Cases as of "+maxdate)
    
    return fig1, fig2, fig3
    

app.run_server(mode='external')



Dash app running on http://127.0.0.1:8050/
           DATE age_category  percent_positive_7d_avg
0    2020-05-01        0to13                   0.0340
756  2020-05-01       25to64                   0.0503
504  2020-05-01       18to24                   0.0437
252  2020-05-01       14to17                   0.0337
1008 2020-05-01          65+                   0.0427
           DATE age_category  percent_positive_7d_avg
0    2020-05-01        0to13                   0.0340
756  2020-05-01       25to64                   0.0503
504  2020-05-01       18to24                   0.0437
252  2020-05-01       14to17                   0.0337
1008 2020-05-01          65+                   0.0427
          DATE age_category  percent_positive_7d_avg
252 2020-05-01       14to17                   0.0337
253 2020-05-02       14to17                   0.0374
254 2020-05-03       14to17                   0.0431
255 2020-05-04       14to17                   0.0378
256 2020-05-05       14to17                 



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [55]:
#odata.groupby('PHU_NAME').max('DATE')
# toronto 9008	67552	2251	74.0	1060.0
odata[odata['DATE'].isin(['2021-01-22'])].count()
#odata[odata['PHU_NAME']== 'TORONTO']['DATE'].max()

DATE               34
PHU_NAME           34
ACTIVE_CASES       34
RESOLVED_CASES     34
DEATHS             34
DAILY_DEATHS       34
DAILY_RECOVERED    34
dtype: int64

In [62]:
maxdate = odata['DATE'].max()
odata[odata['DATE'] == str(maxdate)]

Unnamed: 0,DATE,PHU_NAME,ACTIVE_CASES,RESOLVED_CASES,DEATHS,DAILY_DEATHS,DAILY_RECOVERED
10098,2021-01-22,ALGOMA DISTRICT,25,123,1,0.0,3.0
10099,2021-01-22,BRANT COUNTY,119,1270,9,1.0,28.0
10100,2021-01-22,CHATHAM-KENT,78,970,6,0.0,7.0
10101,2021-01-22,CITY OF HAMILTON,708,7582,231,4.0,104.0
10102,2021-01-22,CITY OF OTTAWA,1225,11089,414,7.0,140.0
10103,2021-01-22,DURHAM REGION,630,9160,267,1.0,120.0
10104,2021-01-22,EASTERN ONTARIO,326,1881,56,0.0,70.0
10105,2021-01-22,GREY BRUCE,35,572,0,0.0,3.0
10106,2021-01-22,HALDIMAND-NORFOLK,97,1082,41,0.0,19.0
10107,2021-01-22,"HALIBURTON, KAWARTHA, PINE RIDGE",103,648,30,0.0,4.0


In [17]:
pd.read_csv('../../Region_Mobility_Report_CSVs/2020_CA_Region_Mobility_Report.csv')['sub_region_2'].unique()

array([nan, 'Division No. 1 - Medicine Hat',
       'Division No. 10 - Lloydminster', 'Division No. 11 - Edmonton',
       'Division No. 12 - Cold Lake', 'Division No. 13 - Whitecourt',
       'Division No. 14 - Hinton', 'Division No. 15 - Canmore',
       'Division No. 16 - Fort McMurray', 'Division No. 17 - Slave Lake',
       'Division No. 18 - Grande Cache',
       'Division No. 19 - Grande Prairie', 'Division No. 2 - Lethbridge',
       'Division No. 3 - Claresholm', 'Division No. 5 - Strathmore',
       'Division No. 6 - Calgary', 'Division No. 7 - Wainwright',
       'Division No. 8 - Red Deer',
       'Division No. 9 - Rocky Mountain House', 'Alberni-Clayoquot',
       'Bulkley-Nechako', 'Capital', 'Cariboo', 'Central Kootenay',
       'Central Okanagan', 'Columbia-Shuswap', 'Comox Valley',
       'Cowichan Valley', 'East Kootenay', 'Fraser Valley',
       'Fraser-Fort George', 'Kitimat-Stikine', 'Kootenay Boundary',
       'Metro Vancouver', 'Mount Waddington', 'Nanaimo', 'Nor

NameError: name 'odata' is not defined

## Web Scraper

In [18]:
# import requests
# from bs4 import BeautifulSoup
# import re

# URL = 'https://globalnews.ca/news/6859636/ontario-coronavirus-timeline/'
# page = requests.get(URL)

# soup = BeautifulSoup(page.content, 'html.parser')

In [19]:
# import re
# soup.find_all('span', text=re.compile("^Updated"))[0].text

In [20]:
# def get_updated_date(url='https://globalnews.ca/news/6859636/ontario-coronavirus-timeline/'):
#     '''
#     return updated date from website
#     '''
#     page = requests.get(url)

#     soup = BeautifulSoup(page.content, 'html.parser')
#     return soup.find_all('span', text=re.compile("^Updated"))[0].text

In [21]:
# soup.find_all('strong',text=re.compile("^[A-Za-z]{3,4}\."))[:10]

In [22]:
# ps = soup.find_all('p')
# print(len(ps))
# # for i in range(5,10):
# #     if ps[i].find('strong'):
# #         print(ps[i].text)
        
# events = [ps[i].text.replace(u'\xa0', u' ') for i in range(6,len(ps)) if ps[i].find('strong')]


In [23]:
# print(events[-5:])

In [24]:
# def get_news_table(url = 'https://globalnews.ca/news/6859636/ontario-coronavirus-timeline/'):
#     '''
#     Get the news data from URL
#     '''
#     page = requests.get(URL)
#     soup = BeautifulSoup(page.content, 'html.parser')
#     ps = soup.find_all('p')
#     events = [ps[i].text.replace(u'\xa0', u' ') for i in range(6,len(ps)) if ps[i].find('strong')]
#     dates = [e.split(':',1)[0] for e in events]
#     descriptions = [e.split(':',1)[1].strip() for e in events]
#     news = pd.DataFrame({'Date':dates, 'Description':descriptions})
    
#     return news

In [25]:
# # s.split('mango', 1)[1]
# dates = [e.split(':',1)[0] for e in events]
# descriptions = [e.split(':',1)[1].strip() for e in events]

In [26]:
# news = pd.DataFrame({'Date':dates, 'Description':descriptions})

           DATE age_category  percent_positive_7d_avg
0    2020-05-01        0to13                   0.0340
756  2020-05-01       25to64                   0.0503
504  2020-05-01       18to24                   0.0437
252  2020-05-01       14to17                   0.0337
1008 2020-05-01          65+                   0.0427
