In [3]:
# Import the required libraries
import pandas as pd 
import numpy as np
import datetime
from datetime import date

# New Data

In [4]:
# Specify the dataset URL
info_url ='https://github.com/owid/covid-19-data/raw/master/public/data/owid-covid-data.csv'

# Read the dataset into a DataFrame
df = pd.read_csv(info_url)

# Show the top of the DataFrame
df.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,AFG,Asia,Afghanistan,2020-01-03,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
1,AFG,Asia,Afghanistan,2020-01-04,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
2,AFG,Asia,Afghanistan,2020-01-05,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
3,AFG,Asia,Afghanistan,2020-01-06,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
4,AFG,Asia,Afghanistan,2020-01-07,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,


In [5]:
# Specify the continents
continents_location = df[df['continent'].isna()]['location'].unique()

# Loop over each continent and drop it from the DataFrame
for x in continents_location:
    df = df.drop(df[df.location==x].index)

In [6]:
# Determining how many rows are NaN
df.isna().sum()

iso_code                                        0
continent                                       0
location                                        0
date                                            0
total_cases                                 37270
                                            ...  
population                                      0
excess_mortality_cumulative_absolute       303557
excess_mortality_cumulative                303557
excess_mortality                           303557
excess_mortality_cumulative_per_million    303557
Length: 67, dtype: int64

In [7]:
# Fill each NaN row with a 0
df = df.fillna(0)

In [8]:
# Checking the data type of each column
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 315092 entries, 0 to 330831
Data columns (total 67 columns):
 #   Column                                      Non-Null Count   Dtype  
---  ------                                      --------------   -----  
 0   iso_code                                    315092 non-null  object 
 1   continent                                   315092 non-null  object 
 2   location                                    315092 non-null  object 
 3   date                                        315092 non-null  object 
 4   total_cases                                 315092 non-null  float64
 5   new_cases                                   315092 non-null  float64
 6   new_cases_smoothed                          315092 non-null  float64
 7   total_deaths                                315092 non-null  float64
 8   new_deaths                                  315092 non-null  float64
 9   new_deaths_smoothed                         315092 non-null  float64
 

In [9]:
# Selected a sample of each column that is of type 'Object'
df.select_dtypes(object).sample(5)

Unnamed: 0,iso_code,continent,location,date,tests_units
267412,SXM,North America,Sint Maarten (Dutch part),2021-07-09,0
127449,HND,North America,Honduras,2021-08-26,0
248077,KNA,North America,Saint Kitts and Nevis,2022-02-13,people tested
311645,USA,North America,United States,2020-12-06,tests performed
8394,AGO,Africa,Angola,2021-07-02,0


In [10]:
# Specifying the columns that are not required
drop_cols=['iso_code','tests_units']

# Droping Said columns
df = df.drop(columns=drop_cols)

In [11]:
# Converting floats to integers
for x in df.select_dtypes(float).columns:
    df[x] = df[x].astype(int)

In [12]:
# Converting the column 'date' to datetime
df['date'] = pd.to_datetime(df['date'])

In [13]:
# Confirming the changes were successful
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 315092 entries, 0 to 330831
Data columns (total 65 columns):
 #   Column                                      Non-Null Count   Dtype         
---  ------                                      --------------   -----         
 0   continent                                   315092 non-null  object        
 1   location                                    315092 non-null  object        
 2   date                                        315092 non-null  datetime64[ns]
 3   total_cases                                 315092 non-null  int64         
 4   new_cases                                   315092 non-null  int64         
 5   new_cases_smoothed                          315092 non-null  int64         
 6   total_deaths                                315092 non-null  int64         
 7   new_deaths                                  315092 non-null  int64         
 8   new_deaths_smoothed                         315092 non-null  int64        

In [14]:
# Defining the function to get the selected column drom the dataframe
def get_col(df,column):
    ## Get the subset df to work off
    cases = df[['location','date',column]]

    ## Create the dates variable and sort it
    dates = cases['date'].unique()
    dates.sort()


    ## Get the len of dates
    len_dates = len(dates)

    ## Get the unique locations
    locations = [x for x in cases['location'].unique()]

    data = list()
    ## Loop through each location to get the column and merge it to the dataframe 
    for x in locations:
        y = ()
        len_y = 0 
        y = list(cases[cases['location']==x][column])
        len_y = len(y)
        len_zeroes = len_dates-len_y
        zeroes = list(np.zeros(len_zeroes))
        merge = zeroes + y
        data.append(merge)
    data = np.array(data).T
    df2 = pd.DataFrame(data,columns=locations,index=dates)
    df2 = df2.astype(int)
    
    return df2

In [15]:
# Getting the DataFrames necessary to graph
deaths = get_col(df,'total_deaths')
cases = get_col(df,'total_cases')
new_cases = get_col(df,'new_cases')
new_deaths = get_col(df,'new_deaths')

In [16]:
# Creating a function to get the continent's information
def get_world(col):
    date = df['date'].max()

    continent_cases = df[df['date']==date.strftime('%Y-%m-%d')]

    total_cases_world = pd.DataFrame(continent_cases.groupby('continent')[col].sum())
    
    return total_cases_world

In [17]:
# Getting the total cases for the World using the function
total_cases_world = get_world('total_cases')

# Vaccination Data

In [18]:
# Specifying the Vaccination dataset URL
vac_url = 'https://github.com/owid/covid-19-data/raw/master/public/data/vaccinations/vaccinations.csv'

# Reading the dataset into a DataFrame
vac_df = pd.read_csv(vac_url)

vac_df.head()

Unnamed: 0,location,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,daily_vaccinations_per_million,daily_people_vaccinated,daily_people_vaccinated_per_hundred
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,,0.0,0.0,,,,,
1,Afghanistan,AFG,2021-02-23,,,,,,1367.0,,,,,33.0,1367.0,0.003
2,Afghanistan,AFG,2021-02-24,,,,,,1367.0,,,,,33.0,1367.0,0.003
3,Afghanistan,AFG,2021-02-25,,,,,,1367.0,,,,,33.0,1367.0,0.003
4,Afghanistan,AFG,2021-02-26,,,,,,1367.0,,,,,33.0,1367.0,0.003


In [19]:
# Creating a new DataFrame
vac_df_max = pd.DataFrame(columns=vac_df.columns)

# Looping over every location and getting the most recent data then merging it to the new DataFrame
for location in vac_df['location'].unique():
    result = vac_df.loc[(vac_df['location']==location)&(vac_df['date']==vac_df.loc[(vac_df['location']==location)]['date'].max())]
    vac_df_max = pd.concat((vac_df_max,result))

# Droping the continent information from the DataFrame
for x in continents_location:
    vac_df_max = vac_df_max.drop(vac_df_max[vac_df_max.location==x].index)
    
# Filling NaNs with 0
vac_df_max = vac_df_max.fillna(0)

# Converting columns
vac_df_max['date'] = pd.to_datetime(vac_df_max['date'])

# Getting the most recent date
date = vac_df_max['date'].max()

# More data preprocessing for the vaccination dataset
vac_cols = [x for x in vac_df_max.columns[7::]]
vac_cols.append('iso_code')
vac_cols.append('date')
vac_cols.append('total_vaccinations')
vac_df_max = vac_df_max.drop(columns=vac_cols)
vac_df_max = vac_df_max.set_index('location').T

# Changing column data types and index labels
vac_df_max = vac_df_max.astype(int)
vac_df_max = vac_df_max.rename(index={'people_vaccinated': "People Vaccinated", 
                     'people_fully_vaccinated': "People Fully Vaccinated", 
                     'total_boosters': "Total Boosters"})

In [20]:
vac_df_max

location,Afghanistan,Albania,Algeria,Andorra,Angola,Anguilla,Antigua and Barbuda,Argentina,Armenia,Aruba,...,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,Wales,Wallis and Futuna,Yemen,Zambia,Zimbabwe
People Vaccinated,17579369,1348396,7840131,57912,16177587,10854,64290,41502788,1122646,90468,...,3010161,22077238,176624,22157232,90272853,0,7150,1037766,11711565,6437808
People Fully Vaccinated,16833190,1277987,6481186,53500,9209276,10380,62384,34891250,996804,84332,...,2902771,18318860,162250,0,85961564,0,6803,794265,0,4751270
Total Boosters,2151349,399190,575651,43068,2437087,0,9838,36763538,45446,0,...,3118412,16000801,27697,651502,57975220,0,3766,65523,0,1033676


# Dash

In [21]:
# Import the required libraries for Dash and Plotly
import dash
from dash import dash_table
from dash import html as html
from dash import dcc as dcc
import plotly.graph_objects as go
import plotly.express as px
from dash.dependencies import Input, Output
from dash import State, html
import dash_bootstrap_components as dbc
from jupyter_dash import JupyterDash

In [22]:
# Specifying the button group
button_groups = dbc.ButtonGroup(
    [dbc.Button("Learn More",href='https://www.who.int/emergencies/diseases/novel-coronavirus-2019'), dbc.Button("Github",href='https://github.com/ronanmccormack-ca/DAV2021-Mini-Project')]
)

In [23]:
# Creating the Dash App
app = dash.Dash(__name__,external_stylesheets=[dbc.themes.ZEPHYR])
server = app.server

# Getting some information for use in the dashboard
loc_columns = [x for x in df['location'].unique()]
night_colors = ['rgb(56, 75, 126)', 'rgb(18, 36, 37)', 'rgb(34, 53, 101)',
                'rgb(36, 55, 57)', 'rgb(6, 4, 4)']
top_cases_country = [x for x in df[df['date']==df['date'].max()].sort_values(by='total_cases',ascending=False)['location'].head(5)]
cases_min = cases.index.min().strftime('%Y-%m-%d')
cases_max = cases.index.max().strftime('%Y-%m-%d')

# App layout
app.layout = html.Div(id = 'parent', children = [
    html.H1(id = 'H1', children = 'COVID-19 Dashboard', style = {'textAlign':'center',\
                                            'marginTop':30,'marginBottom':30}),
    html.Div(children='''
        Created by Ronan McCormack S00144576
    ''',style={
            'textAlign': 'center',
            'color': 'primary',
            'marginBottom':10
        }),
    # The dropdown to selected which country you wish to view
    html.Div(id = 'cases_graph', children= [
        dbc.Row([ 
            button_groups
        ]),
        dcc.Dropdown(
                id='dropdown',
                options=[{'label': i, 'value': i} for i in loc_columns],
                value='Ireland',
                style={
                    'width':'90%',
                    'padding-left':'20%',
                    'marginTop':20
                }
            ),
        dcc.Dropdown(
                id='dropdown_date',
                options=[{'label': i, 'value': i} for i in cases.index.strftime('%Y-%m-%d')],
                value=cases_max,
                style={
                    'width':'90%',
                    'padding-left':'20%',
                    'marginTop':20
                }
            ),
        ]),
    # The cards which display the raw data
    html.Div(id='cards',children=[
        dbc.Row([
        dbc.Card([
            dbc.CardBody([
                html.P("Total Cases", className='card-title'),
                html.H5(id='cases_card',className='card-text'),
            ]),
        ],color="primary", outline=True,
        style={"marginTop":20,
              'width':'25%',
              'textAlign': 'center'}),
        dbc.Card([
            dbc.CardBody([
                html.P("Total Deaths", className='card-title'),
                html.H5(id='deaths_card',className='card-text'),
            ]),
        ],color="primary", outline=True,
        style={"marginTop":20,
              'width':'25%',
              'textAlign': 'center'}),
        dbc.Card([
            dbc.CardBody([
                html.P("New Cases", className='card-title'),
                html.H5(id='new_cases_card',className='card-text'),
            ]),
        ],color="primary", outline=True,
        style={"marginTop":20,
              'width':'25%',
              'textAlign': 'center'}),
        dbc.Card([
            dbc.CardBody([
                html.P("New Deaths", className='card-title'),
                html.H5(id='new_deaths_card',className='card-text'),
            ]),
        ],color="primary", outline=True,
        style={"marginTop":20,
              'width':'25%',
              'textAlign': 'center'}),
        ]), 
    ]),
    # The Covid Gaphs used in the dashboard
    html.Div(id='new_case_graph',children=[
        dcc.Graph(id='cases_plot',style={'display': 'inline-block', 'width': '50%',"marginTop":20,}),
        dcc.Graph(id='new_case_plot',style={'display': 'inline-block', 'width': '50%'})
    ]),
    dbc.Container([    
    dcc.Graph(id='top_country_plot',style={'display': 'inline-block', 'width': '100%'}),
]),
    html.Div(id='vac_graph',children=[
        dcc.Graph(id='new_world_plot',style={'display': 'inline-block', 'width': '50%'}),
        dcc.Graph(id='total_world_plot',style={'display': 'inline-block', 'width': '50%'}),
        ]),
])

## App Callbacks
@app.callback(Output(component_id='cases_plot', component_property= 'figure'),
              (Output(component_id='new_case_plot',component_property='figure')),
              (Output(component_id='total_world_plot',component_property='figure')),
              (Output(component_id='new_world_plot',component_property='figure')),
              (Output(component_id='top_country_plot',component_property='figure')),
              (Output(component_id='cases_card',component_property='children')),
              (Output(component_id='deaths_card',component_property='children')),
              (Output(component_id='new_cases_card',component_property='children')),
              (Output(component_id='new_deaths_card',component_property='children')),
              [Input(component_id='dropdown', component_property= 'value'),
              Input(component_id='dropdown_date', component_property= 'value')])

# Function used to return data based on the selected inputs              
def cases_update(dropdown_value,dropdown_date_value):
    cases_min = cases.index.min().strftime('%Y-%m-%d')
    start_date = cases_min
    end_date = dropdown_date_value
    mask = (cases.index >= start_date) & (cases.index <= end_date)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x = cases.loc[mask].index, y = cases['{}'.format(dropdown_value)],\
                      mode='lines',name='Total Cases',line = dict(color = 'black', width = 4)))
    fig.add_trace(go.Scatter(x = deaths.loc[mask].index, y = deaths['{}'.format(dropdown_value)],\
                     mode='lines',name='Total Deaths',line = dict(color = 'blue', width = 4)))
    fig.update_layout(title={'text':'Total Cases & Deaths', 
                      'y':0.9,'x':0.5,'yanchor': 'top'},
                      xaxis_title = 'Date',
                      yaxis_title = 'Confirmed Cases & Deaths'
                      )
    fig2 = go.Figure()
    fig2.add_trace(go.Bar(x = new_cases.loc[mask].index, y = new_cases['{}'.format(dropdown_value)],\
                     name='New Cases',marker = dict(color = 'black')))
    fig2.add_trace(go.Bar(x = new_deaths.loc[mask].index, y = new_deaths['{}'.format(dropdown_value)],\
                     name='New Deaths',marker = dict(color = 'blue')))
    fig2.update_layout(title={'text':'New Cases & Deaths',           
                       'y':0.9,'x':0.5,'yanchor': 'top'},
                      xaxis_title = 'Date',
                      yaxis_title = 'Confirmed New Cases',
                      )
    fig4 = go.Figure([go.Bar(x=total_cases_world.index, y=total_cases_world['total_cases'],
                            marker_color=night_colors)
                     ])

    fig4.update_layout(title={'text':'Total COVID Cases Per Continent',           
                   'y':0.9,'x':0.5,'yanchor': 'top'}
                      )
    fig5 = go.Figure([go.Pie(labels=vac_df_max.index, values=vac_df_max['{}'.format(dropdown_value)],
                            hole=.3,marker_colors=night_colors)
                     ])

    fig5.update_layout(title={'text':'Percentage of All People Vaccinationed in '+'{}'.format(dropdown_value),           
                   'y':0.9,'x':0.5,'yanchor': 'top'}
                      )
    top_cases_country.append('{}'.format(dropdown_value))
    fig6 = go.Figure()
    for country in set(top_cases_country):
        fig6.add_trace(go.Scatter(x = cases.loc[mask].index, y = cases[country],\
                                 name=country,))
    
    fig6.update_layout(title={'text':'{}'.format(dropdown_value)+' Compared to Top 5 Countries - Total Cases',           
                   'y':0.9,'x':0.5,'yanchor': 'top'},
                  xaxis_title = 'Date',
                  yaxis_title = 'Confirmed New Cases',
                  )
    cases_format = cases.iloc[mask]['{}'.format(dropdown_value)].iloc[-1]
    deaths_format = deaths.iloc[mask]['{}'.format(dropdown_value)].iloc[-1]
    new_cases_format = new_cases.iloc[mask]['{}'.format(dropdown_value)].iloc[-1]
    new_deaths_format = new_deaths.iloc[mask]['{}'.format(dropdown_value)].iloc[-1]
    return fig, fig2, fig4, fig5, fig6, cases_format,deaths_format,new_cases_format,new_deaths_format
if __name__ == '__main__': 
    app.run_server(host='localhost',port=8000)

Dash is running on http://localhost:8000/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://localhost:8000/ (Press CTRL+C to quit)
127.0.0.1 - - [08/Aug/2023 20:00:45] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [08/Aug/2023 20:00:45] "GET /_dash-component-suites/dash/deps/polyfill@7.v2_0_0m1632235559.12.1.min.js HTTP/1.1" 200 -
127.0.0.1 - - [08/Aug/2023 20:00:45] "GET /_dash-component-suites/dash_bootstrap_components/_components/dash_bootstrap_components.v1_0_0m1634714884.min.js HTTP/1.1" 200 -
127.0.0.1 - - [08/Aug/2023 20:00:45] "GET /_dash-component-suites/dash/deps/react-dom@16.v2_0_0m1632235559.14.0.min.js HTTP/1.1" 200 -
127.0.0.1 - - [08/Aug/2023 20:00:45] "GET /_dash-component-suites/dash/deps/prop-types@15.v2_0_0m1632235559.7.2.min.js HTTP/1.1" 200 -
127.0.0.1 - - [08/Aug/2023 20:00:45] "GET /_dash-component-suites/dash/dash-renderer/build/dash_renderer.v2_0_0m1632235559.min.js HTTP/1.1" 200 -
127.0.0.1 - - [08/Aug/2023 20:00:45] "GET /_dash-component-suites/dash/deps/react@16.v2_0_0m1632235559.14.0.min.js HTTP/1.1" 200 -
127.0.0.1 - - [08/Aug/2023 20