In [1]:
import pandas as pd
import plotly.express as px
import numpy as np
from dash import Dash, dcc, html, Input, Output,no_update

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


### Display data

In [2]:
data = pd.read_csv("worldometer_coronavirus_daily_data.csv")
data

Unnamed: 0,date,country,cumulative_total_cases,daily_new_cases,active_cases,cumulative_total_deaths,daily_new_deaths
0,2020-2-15,Afghanistan,0.0,,0.0,0.0,
1,2020-2-16,Afghanistan,0.0,,0.0,0.0,
2,2020-2-17,Afghanistan,0.0,,0.0,0.0,
3,2020-2-18,Afghanistan,0.0,,0.0,0.0,
4,2020-2-19,Afghanistan,0.0,,0.0,0.0,
...,...,...,...,...,...,...,...
184782,2022-5-10,Zimbabwe,248642.0,106.0,963.0,5481.0,2.0
184783,2022-5-11,Zimbabwe,248778.0,136.0,1039.0,5481.0,0.0
184784,2022-5-12,Zimbabwe,248943.0,165.0,1158.0,5481.0,0.0
184785,2022-5-13,Zimbabwe,249131.0,188.0,1283.0,5482.0,1.0


Data have different types and also temporal aspcet with date column as we can see changes over time


In [3]:
data['date'] = pd.to_datetime(data['date'])
population = pd.read_csv("worldometer_coronavirus_summary_data.csv") #load the population
population = population[['country', 'population']]
population

Unnamed: 0,country,population
0,Afghanistan,40560636
1,Albania,2871945
2,Algeria,45325517
3,Andorra,77495
4,Angola,34769277
...,...,...
221,Wallis And Futuna Islands,10873
222,Western Sahara,624681
223,Yemen,31049015
224,Zambia,19342381


In [4]:
data = pd.merge(data, population, on='country', how='left')#merge the datasets
data

Unnamed: 0,date,country,cumulative_total_cases,daily_new_cases,active_cases,cumulative_total_deaths,daily_new_deaths,population
0,2020-02-15,Afghanistan,0.0,,0.0,0.0,,40560636
1,2020-02-16,Afghanistan,0.0,,0.0,0.0,,40560636
2,2020-02-17,Afghanistan,0.0,,0.0,0.0,,40560636
3,2020-02-18,Afghanistan,0.0,,0.0,0.0,,40560636
4,2020-02-19,Afghanistan,0.0,,0.0,0.0,,40560636
...,...,...,...,...,...,...,...,...
184782,2022-05-10,Zimbabwe,248642.0,106.0,963.0,5481.0,2.0,15265849
184783,2022-05-11,Zimbabwe,248778.0,136.0,1039.0,5481.0,0.0,15265849
184784,2022-05-12,Zimbabwe,248943.0,165.0,1158.0,5481.0,0.0,15265849
184785,2022-05-13,Zimbabwe,249131.0,188.0,1283.0,5482.0,1.0,15265849


In [5]:
missing_population = data[data['population'].isna()]#check if every country has a population assigned
print("Countries without population data:")
print(missing_population['country'].unique())

Countries without population data:
[]


In [6]:
countries_in_dataset = data['country'].unique()
print("Countries in dataset:")#get list of countries
print(countries_in_dataset)

Countries in dataset:
['Afghanistan' 'Albania' 'Algeria' 'Andorra' 'Angola' 'Anguilla'
 'Antigua And Barbuda' 'Argentina' 'Armenia' 'Aruba' 'Australia' 'Austria'
 'Azerbaijan' 'Bahamas' 'Bahrain' 'Bangladesh' 'Barbados' 'Belarus'
 'Belgium' 'Belize' 'Benin' 'Bermuda' 'Bhutan' 'Bolivia'
 'Bosnia And Herzegovina' 'Botswana' 'Brazil' 'British Virgin Islands'
 'Brunei Darussalam' 'Bulgaria' 'Burkina Faso' 'Burundi' 'Cabo Verde'
 'Cambodia' 'Cameroon' 'Canada' 'Caribbean Netherlands' 'Cayman Islands'
 'Central African Republic' 'Chad' 'Channel Islands' 'Chile'
 'China Hong Kong Sar' 'China Macao Sar' 'China' 'Colombia' 'Comoros'
 'Congo' 'Cook Islands' 'Costa Rica' 'Cote D Ivoire' 'Croatia' 'Cuba'
 'Curacao' 'Cyprus' 'Czech Republic' 'Democratic Republic Of The Congo'
 'Denmark' 'Djibouti' 'Dominica' 'Dominican Republic' 'Ecuador' 'Egypt'
 'El Salvador' 'Equatorial Guinea' 'Eritrea' 'Estonia' 'Ethiopia'
 'Faeroe Islands' 'Falkland Islands Malvinas' 'Fiji' 'Finland' 'France'
 'French Guiana'

In [7]:
countries_to_remove = [
    'French Guiana', 'Reunion', 'Guadeloupe', 'Martinique', 'Mayotte', 
    'Saint Barthelemy', 'Saint Martin', 'Saint Pierre And Miquelon', 
    'French Polynesia', 'Anguilla', 'British Virgin Islands', 
    'Cayman Islands', 'Channel Islands', 'Montserrat', 
    'Turks And Caicos Islands', 'Aruba', 'Caribbean Netherlands', 
    'Curacao', 'Sint Maarten', 'US Virgin Islands', 
    'China Hong Kong Sar', 'China Macao Sar', 'Cook Islands', 
    'Faeroe Islands', 'San Marino', 'Liechtenstein', 'Grenada'
]#these are not shown on the dash map

data = data[~data['country'].isin(countries_to_remove)]


### Normalize the data to values per milion according to the population

In [8]:
data['cases_per_million'] = (data['cumulative_total_cases'] / data['population']) * 1_000_000
data['new_cases_per_million'] = (data['daily_new_cases'] / data['population']) * 1_000_000
data['active_cases_per_million'] = (data['active_cases'] / data['population']) * 1_000_000
data['deaths_per_million'] = (data['cumulative_total_deaths'] / data['population']) * 1_000_000
data['new_deaths_per_million'] = (data['daily_new_deaths'] / data['population']) * 1_000_000

data




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['cases_per_million'] = (data['cumulative_total_cases'] / data['population']) * 1_000_000
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['new_cases_per_million'] = (data['daily_new_cases'] / data['population']) * 1_000_000
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['active_cases_p

Unnamed: 0,date,country,cumulative_total_cases,daily_new_cases,active_cases,cumulative_total_deaths,daily_new_deaths,population,cases_per_million,new_cases_per_million,active_cases_per_million,deaths_per_million,new_deaths_per_million
0,2020-02-15,Afghanistan,0.0,,0.0,0.0,,40560636,0.000000,,0.000000,0.000000,
1,2020-02-16,Afghanistan,0.0,,0.0,0.0,,40560636,0.000000,,0.000000,0.000000,
2,2020-02-17,Afghanistan,0.0,,0.0,0.0,,40560636,0.000000,,0.000000,0.000000,
3,2020-02-18,Afghanistan,0.0,,0.0,0.0,,40560636,0.000000,,0.000000,0.000000,
4,2020-02-19,Afghanistan,0.0,,0.0,0.0,,40560636,0.000000,,0.000000,0.000000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
184782,2022-05-10,Zimbabwe,248642.0,106.0,963.0,5481.0,2.0,15265849,16287.466226,6.943603,63.081981,359.036697,0.131011
184783,2022-05-11,Zimbabwe,248778.0,136.0,1039.0,5481.0,0.0,15265849,16296.375000,8.908774,68.060414,359.036697,0.000000
184784,2022-05-12,Zimbabwe,248943.0,165.0,1158.0,5481.0,0.0,15265849,16307.183439,10.808439,75.855591,359.036697,0.000000
184785,2022-05-13,Zimbabwe,249131.0,188.0,1283.0,5482.0,1.0,15265849,16319.498509,12.315070,84.043803,359.102203,0.065506


In [9]:
app = Dash(__name__)

app.layout = html.Div([
    html.H1("COVID-19 Data Visualization", style={'textAlign': 'center'}),
    html.Div([
        html.Div([
            dcc.Dropdown(
                id='metric-dropdown',
                options=[
                    {'label': 'Cases Per Million', 'value': 'cases_per_million'},
                    {'label': 'New Cases Per Million', 'value': 'new_cases_per_million'},
                    {'label': 'Active Cases Per Million', 'value': 'active_cases_per_million'},
                    {'label': 'Deaths Per Million', 'value': 'deaths_per_million'},
                    {'label': 'New Deaths Per Million', 'value': 'new_deaths_per_million'}
                ],
                value='cases_per_million',
                placeholder="Select a Metric for the Map",
                style={'width': '90%', 'margin': 'auto', 'marginBottom': '20px'}
            ),
                html.Div([
                    html.Label("Select Date", style={'textAlign': 'center', 'marginBottom': '10px'}),
                    html.Div(
                        [
                            html.Span(f"Min: {data['date'].min().strftime('%Y-%m-%d')}", style={'fontSize': '14px', 'fontWeight': 'bold', 'color': '#333'}),
                            html.Span(f"Max: {data['date'].max().strftime('%Y-%m-%d')}", style={'fontSize': '14px', 'fontWeight': 'bold', 'color': '#333'}),
                        ],
                        style={
                            'display': 'flex',
                            'justifyContent': 'space-between',
                            'width': '90%',
                            'margin': 'auto',
                            'marginBottom': '5px'
                        }
                    ),
                ]),
    
            dcc.Slider(
                id='date-slider',
                min=0,
                max=len(data['date'].unique()) - 1,
                step=1,
                value=0,
                marks={i: date.strftime('%Y-%m-%d') for i, date in enumerate(sorted(data['date'].unique()))},
                tooltip={"placement": "bottom", "always_visible": True},
            ),
            dcc.Graph(
                id='choropleth-map',
                style={'height': '800px', 'width': '100%'}
            )
        ], style={'flex': '2', 'padding': '20px', 'boxShadow': '0px 4px 8px rgba(0, 0, 0, 0.2)', 'marginRight': '10px'}),

        html.Div([
            html.Div([
                dcc.Dropdown(
                    id='linechart-1-country-dropdown',
                    options=[{'label': country, 'value': country} for country in sorted(data['country'].unique())],
                    value='China',#default country displayed
                    placeholder="Select a Country for Linechart 1",
                    style={'marginBottom': '10px'}
                ),
                dcc.Graph(
                    id='line-chart-1',
                    style={'height': '400px', 'width': '100%'}
                )
            ], style={'marginBottom': '30px'}),

            html.Div([
                dcc.Dropdown(
                    id='linechart-2-country-dropdown',
                    options=[{'label': country, 'value': country} for country in sorted(data['country'].unique())],
                    value='China',
                    placeholder="Select a Country for Linechart 2",
                    style={'marginBottom': '10px'}
                ),
                dcc.Graph(
                    id='line-chart-2',
                    style={'height': '400px', 'width': '100%'}
                )
            ], style={'marginBottom': '30px'}),

            html.Div([
                dcc.Dropdown(
                    id='piechart-country-dropdown',
                    options=[{'label': country, 'value': country} for country in sorted(data['country'].unique())],
                    value='China',
                    placeholder="Select a Country for Pie Chart",
                    style={'marginBottom': '10px'}
                ),
                dcc.Graph(
                    id='pie-chart',
                    style={'height': '400px', 'width': '100%'}
                )
            ])
        ], style={
            'flex': '1',
            'padding': '20px',
            'boxShadow': '0px 4px 8px rgba(0, 0, 0, 0.2)',
            'marginLeft': '10px',
            'height': '800px',
            'overflowY': 'scroll',
        })
    ], style={'display': 'flex', 'width': '90%', 'margin': 'auto'})
])


@app.callback(
    Output('choropleth-map', 'figure'),
    [Input('metric-dropdown', 'value'),
     Input('date-slider', 'value')]
)
def update_map(selected_normalized_metric, selected_date_index):
    unique_dates = sorted(data['date'].unique()) #get dates
    selected_date = unique_dates[selected_date_index]
    filtered_df = data[data['date'] == selected_date] #get the data for a given date

    hover_data = {
        'country': True,
        'population': ':,.2f',
        selected_normalized_metric: True
    }

    fig = px.choropleth(
        filtered_df,
        locations='country',
        locationmode='country names',
        color=selected_normalized_metric,
        hover_name='country',
        hover_data=hover_data,
        title=f"{selected_normalized_metric.replace('_', ' ').title()} on {selected_date.strftime('%Y-%m-%d')}",
        color_continuous_scale='Viridis'
    )

    fig.update_coloraxes(
        colorbar=dict(
            title=selected_normalized_metric.replace('_', ' ').title() #rename the scale so it is clear
        )
    )
    return fig

@app.callback(
    [Output('linechart-1-country-dropdown', 'value'),
     Output('linechart-2-country-dropdown', 'value'),
     Output('piechart-country-dropdown', 'value')],
    [Input('choropleth-map', 'clickData')]
)
def update_dropdowns_on_map_click(click_data):
    #keep the clicks and dropdowns in sync
    if click_data is not None:
        selected_country = click_data['points'][0]['location']
        return selected_country, selected_country, selected_country
    return no_update, no_update, no_update

@app.callback(
    [Output('line-chart-1', 'figure'),
     Output('line-chart-2', 'figure'),
     Output('pie-chart', 'figure')],
    [Input('linechart-1-country-dropdown', 'value'),
     Input('linechart-2-country-dropdown', 'value'),
     Input('piechart-country-dropdown', 'value'),
     Input('date-slider', 'value')]
)
def update_charts(country1, country2, pie_country, selected_date_index):
    unique_dates = sorted(data['date'].unique())
    selected_date = unique_dates[selected_date_index]

    def get_country_date_range(country):
        country_data = data[data['country'] == country]
        if not country_data.empty:
            return country_data['date'].min(), country_data['date'].max() #time range for each country to display correct ranges on the plots
        return None, None

    date_range1 = get_country_date_range(country1)
    date_range2 = get_country_date_range(country2)

    def adjust_date_range(date_range, selected_date):
        if date_range[0] is None or date_range[1] is None:
            return None #no data available
        if selected_date == date_range[0]:
            #if the selected date is the first in data display range from the selected date to date+1 so the plot makes sense
            extended_date = unique_dates[min(selected_date_index + 1, len(unique_dates) - 1)]
            return date_range[0], min(date_range[1], extended_date)
        return date_range[0], min(date_range[1], selected_date)

    adjusted_date_range1 = adjust_date_range(date_range1, selected_date)
    adjusted_date_range2 = adjust_date_range(date_range2, selected_date)
    

    filtered_data1 = data[
        (data['country'] == country1) &
        (data['date'] >= adjusted_date_range1[0]) &
        (data['date'] <= adjusted_date_range1[1])
    ] if adjusted_date_range1 else pd.DataFrame() #if range exists filter the data

    filtered_data2 = data[
        (data['country'] == country2) &
        (data['date'] >= adjusted_date_range2[0]) &
        (data['date'] <= adjusted_date_range2[1])
    ] if adjusted_date_range2 else pd.DataFrame()

    filtered_data_pie = data[
    (data['country'] == pie_country) & (data['date'] == selected_date)
    ] if pie_country else pd.DataFrame()


    #display the linecharts if there are any
    if not filtered_data1.empty:
        fig1 = px.line(
            filtered_data1,
            x='date',
            y='cumulative_total_cases',
            title=f"Cumulative Total Cases in {country1} <br>(from {adjusted_date_range1[0].strftime('%Y-%m-%d')} to {adjusted_date_range1[1].strftime('%Y-%m-%d')})"
        )
        fig1.update_layout(
            xaxis_title="Date",
            yaxis_title="Cumulative Total Cases"
        )
    else:
        fig1 = px.scatter()
        fig1.update_layout(
            title=f"No data available for {country1}",
            xaxis=dict(visible=False),
            yaxis=dict(visible=False),
            annotations=[dict(text="No data available", xref="paper", yref="paper", showarrow=False, font=dict(size=15, color="red"))]
        )

    if not filtered_data2.empty:
        fig2 = px.line(
            filtered_data2,
            x='date',
            y='cumulative_total_deaths',
            title=f"Cumulative Total Deaths in {country2} <br>(from {adjusted_date_range2[0].strftime('%Y-%m-%d')} to {adjusted_date_range2[1].strftime('%Y-%m-%d')})"
        )
        fig2.update_layout(
            xaxis_title="Date",
            yaxis_title="Cumulative Total Deaths"
        )
    else:
        fig2 = px.scatter()
        fig2.update_layout(
            title=f"No data available for {country2}",
            xaxis=dict(visible=False),
            yaxis=dict(visible=False),
            annotations=[dict(text="No data available", xref="paper", yref="paper", showarrow=False, font=dict(size=15, color="red"))]
        )

    if not filtered_data_pie.empty:
        row = filtered_data_pie.iloc[0] #data z vybraneho dna
        pie_values = [
            row.get('daily_new_cases', 0) if not pd.isna(row.get('daily_new_cases')) else 0,
            row.get('active_cases', 0) if not pd.isna(row.get('active_cases')) else 0,
            row.get('daily_new_deaths', 0) if not pd.isna(row.get('daily_new_deaths')) else 0
        ]
        #check if at least one value is non-zero
        if sum(pie_values) > 0:
            fig3 = px.pie(
                values=pie_values,
                names=['Daily New Cases', 'Active Cases', 'Daily New Deaths'],
                title=f"New Cases vs Active Cases vs Deaths in {pie_country} <br>(as of {selected_date.strftime('%Y-%m-%d')})"
            )
        else:
            fig3 = px.pie()
            fig3.update_layout(
                title=f"No significant data available for {pie_country}",
                annotations=[
                    dict(
                        text="No data to display",
                        x=0.5,
                        y=0.5,
                        showarrow=False,
                        font=dict(size=15, color="red"),
                        xanchor="center",
                        yanchor="middle"
                    )
                ]
            )
    else:
        #no data available
        fig3 = px.pie()
        fig3.update_layout(
            title=f"No data available for {pie_country}",
            annotations=[
                dict(
                    text="No data available",
                    x=0.5,
                    y=0.5,
                    showarrow=False,
                    font=dict(size=15, color="red"),
                    xanchor="center",
                    yanchor="middle"
                )
            ]
        )


    return fig1, fig2, fig3

if __name__ == '__main__':
    app.run_server(port=8050, debug=True,  jupyter_mode="external")


Dash app running on http://127.0.0.1:8050/
