In [None]:
# install important packages
! pip install holoviews
! pip install panel
! pip install datashader
! pip install jupyter_bokeh
! pip install -U nbformat
! pip install geopy 

In [2]:
import pandas as pd
import holoviews as hv
from holoviews import opts
import plotly.express as px
import panel as pn
import requests
# import bokeh
# from io import StringIO
hv.extension('bokeh')


In [3]:
# URL to large CSV
url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"

output_path = "new_covid_data.csv"

def download_large_file(url, output_path):
    session = requests.Session()
    retry_count = 3
    for attempt in range(retry_count):
        try:
            with session.get(url, stream=True, timeout=60) as r:
                r.raise_for_status()  # Raises HTTPError if status != 200
                with open(output_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        if chunk:  # Filter out keep-alive chunks
                            f.write(chunk)
            print(f"Download completed: {output_path}")
            return
        except (requests.exceptions.ChunkedEncodingError, requests.exceptions.ConnectionError) as e:
            print(f"Attempt {attempt+1} failed: {e}")
    print("Failed to download the file after multiple attempts.")

# Run the download

In [4]:
download_large_file(url, output_path)

Download completed: new_covid_data.csv


In [5]:
new_df = pd.read_csv("new_covid_data.csv")

In [6]:
new_df.head(2)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,AFG,Asia,Afghanistan,2020-01-05,0.0,0.0,,0.0,0.0,,...,,37.746,0.5,64.83,0.511,41128772,,,,
1,AFG,Asia,Afghanistan,2020-01-06,0.0,0.0,,0.0,0.0,,...,,37.746,0.5,64.83,0.511,41128772,,,,


In [7]:
new_df.shape

(429435, 67)

In [10]:
# Load CSV into DataFrame
df = pd.read_csv("new_covid_data.csv", usecols=["location", "date", "new_cases", "total_cases"])

In [11]:
# Optional: Filter to specific countries
countries = ["Germany", "Russia", "India", "Italy", "Spain", "China", "Senegal"]
df = new_df[new_df["location"].isin(countries)]


In [12]:
df['location'].unique()

array(['China', 'Germany', 'India', 'Italy', 'Russia', 'Senegal', 'Spain'],
      dtype=object)

In [13]:
df.head(3)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
73670,CHN,Asia,China,2020-01-05,1.0,1.0,,0.0,0.0,,...,48.4,,4.34,76.91,0.761,1425887360,,,,
73671,CHN,Asia,China,2020-01-06,1.0,0.0,,0.0,0.0,,...,48.4,,4.34,76.91,0.761,1425887360,,,,
73672,CHN,Asia,China,2020-01-07,1.0,0.0,,0.0,0.0,,...,48.4,,4.34,76.91,0.761,1425887360,,,,


In [14]:
# Create dataset
dataset = hv.Dataset(df, kdims=["location", "date"], vdims=["new_cases"])

# Create individual curves per location
curves = dataset.to(hv.Curve, kdims=["date"], vdims=["new_cases"]).overlay('location')

# Display without datashading
curves.opts(
    opts.Curve(width=900, height=400, tools=['hover'], title="New COVID-19 Cases: East & West Africa")
)

In [15]:
# HIGH-PERFORMANCE VISUALIZATION IN PURE PYTHON
# A. Time Series (Holoviews + Datashader)

In [16]:
import plotly.io as pio

# Set renderer to browser
pio.renderers.default = "browser"
# Filter for latest date
latest = df[df['date'] == df['date'].max()]

print(latest[['location', 'new_cases']].dropna())
print(latest['new_cases'].sum())


# # Plot
# fig = px.bar(latest, x='location', y='new_cases', title="New Cases by Country")
# fig.show()

Empty DataFrame
Columns: [location, new_cases]
Index: []
0.0


In [17]:
# Ensure date is datetime
df['date'] = pd.to_datetime(df['date'])

# Filter to rows with new_cases > 0
non_zero = df[df['new_cases'] > 0]

# Get most recent date with non-zero cases
latest_date = non_zero['date'].max()

# Filter dataset
latest = df[df['date'] == latest_date]

# Sort and show top 10 countries
top = latest.sort_values(by='new_cases', ascending=False).head(10)

# Plot
fig = px.bar(top, x='location', y='new_cases', title=f"New COVID-19 Cases by Country ({latest_date.date()})")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [20]:
# Load & clean data
df = pd.read_csv("new_covid_data.csv")
df['date'] = pd.to_datetime(df['date'])
df['new_cases'] = pd.to_numeric(df['new_cases'], errors='coerce').fillna(0)
df = df[df['location'].notna()]


In [21]:
# Get latest date with non-zero new cases
non_zero = df[df['new_cases'] > 0]
latest_date = non_zero['date'].max()
latest = df[df['date'] == latest_date]
top10 = latest.sort_values('new_cases', ascending=False).head(10)

In [22]:
top10.head(10)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
424402,OWID_WRL,,World,2024-08-04,775866783.0,47169.0,6738.429,7057132.0,815.0,116.429,...,34.635,60.13,2.705,72.58,0.737,7975105024,,,,
120242,OWID_EUR,,Europe,2024-08-04,252916868.0,39047.0,5578.143,2102483.0,162.0,23.143,...,,,,,,744807803,,,,
166872,OWID_HIC,,High-income countries,2024-08-04,429044049.0,32293.0,4613.286,2997359.0,786.0,112.286,...,,,,,,1250514600,,,,
123276,OWID_EUN,,European Union (27),2024-08-04,185822587.0,25642.0,3663.143,1262988.0,150.0,21.429,...,,,,,,450146793,,,,
409811,OWID_UMC,,Upper-middle-income countries,2024-08-04,251753518.0,14277.0,2039.571,2824452.0,29.0,4.143,...,,,,,,2525921300,,,,
317756,RUS,Europe,Russia,2024-08-04,24268728.0,7777.0,1111.0,403188.0,10.0,1.429,...,58.3,,8.05,72.58,0.824,144713312,,,,
186948,ITA,Europe,Italy,2024-08-04,26781078.0,6350.0,907.143,197307.0,6.0,0.857,...,27.8,,3.18,83.51,0.892,59037472,,,,
147106,GRC,Europe,Greece,2024-08-04,5673681.0,5818.0,831.143,39258.0,40.0,5.714,...,52.0,,4.21,82.24,0.888,10384972,,,,
316082,ROU,Europe,Romania,2024-08-04,3541619.0,4633.0,661.857,68825.0,6.0,0.857,...,37.1,,6.892,76.05,0.828,19659270,,,,
21765,OWID_ASI,,Asia,2024-08-04,301499099.0,4515.0,645.0,1637249.0,17.0,2.429,...,,,,,,4721383370,,,,


In [23]:
# Unique countries for selection
countries = sorted(df['location'].unique().tolist())
default_countries = ['Europe', 'Asia', 'Italy']

In [24]:
countries

['Afghanistan',
 'Africa',
 'Albania',
 'Algeria',
 'American Samoa',
 'Andorra',
 'Angola',
 'Anguilla',
 'Antigua and Barbuda',
 'Argentina',
 'Armenia',
 'Aruba',
 'Asia',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahamas',
 'Bahrain',
 'Bangladesh',
 'Barbados',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bermuda',
 'Bhutan',
 'Bolivia',
 'Bonaire Sint Eustatius and Saba',
 'Bosnia and Herzegovina',
 'Botswana',
 'Brazil',
 'British Virgin Islands',
 'Brunei',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Cape Verde',
 'Cayman Islands',
 'Central African Republic',
 'Chad',
 'Chile',
 'China',
 'Colombia',
 'Comoros',
 'Congo',
 'Cook Islands',
 'Costa Rica',
 "Cote d'Ivoire",
 'Croatia',
 'Cuba',
 'Curacao',
 'Cyprus',
 'Czechia',
 'Democratic Republic of Congo',
 'Denmark',
 'Djibouti',
 'Dominica',
 'Dominican Republic',
 'East Timor',
 'Ecuador',
 'Egypt',
 'El Salvador',
 'England',
 'Equatorial Guinea',
 'Eritrea',
 'Estonia',
 'Eswatini

In [25]:
# Widgets
country_select = pn.widgets.MultiSelect(name='Select Countries',
            options=countries, value=default_countries, size=10)

In [26]:
# --- Geospatial COVID-19 Plot for Europe & Americas ---

# Focus on selected countries with reliable data
europe_america_countries = [
    # Europe
    "Germany", "France", "Italy", "Spain", "United Kingdom", "Poland", "Netherlands", "Greece",
    # North America
    "United States", "Canada", "Mexico",
    # South America
    "Brazil", "Argentina", "Colombia", "Chile", "Peru"
]

# Subset the DataFrame (assuming `df` is already defined with proper date/new_cases cleaning)
latest_date = df[df['new_cases'] > 0]['date'].max()
latest_df = df[(df['date'] == latest_date) & (df['location'].isin(europe_america_countries))]

# Hardcoded coordinates
coord_data = {
    "Germany": (51.1657, 10.4515),
    "France": (46.2276, 2.2137),
    "Italy": (41.8719, 12.5674),
    "Spain": (40.4637, -3.7492),
    "United Kingdom": (55.3781, -3.4360),
    "Poland": (51.9194, 19.1451),
    "Netherlands": (52.1326, 5.2913),
    "Greece": (39.0742, 21.8243),
    "United States": (37.0902, -95.7129),
    "Canada": (56.1304, -106.3468),
    "Mexico": (23.6345, -102.5528),
    "Brazil": (-14.2350, -51.9253),
    "Argentina": (-38.4161, -63.6167),
    "Colombia": (4.5709, -74.2973),
    "Chile": (-35.6751, -71.5430),
    "Peru": (-9.1899, -75.0152)
}

# Assign coordinates
latest_df['latitude'] = latest_df['location'].map(lambda x: coord_data.get(x, (None, None))[0])
latest_df['longitude'] = latest_df['location'].map(lambda x: coord_data.get(x, (None, None))[1])
geo_df = latest_df.dropna(subset=['latitude', 'longitude'])

# Create geospatial figure
geo_fig = px.scatter_geo(
    geo_df,
    lat='latitude',
    lon='longitude',
    hover_name='location',
    size='new_cases',
    color='new_cases',
    projection='natural earth',
    title=f"COVID-19 New Cases in Europe and the Americas ({latest_date.date()})",
    color_continuous_scale='Reds'
)

#geo_fig.update_layout(margin=dict(l=0, r=0, t=50, b=0))




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [28]:

df = pd.read_csv("new_covid_data.csv")
df['date'] = pd.to_datetime(df['date'])
df['new_cases'] = pd.to_numeric(df['new_cases'], errors='coerce').fillna(0)

# Most recent date with actual new cases
latest_nonzero_date = df[df['new_cases'] > 0]['date'].max()
latest_df = df[df['date'] == latest_nonzero_date]
top10 = latest_df.sort_values("new_cases", ascending=False).head(10)

# Country selector widget
countries = sorted(df['location'].unique())
country_select = pn.widgets.MultiSelect(
    name="Select Countries",
    options=countries,
    value=["Nigeria", "Kenya", "Ghana"],
    size=10,
    width=300
)

# Bar chart function
def bar_chart():
    fig = px.bar(top10, x='location', y='new_cases',
                 title=f"Top 10 Countries by New COVID-19 Cases ({latest_nonzero_date.date()})",
                 color='new_cases', color_continuous_scale='OrRd')
    fig.update_layout(margin=dict(l=20, r=20, t=40, b=20), height=400)
    return fig

# Line chart (remove decorator!)
def line_chart(selected):
    d = df[df['location'].isin(selected)]
    fig = px.line(d, x='date', y='new_cases', color='location',
                  title="New COVID-19 Cases Over Time")
    fig.update_layout(margin=dict(l=20, r=20, t=40, b=20), height=400)
    return fig

# Heatmap function
def heatmap():
    heat_data = df.pivot_table(index='location', columns='date', values='new_cases', fill_value=0)
    fig = px.imshow(
        heat_data.values,
        labels=dict(x="Date", y="Country", color="New Cases"),
        x=heat_data.columns.strftime('%Y-%m-%d'),
        y=heat_data.index,
        color_continuous_scale="Viridis",
        title="Heatmap: New Cases by Country Over Time"
    )
    fig.update_layout(height=600)
    return fig

# Bind the interactive chart
line_chart_panel = pn.bind(line_chart, country_select)

# Panel layout
dashboard = pn.template.FastListTemplate(
    title="COVID-19 Dashboard: East & West Africa",
    sidebar=[pn.pane.Markdown("### Filters"), country_select],
    main=[
        pn.pane.Plotly(bar_chart(), config={"responsive": True}),
        pn.Spacer(height=20),
        line_chart_panel,
        pn.Spacer(height=20),
        pn.pane.Plotly(geo_fig, config={"responsive": True}),
        pn.pane.Plotly(heatmap(), config={"responsive": True})
    ]
)
pn.extension('plotly', sizing_mode="stretch_width")
dashboard.show()
#dashboard.servable()

Launching server at http://localhost:49942


<panel.io.server.Server at 0x326a2faa0>