<a href="https://colab.research.google.com/github/martinpius/practicals_1/blob/main/BigDataVizualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# install important packages
! pip install holoviews
! pip install panel
! pip install datashader
! pip install jupyter_bokeh
! pip install -U nbformat
! pip install geopy

In [None]:
import os
import time
from io import StringIO

import bokeh
import holoviews as hv
import pandas as pd
import panel as pn
import plotly.express as px
import requests
from holoviews import opts
hv.extension('bokeh')

In [None]:
# Local filename the downloaded CSV is saved under
output_path = "new_covid_data.csv"

# Remote location of the large CSV
url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"

def download_large_file(url, output_path, retry_count=3, timeout=60, chunk_size=8192):
    """Stream a remote file to disk, retrying on transient network errors.

    The body is written to ``<output_path>.part`` and only moved onto
    ``output_path`` once it has been received completely, so an interrupted
    download never leaves a truncated file at ``output_path``.

    Args:
        url: Address of the file to fetch.
        output_path: Destination path on the local filesystem.
        retry_count: Maximum number of download attempts.
        timeout: Value passed to ``requests`` as the request timeout (seconds).
        chunk_size: Number of bytes requested per chunk while streaming.

    Returns:
        True if the file was downloaded, False if every attempt failed.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status (4xx/5xx). This is not retried.
    """
    partial_path = f"{output_path}.part"
    # Timeout is included because read timeouts are not ConnectionErrors and
    # would otherwise escape the retry loop despite `timeout` being set.
    transient_errors = (
        requests.exceptions.ChunkedEncodingError,
        requests.exceptions.ConnectionError,
        requests.exceptions.Timeout,
    )
    try:
        # The context manager closes the session's pooled connections on exit.
        with requests.Session() as session:
            for attempt in range(retry_count):
                try:
                    with session.get(url, stream=True, timeout=timeout) as r:
                        r.raise_for_status()
                        with open(partial_path, 'wb') as f:
                            for chunk in r.iter_content(chunk_size=chunk_size):
                                if chunk:  # Filter out keep-alive chunks
                                    f.write(chunk)
                    # Publish the file only after the full body was written.
                    os.replace(partial_path, output_path)
                    print(f"Download completed: {output_path}")
                    return True
                except transient_errors as e:
                    print(f"Attempt {attempt+1} failed: {e}")
                    if attempt + 1 < retry_count:
                        # Short exponential pause before the next attempt.
                        time.sleep(2 ** attempt)
    finally:
        # Remove a leftover partial file (failed attempts or an HTTP error).
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Failed to download the file after multiple attempts.")
    return False

# Run the download

In [None]:
# Fetch the CSV at `url` and save it to `output_path`
download_large_file(url, output_path)

In [None]:
# Read the downloaded CSV. Reusing `output_path` keeps the reader and the
# downloader pointing at the same file if the filename is ever changed.
df = pd.read_csv(output_path)

In [None]:
# Preview the first 10 rows of the loaded frame
display(df.head(10))

In [None]:
# Optional: keep only the rows whose location is one of a few chosen countries
countries = ["Germany", "Russia", "India", "Italy", "Spain", "China", "Senegal"]
is_selected = df["location"].isin(countries)
df = df[is_selected]

In [None]:
df['location'].unique() # Distinct values of the `location` column, i.e. the selected countries

In [None]:
# Parse the dates on a separate frame so the x-axis is a real time axis
# (the CSV is read without date parsing) while `df` itself stays untouched.
curve_data = df.assign(date=pd.to_datetime(df["date"]))

# Create dataset keyed by location and date, with new_cases as the value
dataset = hv.Dataset(curve_data, kdims=["location", "date"], vdims=["new_cases"])

# Create individual curves per location and overlay them in one plot
curves = dataset.to(hv.Curve, kdims=["date"], vdims=["new_cases"]).overlay('location')

# Display without datashading.
# The title names the actual selection (the filtered countries are not African regions).
curves.opts(
    opts.Curve(width=900, height=400, tools=['hover'], title="New COVID-19 Cases: Selected Countries")
)

In [None]:
import plotly.io as pio

# Render Plotly figures inline in Colab. The renderer is set once here;
# no figure is shown in this cell, so it only affects later cells.
pio.renderers.default = 'colab'

# Rows for the most recent date present in the frame
latest = df[df['date'] == df['date'].max()]

# New cases per location on that date (rows with a missing location or
# new_cases value are dropped), followed by the total across locations
display(latest[['location', 'new_cases']].dropna())
print(latest['new_cases'].sum())

In [None]:
import pandas as pd
import plotly.express as px
import plotly.io as pio

# Set Plotly renderer for Colab
pio.renderers.default = 'colab'

# Ensure date is datetime. `assign` builds a new frame instead of writing a
# column into `df`, which at this point is a filtered selection of the loaded
# data; assigning into it in place can trigger pandas' SettingWithCopyWarning.
df = df.assign(date=pd.to_datetime(df['date']))

# Filter to rows with new_cases > 0
non_zero = df[df['new_cases'] > 0]

# Get most recent date with non-zero cases
latest_date = non_zero['date'].max()

# All rows recorded on that date
latest = df[df['date'] == latest_date]

# Sort and keep the (up to) 10 locations with the most new cases
top = latest.sort_values(by='new_cases', ascending=False).head(10)

# Plot
fig = px.bar(top, x='location', y='new_cases', title=f"New COVID-19 Cases by Country ({latest_date.date()})")
fig.show()

In [None]:
# Re-load & clean data (an earlier cell narrowed `df` to a few countries).
# The file is read via `output_path`, the same path the downloader wrote to.
df = (
    pd.read_csv(output_path)
    .assign(
        # Parse dates; coerce non-numeric new_cases to NaN and then to 0
        date=lambda frame: pd.to_datetime(frame['date']),
        new_cases=lambda frame: pd.to_numeric(frame['new_cases'], errors='coerce').fillna(0),
    )
    # Drop rows without a location
    .dropna(subset=['location'])
)

In [None]:
# Get latest date with non-zero new cases
non_zero = df[df['new_cases'] > 0]
latest_date = non_zero['date'].max()
latest = df[df['date'] == latest_date]
top10 = latest.sort_values('new_cases', ascending=False).head(10)

In [None]:
# Display the 10 countries with the most new cases

In [None]:
# Show the ranked rows (`top10` was already cut to at most 10 rows when it was built)
top10.head(10)

In [None]:
# Options for the selector: every distinct location, in sorted order
countries = df['location'].drop_duplicates().tolist()
countries.sort()

# Entries preselected in the widget.
# NOTE(review): 'Europe' and 'Asia' are continent names, not countries;
# confirm they occur as `location` values in the data.
default_countries = ['Europe', 'Asia', 'Italy']

In [None]:
# Multi-select widget for the dashboard: choose which locations to plot
country_select = pn.widgets.MultiSelect(
    name='Select Countries',
    options=countries,
    value=default_countries,
    size=10,
)

In [None]:
# --- Geospatial COVID-19 Plot for Europe & Americas ---

# Hardcoded (latitude, longitude) for every country shown on the map
coord_data = {
    # Europe
    "Germany": (51.1657, 10.4515),
    "France": (46.2276, 2.2137),
    "Italy": (41.8719, 12.5674),
    "Spain": (40.4637, -3.7492),
    "United Kingdom": (55.3781, -3.4360),
    "Poland": (51.9194, 19.1451),
    "Netherlands": (52.1326, 5.2913),
    "Greece": (39.0742, 21.8243),
    # North America
    "United States": (37.0902, -95.7129),
    "Canada": (56.1304, -106.3468),
    "Mexico": (23.6345, -102.5528),
    # South America
    "Brazil": (-14.2350, -51.9253),
    "Argentina": (-38.4161, -63.6167),
    "Colombia": (4.5709, -74.2973),
    "Chile": (-35.6751, -71.5430),
    "Peru": (-9.1899, -75.0152)
}

# Countries to plot: derived from the coordinate table so the two cannot drift apart
europe_america_countries = list(coord_data)

# Per-country lookup tables for each coordinate
lat_by_country = {name: lat for name, (lat, _) in coord_data.items()}
lon_by_country = {name: lon for name, (_, lon) in coord_data.items()}

# Subset the DataFrame (assuming `df` is already defined with proper date/new_cases cleaning)
latest_date = df.loc[df['new_cases'] > 0, 'date'].max()
on_latest_date = df['date'] == latest_date
in_scope = df['location'].isin(europe_america_countries)

# Assign coordinates. `assign` returns a new frame, so no column is written
# into a filtered slice of `df` (which can trigger SettingWithCopyWarning).
# Locations without an entry in the lookup tables get NaN.
latest_df = df[on_latest_date & in_scope].assign(
    latitude=lambda frame: frame['location'].map(lat_by_country),
    longitude=lambda frame: frame['location'].map(lon_by_country),
)
geo_df = latest_df.dropna(subset=['latitude', 'longitude'])

# Create geospatial figure: one marker per country, sized and coloured by new_cases
geo_fig = px.scatter_geo(
    geo_df,
    lat='latitude',
    lon='longitude',
    hover_name='location',
    size='new_cases',
    color='new_cases',
    projection='natural earth',
    title=f"COVID-19 New Cases in Europe and the Americas ({latest_date.date()})",
    color_continuous_scale='Reds'
)


In [None]:
# Load and clean the full dataset for the dashboard.
# The file is read via `output_path`, the same path the downloader wrote to.
df = pd.read_csv(output_path)
df['date'] = pd.to_datetime(df['date'])
df['new_cases'] = pd.to_numeric(df['new_cases'], errors='coerce').fillna(0)

# Most recent date with actual new cases
latest_nonzero_date = df.loc[df['new_cases'] > 0, 'date'].max()
latest_df = df[df['date'] == latest_nonzero_date]
top10 = latest_df.sort_values("new_cases", ascending=False).head(10)

# Country selector widget.
# Missing locations are dropped first: sorted() cannot compare NaN (a float)
# with strings and would raise TypeError.
countries = sorted(df['location'].dropna().unique())
default_selection = ["Nigeria", "Kenya", "Ghana"]
country_select = pn.widgets.MultiSelect(
    name="Select Countries",
    options=countries,
    # Only preselect names that are actually among the options
    value=[name for name in default_selection if name in countries],
    size=10,
    width=300
)

# Bar chart of the top-10 rows
def bar_chart():
    """Build a bar chart of `new_cases` per location for the rows in `top10`.

    Reads the module-level `top10` frame and `latest_nonzero_date` (used in
    the title).

    Returns:
        The Plotly figure, 400 px high with tight margins.
    """
    chart_title = f"Top 10 Countries by New COVID-19 Cases ({latest_nonzero_date.date()})"
    figure = px.bar(
        top10,
        x='location',
        y='new_cases',
        title=chart_title,
        color='new_cases',
        color_continuous_scale='OrRd',
    )
    figure.update_layout(height=400, margin=dict(l=20, r=20, t=40, b=20))
    return figure

# Line chart (a plain function; it is connected to the widget with pn.bind)
def line_chart(selected):
    """Build a line chart of `new_cases` over time for the chosen locations.

    Args:
        selected: Collection of location names to include.

    Returns:
        The Plotly figure with one line per location, 400 px high.
    """
    chosen_rows = df[df['location'].isin(selected)]
    figure = px.line(
        chosen_rows,
        x='date',
        y='new_cases',
        color='location',
        title="New COVID-19 Cases Over Time",
    )
    figure.update_layout(height=400, margin=dict(l=20, r=20, t=40, b=20))
    return figure

# Heatmap of new cases: locations down the side, dates across
def heatmap():
    """Build a location-by-date heatmap of `new_cases` from the module-level `df`.

    The frame is pivoted to one row per location and one column per date,
    with missing combinations filled with 0.

    Returns:
        The Plotly figure, 600 px high.
    """
    cases_by_day = df.pivot_table(values='new_cases', index='location', columns='date', fill_value=0)
    # Date columns rendered as YYYY-MM-DD strings for the x-axis
    day_labels = cases_by_day.columns.strftime('%Y-%m-%d')
    figure = px.imshow(
        cases_by_day.values,
        x=day_labels,
        y=cases_by_day.index,
        labels={"x": "Date", "y": "Country", "color": "New Cases"},
        color_continuous_scale="Viridis",
        title="Heatmap: New Cases by Country Over Time",
    )
    figure.update_layout(height=600)
    return figure

# Load the Plotly extension and set the default sizing mode BEFORE any pane or
# template is created, so the components built below are covered by both.
pn.extension('plotly', sizing_mode="stretch_width")

# Bind the interactive chart: line_chart is re-run with the widget's current selection
line_chart_panel = pn.bind(line_chart, country_select)

# Panel layout: filters in the sidebar, charts stacked in the main area.
# NOTE(review): the title says "East & West Africa" while the geo figure is
# titled "Europe and the Americas" — confirm the intended title.
dashboard = pn.template.FastListTemplate(
    title="COVID-19 Dashboard: East & West Africa",
    sidebar=[pn.pane.Markdown("### Filters"), country_select],
    main=[
        pn.pane.Plotly(bar_chart(), config={"responsive": True}),
        pn.Spacer(height=20),
        line_chart_panel,
        pn.Spacer(height=20),
        pn.pane.Plotly(geo_fig, config={"responsive": True}),
        pn.pane.Plotly(heatmap(), config={"responsive": True})
    ]
)
# Alternative way to open the dashboard, left disabled:
#dashboard.show()
dashboard.servable()