# DIY Covid Tracking Dashboard # 
ECS780P Computer Programming Coursework 
by **Theresa Rivera To**, 240419758, MSc Computing and Information Systems


### Purpose  
Track and visualise COVID-19 cases across London boroughs using an interactive dashboard. This helps you dive into borough-specific trends or check aggregated data.

### Features  
- Easy dropdowns to filter by borough, year, and month.  
- Automatic data fetching and plotting from the API.  
- Live updates and progress tracking during data fetches.
- A "Save All Data" button that writes the current data to a gzipped JSON file and offers a download link.

---

### Essential Information  
#### File Structure  
- **Notebook**: `covid-dashboard.ipynb`  
- **Scripts**:  
  - `api_wrapper.py`: Wraps API-specific logic for reusability.  
- **Data Folder**:  
  - `combined_df.json.gz`: Saves fetched data locally as gzipped JSON.
- **Classes**:
  - `class Fetcher`: Handles API calls and data fetching. Also modularises saving data.

---

### How to Use  
1. Run the notebook and load the saved data to visualise immediately.  
2. If the data is outdated, click the "Fetch New Data" button to retrieve the latest information.  
3. Use the dropdowns to filter data by borough, year, or month.  
4. View progress in the notebook while fetching.
5. Click "Save All Data" after fetching to write the latest data to the gzipped JSON file and get a download link.  

#### Note  
Ensure the required Python libraries are installed. A `requirements.txt` file is provided for convenience.

# Data Visualisation

In [1]:
from IPython.display import display, clear_output, FileLink
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import Output, interact, widgets, Button, HBox
import time
import gzip

import json

from api_wrapper import APIwrapper

%matplotlib inline

In [2]:
# ----------------------------------- MODULARISE WITH WRAPPER ----------------------------------- #

# TODO function outside API class --> Turns data into a Pandas DataFrame
def fetch_data_with_wrapper(geography_type, borough, metric_name):
    """
    Download every page of one COVID-19 metric through APIwrapper.

    Args:
        geography_type: Geography type passed to the API wrapper.
        borough: Geography (borough) name passed to the API wrapper.
        metric_name: Name of the metric to request.

    Returns:
        A Pandas DataFrame built from the fetched pages, or an empty
        DataFrame when fetching or conversion raises.
    """
    # The wrapper is built outside the try, so constructor errors propagate.
    client = APIwrapper(
        theme="infectious_disease",
        sub_theme="respiratory",
        topic="COVID-19",
        geography_type=geography_type,
        geography=borough,
        metric=metric_name,
    )

    try:
        frame = pd.DataFrame(client.get_all_pages())
    except Exception as err:
        # Best effort: report the problem and hand back an empty frame.
        print(f"Error fetching data for metric {metric_name}: {err}")
        frame = pd.DataFrame()
    return frame

In [3]:
# ----------------------------------- SET UP FETCHER ----------------------------------- #

class Fetcher:
    """
    Fetch one metric for many boroughs and merge the results.

    Each borough is downloaded with fetch_data_with_wrapper (which uses the
    provided APIwrapper). Progress messages are printed inside an Output
    widget so they can be shown with display_output().
    """

    def __init__(self, geography_type, metric_name):
        """Store the query parameters and create empty result/progress holders."""
        self.geography_type = geography_type
        self.metric_name = metric_name
        # One DataFrame per borough, keyed by the borough string as passed in.
        self.borough_data = {}
        # Widget that captures the progress prints.
        self.output = Output()

    def fetch_borough_data(self, borough):
        """
        Fetch a single borough and keep its DataFrame in self.borough_data.

        A "borough" column holding the readable name ("%20" replaced by a
        space) is added to the stored frame. Empty results and errors are
        reported in the output widget and nothing is stored.
        """
        with self.output:
            readable_name = borough.replace("%20", " ")
            print(f"Fetching data for {readable_name}...")
            try:
                frame = fetch_data_with_wrapper(self.geography_type, borough, self.metric_name)

                if frame.empty:
                    print(f"✗ {readable_name} - No data available.")
                    return

                frame["borough"] = readable_name
                self.borough_data[borough] = frame
                print(f"✓ {readable_name} - {len(frame)} records fetched.")
            except Exception as err:
                print(f"✗ Error fetching data for {readable_name}: {err}")

    def fetch_all_boroughs(self, borough_list):
        """
        Fetch every borough in borough_list and return one combined DataFrame.

        Everything currently held in self.borough_data is concatenated and
        its "date" column converted to datetime. An empty DataFrame is
        returned when nothing is stored.
        """
        with self.output:
            for name in borough_list:
                self.fetch_borough_data(name)
            print()

            if not self.borough_data:
                print("No data was fetched for any borough.")
                return pd.DataFrame()

            # Stack the per-borough frames into a single table.
            merged = pd.concat(list(self.borough_data.values()), ignore_index=True)
            merged["date"] = pd.to_datetime(merged["date"])
            print(f"SUCCESS! Combined data contains {len(merged)} rows across {len(self.borough_data)} boroughs.")
            return merged

    def display_output(self):
        """
        Show the progress Output widget (kept for Voila compatibility).
        """
        display(self.output)

    def save_and_download_gzipped_file(self, dataframe, filename="combined_df.json.gz"):
        """
        Write dataframe to filename as gzipped JSON and display a download link.

        Nothing is written for an empty DataFrame. Any error is printed
        rather than raised.
        """
        print("Saving file as gzipped JSON...")
        try:
            if dataframe.empty:
                print("No data available to save.")
                return

            # Text-mode gzip handle so to_json can write straight into it.
            with gzip.open(filename, "wt") as handle:
                dataframe.to_json(handle, orient="records", indent=4)
            print(f"Data successfully saved to '{filename}'")

            display(FileLink(filename))
        except Exception as err:
            print(f"Error saving data: {err}")

In [4]:
# ----------------------------------- TIME FOR ACTION ----------------------------------- #

# Query parameters shared by every request (spaces are pre-encoded as %20).
geography_type = "Lower%20Tier%20Local%20Authority"
metrics = "COVID-19_cases_casesByDay"

# Geography names to fetch, one request series per entry.
london_boroughs = [
    "Barking%20and%20Dagenham",
    "Barnet",
    "Bexley",
    "Brent",
    "Bromley",
    "Camden",
    "Croydon",
    "Ealing",
    "Enfield",
    "Greenwich",
    "Hackney%20and%20City%20of%20London",
    "Hammersmith%20and%20Fulham",
    "Haringey",
    "Harrow",
    "Havering",
    "Hillingdon",
    "Hounslow",
    "Islington",
    "Kensington%20and%20Chelsea",
    "Kingston%20upon%20Thames",
    "Lambeth",
    "Lewisham",
    "Merton",
    "Newham",
    "Redbridge",
    "Richmond%20upon%20Thames",
    "Southwark",
    "Sutton",
    "Tower%20Hamlets",
    "Waltham%20Forest",
    "Wandsworth",
    "Westminster",
]

In [5]:
# Single shared Fetcher for the London boroughs; the Fetch and Save button
# callbacks in create_widgets reuse this instance.
fetcher = Fetcher(geography_type, metrics)

In [6]:
# One-off bootstrap: fetch every borough, combine into a single DataFrame and save it
# as gzipped JSON for offline use. Left commented out once the offline file exists.
# NOTE(review): the string below mentions fetcher.save_combined_data, but the Fetcher
# class in this notebook only defines save_and_download_gzipped_file.
"""fetcher.save_combined_data(london_boroughs) is commented out once the data is saved the first time as offline data. There is a save button at the bottom of the widget which also allows you to save the latest data."""
# combined_df = fetcher.fetch_all_boroughs(london_boroughs)
# fetcher.save_and_download_gzipped_file(combined_df, "combined_df.json.gz")

'fetcher.save_combined_data(london_boroughs) is commented out once the data is saved the first time as offline data. There is a save button at the bottom of the widget which also allows you to save the latest data.'

In [7]:
# Shared Output widget: plot_cases draws into it and the button callbacks
# print their progress messages into it.
output_widget = Output()

# ----------------------------------- PLOTTING ----------------------------------- #

# TODO: Set up the plot
def plot_cases(cases_df, year=None, month=None, boroughs=None):
    """
    Plot one line per borough into the shared output widget.

    Args:
        cases_df: DataFrame with a datetime "date" column and "borough" and
            "metric_value" columns. None or empty shows a message instead.
        year: Year to keep (anything int() accepts), or None / "All" for
            no year filter.
        month: Month number to keep (anything int() accepts), or
            None / "All" for no month filter.
        boroughs: Collection of borough names to keep. None, empty, or a
            collection containing "All" keeps every borough.

    Returns:
        None. The plot (or a message) is rendered inside output_widget.
    """
    if cases_df is None or cases_df.empty:
        with output_widget:
            output_widget.clear_output(wait=True)
            print("Cannot plot. DataFrame is missing or empty.")
        return

    # Filter data by year
    if year and year != "All":
        cases_df = cases_df[cases_df["date"].dt.year == int(year)]

    # Filter data by month
    if month and month != "All":
        cases_df = cases_df[cases_df["date"].dt.month == int(month)]

    # Filter data by boroughs
    if boroughs and "All" not in boroughs:
        cases_df = cases_df[cases_df["borough"].isin(boroughs)]

    # A filter combination can match nothing (e.g. a month with no records);
    # say so rather than drawing an empty chart with an empty legend.
    if cases_df.empty:
        with output_widget:
            output_widget.clear_output(wait=True)
            print("No data matches the selected filters.")
        return

    # Freshly fetched data is not guaranteed to be in date order; sort so each
    # line is drawn chronologically instead of zig-zagging across the chart.
    cases_df = cases_df.sort_values("date")

    # Sort boroughs alphabetically so the legend order is stable
    sorted_boroughs = sorted(cases_df["borough"].unique())

    # Plot the filtered data
    with output_widget:
        output_widget.clear_output(wait=True)
        plt.figure(figsize=(16, 7))

        for borough in sorted_boroughs:
            borough_data = cases_df[cases_df["borough"] == borough]
            plt.plot(borough_data["date"], borough_data["metric_value"], label=borough)

        # The notebook fetches the casesByDay metric (a daily count), so the
        # title no longer claims a per-100,000 rate; it now agrees with the y-axis.
        plt.title("Daily COVID-19 Cases in London Boroughs")
        plt.xlabel("Date")
        plt.ylabel("Number of Cases")
        plt.legend(loc="upper left", bbox_to_anchor=(1.05, 1), fontsize="small", title="Boroughs")
        plt.grid(True)
        plt.tight_layout()
        plt.show()


# --------------------------------- HELPER: LOAD JSON -------------------------------- #

# TODO: Load the initial JSON
def load_initial_data(filepath="combined_df.json.gz"):
    """
    Load the offline data from a gzipped JSON file.

    Args:
        filepath: Path to a gzipped JSON file holding a list of records,
            each with at least a "date" field. Numeric dates are read as
            epoch milliseconds; any other dates are parsed by pandas.

    Returns:
        A DataFrame sorted by "date" with a fresh index, or None when the
        file is missing, unreadable, not valid JSON, or has no usable
        "date" column. A message is printed in every case.
    """
    import zlib  # local import: only needed to recognise corrupt gzip payloads

    try:
        with gzip.open(filepath, "rt", encoding="utf-8") as f:
            json_data = json.load(f)
        df = pd.DataFrame(json_data)
        if pd.api.types.is_numeric_dtype(df["date"]):
            # Epoch milliseconds, as stored in the saved offline file.
            df["date"] = pd.to_datetime(df["date"], unit="ms")
        else:
            # e.g. ISO-formatted strings
            df["date"] = pd.to_datetime(df["date"])
        df = df.sort_values(by="date").reset_index(drop=True)
    except FileNotFoundError:
        print(f"File {filepath} not found. Please fetch data using the 'Fetch Data' button.")
        return None
    except (OSError, EOFError, zlib.error, ValueError, KeyError) as err:
        # Corrupt gzip, invalid JSON, or an empty / "date"-less payload: treat
        # it like a missing file so the caller's `is not None` check still works.
        print(f"Could not load {filepath}: {err}. Please fetch data using the 'Fetch Data' button.")
        return None

    print("Loaded initial data successfully.")
    return df


# ------------------------------ CREATE WIDGET ------------------------------ #

def update_cases_plot(cases_df, year, month, boroughs):
    """
    Redraw the plot for the given widget selections.

    Thin adapter around plot_cases; create_widgets wires the dropdown
    values into this function.
    """
    selection = {"year": year, "month": month, "boroughs": boroughs}
    plot_cases(cases_df, **selection)


def create_widgets(cases_df):
    """
    Build the filter dropdowns and the Fetch/Save buttons, then display them.

    Args:
        cases_df: DataFrame to plot initially (with "date", "borough" and
            "metric_value" columns), or None / empty when no offline data
            exists yet. In that case only "All" is offered as a borough
            until data has been fetched.

    The dropdowns, both buttons and output_widget are displayed as a side
    effect. After a successful fetch the new data replaces the data used by
    the dropdowns and is also written to the module-level `cases_df`.
    """

    def borough_options():
        """Return the selectable borough names for the data currently held."""
        if cases_df is None or cases_df.empty:
            return ["All"]
        return ["All"] + sorted(cases_df["borough"].unique())

    def replot_current_selection():
        """Redraw using whatever the three filter widgets currently show."""
        update_cases_plot(cases_df, year_dropdown.value, month_dropdown.value, borough_dropdown.value)

    year_dropdown = widgets.Dropdown(
        options=["All", "2020", "2021", "2022", "2023", "2024"],
        value="All",
        description="Year:"
    )

    month_dropdown = widgets.Dropdown(
        options=["All"] + [f"{i:02d}" for i in range(1, 13)],
        value="All",
        description="Month:"
    )

    borough_dropdown = widgets.SelectMultiple(
        options=borough_options(),
        value=("All",),
        description="Borough(s):",
        layout=widgets.Layout(width="50%")
    )

    fetch_button = Button(
        description="Fetch New Data",
        button_style='primary',
        tooltip="Fetch latest data from the API",
        icon="refresh"
    )

    save_button = Button(
        description="Save All Data",
        button_style="success",
        tooltip="Save the current data to JSON file",
        icon="save"
    )

    # FETCH BUTTON SETUP
    def fetch_button_callback(button):
        # nonlocal (not global): the dropdown handler below closes over this
        # function's `cases_df`, so rebinding the module global alone would
        # leave the filters plotting the stale, pre-fetch data.
        nonlocal cases_df

        with output_widget:  # Show progress inside the shared output widget
            clear_output(wait=True)
            print("FETCH REQUESTED. Please wait as it may take a while.")
            fetcher.display_output()  # Fetcher prints its per-borough progress into its own widget
            fetched_df = fetcher.fetch_all_boroughs(london_boroughs)

            # Keep the data already loaded when the fetch produced nothing.
            if fetched_df is None or fetched_df.empty:
                print("\nNo data could be fetched. Please try again.")
                return

            cases_df = fetched_df
            # Keep the notebook-level variable in sync for later cells.
            globals()["cases_df"] = fetched_df

            print("\nData fetching complete. Click on your selected filter to plot.")
            print("Graph will now reload in ", end="")
            for i in range(3, 0, -1):
                print(f"{i}", end=" ")
                time.sleep(1)
            print("🚀")
            time.sleep(1)

            # Offer any boroughs that only exist in the newly fetched data.
            # Only touch the widget when the list really changed, because
            # replacing the options resets the current selection.
            new_options = tuple(borough_options())
            if tuple(borough_dropdown.options) != new_options:
                borough_dropdown.options = new_options
                borough_dropdown.value = ("All",)

            replot_current_selection()

    # SAVE BUTTON SETUP
    def save_button_callback(button):
        with output_widget:  # Keep every message in the widget, not the log console
            clear_output(wait=True)
            if cases_df is None or cases_df.empty:
                print("No data available to save. Please fetch data first.")
                return

            fetcher.save_and_download_gzipped_file(cases_df, filename="combined_df.json.gz")
            print("A download link has been generated. Please download within 10 seconds.")
            print("Graph will reload in:")
            for i in range(10, 1, -1):
                print(f"{i}", end=", ")
                time.sleep(1)
            print("🚀")
            time.sleep(1)
            replot_current_selection()

    fetch_button.on_click(fetch_button_callback)
    save_button.on_click(save_button_callback)

    button_box = HBox([fetch_button, save_button])

    def on_filter_change(year, month, boroughs):
        """Replot whenever one of the filter widgets changes."""
        update_cases_plot(cases_df, year, month, boroughs)

    # interact wires the three widgets to the handler and runs it once immediately.
    interact(on_filter_change, year=year_dropdown, month=month_dropdown, boroughs=borough_dropdown)

    display(button_box)
    display(output_widget)
    


# ------------------------------------ MAIN ------------------------------------ #

# Prevent automatic execution on import (IDE only)
# if __name__ == "__main__":
#     # Load initial data
#     cases_df = load_initial_data()

#     # Show initial plot if data exists
#     if cases_df is not None:
#         plot_cases(cases_df)

#     # Create widgets
#     create_widgets(cases_df)

In [8]:
# Load the offline data from the gzipped JSON file. load_initial_data returns
# None when the file is not found, so cases_df may be None after this cell.
filepath = "combined_df.json.gz"
cases_df = load_initial_data(filepath)

Loaded initial data successfully.


In [9]:
# # Check if cases has content
# cases_df.head()

In [10]:
# Build and show the dashboard once offline data is available.
# NOTE(review): the Fetch button is only rendered by create_widgets, so in the
# "no data" branch there is no button on screen for the message to refer to.
if cases_df is None:
    print("No data loaded. Fetch new data using the Fetch button.")
else:
    create_widgets(cases_df)

interactive(children=(Dropdown(description='Year:', options=('All', '2020', '2021', '2022', '2023', '2024'), v…

HBox(children=(Button(button_style='primary', description='Fetch New Data', icon='refresh', style=ButtonStyle(…

Output()