# Training Data Explorer

In [None]:
from environmental_insights import data as ei_data
import geopandas as gpd
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output

def launch_air_quality_widget() -> dict:
    """
    Build, wire up and display the pollutant -> station -> column explorer.

    Three chained dropdowns drive the UI: choosing a pollutant fetches the
    stations that measure it, choosing a station fetches that station's data
    and draws its location on a UK outline map, and choosing a column draws
    that column as a time series. Call this function from any cell (e.g. a
    Thebe-enabled cell) and it will display everything.

    Returns:
        A dict holding the created widgets under the keys "pollutant",
        "station", "column", "spinner", "output", "map" and "timeseries".
    """

    # ----------------------------------------------------------------
    # 1) Load UK outline once (for mapping)
    # ----------------------------------------------------------------
    uk_outline = ei_data.get_uk_grids_outline()

    # ----------------------------------------------------------------
    # 2) Define the list of available pollutants
    # ----------------------------------------------------------------
    available_pollutants = [
        "no2", "nox", "no", "o3", "pm10", "pm2.5", "so2", "co"
    ]

    # ----------------------------------------------------------------
    # 3) Create a dropdown widget for pollutant selection
    # ----------------------------------------------------------------
    pollutant_dropdown = widgets.Dropdown(
        options=[("Select pollutant", None)] + [(pol.upper(), pol) for pol in available_pollutants],
        value=None,
        description="Pollutant:",
        style={"description_width": "initial"},
    )

    # ----------------------------------------------------------------
    # 4) Create a dropdown widget for station selection (initially hidden)
    # ----------------------------------------------------------------
    station_dropdown = widgets.Dropdown(
        options=[("Select station", None)],
        value=None,
        description="Station:",
        style={"description_width": "initial"},
    )
    station_dropdown.layout.display = "none"
    station_dropdown.disabled = True

    # ----------------------------------------------------------------
    # 5) Create a dropdown widget for column selection (initially hidden)
    # ----------------------------------------------------------------
    column_dropdown = widgets.Dropdown(
        options=[("Select column", None)],
        value=None,
        description="Column:",
        style={"description_width": "initial"},
    )
    column_dropdown.layout.display = "none"
    column_dropdown.disabled = True

    # ----------------------------------------------------------------
    # 6) Create a spinner widget for loading indication
    # ----------------------------------------------------------------
    spinner = widgets.HTML(
        value="""
        <style>
        @keyframes spin {
          0% { transform: rotate(0deg); }
          100% { transform: rotate(360deg); }
        }
        .loader {
          border: 4px solid #f3f3f3;
          border-top: 4px solid #444;
          border-radius: 50%;
          width: 20px;
          height: 20px;
          animation: spin 1s linear infinite;
          display: inline-block;
          vertical-align: middle;
        }
        .loading-text {
          display: inline-block;
          margin-left: 8px;
          vertical-align: middle;
          font-style: italic;
          color: #444;
        }
        </style>
        <div class="loader"></div>
        <div class="loading-text">Loading...</div>
        """
    )
    spinner.layout.display = "none"  # Hidden initially

    # ----------------------------------------------------------------
    # 7) Create output widgets for text feedback, map, and timeseries
    # ----------------------------------------------------------------
    output_area = widgets.Output()
    map_output = widgets.Output()
    ts_output = widgets.Output()

    # ----------------------------------------------------------------
    # 8) The last-fetched station frame lives in a one-element dict so the
    #    nested callbacks can rebind it without `nonlocal`.
    # ----------------------------------------------------------------
    state = {"last_station_gdf": None}

    # Columns that are never offered for plotting.
    non_plottable_columns = ("geometry", "Timestamp", "timestamp")

    # ----------------------------------------------------------------
    # Small helpers shared by the callbacks
    # ----------------------------------------------------------------
    def _reset_dropdown(dropdown, placeholder_label):
        """Hide and disable *dropdown*, leaving only its placeholder entry."""
        dropdown.layout.display = "none"
        dropdown.disabled = True
        # Replacing the options may itself fire the dropdown's value observer
        # (with new=None) if something was selected before.
        dropdown.options = [(placeholder_label, None)]
        dropdown.value = None

    def _clear_outputs(*outputs):
        """Erase everything currently shown in each of the given Output widgets."""
        for out in outputs:
            with out:
                clear_output()

    def _draw_station_map(station_gdf, station_name):
        """Plot the UK outline with the station marked and labelled."""
        fig, ax = plt.subplots(figsize=(6, 6))
        uk_outline.plot(ax=ax, facecolor="none", edgecolor="black", linewidth=0.8)

        station_gdf.plot(
            ax=ax,
            marker="x",
            color="red",
            markersize=100,
            label=station_name,
        )

        # Label each distinct location once. The frame may hold many rows
        # that share one location (presumably one row per timestamp), and
        # labelling every row would stack identical text on the same spot.
        labelled_locations = set()
        for pt in station_gdf.geometry:
            if pt is None or pt.is_empty:
                continue
            location = (pt.x, pt.y)
            if location in labelled_locations:
                continue
            labelled_locations.add(location)
            ax.text(
                pt.x + 2000,  # offset in map units (presumably metres)
                pt.y + 2000,
                station_name,
                fontsize=10,
                zorder=6
            )

        ax.set_title(f"{station_name} Location", fontsize=12)
        ax.axis("off")
        plt.tight_layout()
        plt.show()

    def _draw_timeseries(station_gdf, column_name):
        """Plot *column_name* of *station_gdf* against its time axis."""
        # Prefer an explicit timestamp column; otherwise fall back to the index.
        if "Timestamp" in station_gdf.columns:
            times = station_gdf["Timestamp"]
        elif "timestamp" in station_gdf.columns:
            times = station_gdf["timestamp"]
        else:
            times = station_gdf.index

        measurements = station_gdf[column_name]

        fig, ax_ts = plt.subplots(figsize=(8, 4))
        ax_ts.plot(times, measurements, linewidth=1.0)
        ax_ts.set_title(f"{column_name}", fontsize=12)
        ax_ts.set_xlabel("Time", fontsize=10)
        ax_ts.set_ylabel(column_name, fontsize=10)
        ax_ts.grid(alpha=0.3)

        plt.tight_layout()
        plt.show()

    # ----------------------------------------------------------------
    # 9) Callback: when pollutant changes → populate stations, reset downstream
    # ----------------------------------------------------------------
    def on_change_pollutant(change):
        """Reset everything downstream, then load the stations for the new pollutant."""
        selected_pollutant = change["new"]

        # Hide & reset station & column dropdowns, drop cached data, clear outputs
        _reset_dropdown(station_dropdown, "Select station")
        _reset_dropdown(column_dropdown, "Select column")
        state["last_station_gdf"] = None
        _clear_outputs(output_area, map_output, ts_output)
        spinner.layout.display = "none"

        if selected_pollutant is None:
            return

        # Show station dropdown (greyed out while loading) and the spinner
        station_dropdown.layout.display = ""
        station_dropdown.disabled = True
        spinner.layout.display = ""

        with output_area:
            clear_output()
            print(f"Fetching stations for {selected_pollutant.upper()}...")

        # Fetch station list (this may take a moment). The spinner is hidden
        # even if the fetch raises, so the UI never looks permanently busy.
        try:
            stations = ei_data.get_uk_monitoring_stations(selected_pollutant)
        finally:
            spinner.layout.display = "none"

        # Build station options with a "Select station" placeholder at top
        station_dropdown.options = [("Select station", None)] + [(s, s) for s in stations]
        station_dropdown.value = None  # Reset to placeholder

        # Enable station dropdown now that loading is done
        station_dropdown.disabled = False

        with output_area:
            clear_output()
            print(f"Fetched {len(stations)} station(s) measuring {selected_pollutant.upper()}.")
            print("Please select a station from the dropdown.")

    # ----------------------------------------------------------------
    # 10) Callback: when station changes → fetch station_gdf and plot map
    # ----------------------------------------------------------------
    def on_change_station(change):
        """Fetch the chosen station's data, draw its map and offer its columns."""
        selected_station = change["new"]
        selected_pollutant = pollutant_dropdown.value

        # Hide & reset column dropdown; the cached frame belongs to the
        # previous station, so drop it too.
        _reset_dropdown(column_dropdown, "Select column")
        state["last_station_gdf"] = None

        # Nothing to fetch when the placeholder is selected. This also runs
        # when the pollutant callback resets this dropdown, in which case the
        # pollutant itself may be None and must not be dereferenced.
        if selected_station is None or selected_pollutant is None:
            _clear_outputs(map_output, ts_output)
            with output_area:
                clear_output()
                if selected_pollutant is not None:
                    print(f"No station selected for {selected_pollutant.upper()}.")
            return

        with output_area:
            clear_output()
            print(
                f"You have selected:\n"
                f"  • Pollutant: {selected_pollutant.upper()}\n"
                f"  • Station:   {selected_station}"
            )

        # Show spinner while fetching station geometry & plotting map
        spinner.layout.display = ""

        # Clear previous map and timeseries
        _clear_outputs(map_output, ts_output)

        try:
            # Fetch the GeoDataFrame for the chosen station
            station_gdf = ei_data.get_uk_monitoring_station(
                pollutant=selected_pollutant, station=selected_station
            )
            state["last_station_gdf"] = station_gdf  # store it for column callback
            has_data = station_gdf is not None and not station_gdf.empty

            with map_output:
                clear_output()
                if has_data:
                    _draw_station_map(station_gdf, selected_station)
                else:
                    print("Station geometry not found.")
        finally:
            # Hide the spinner whether the fetch/plot succeeded or not
            spinner.layout.display = "none"

        if not has_data:
            return

        # Station exists: offer its plottable columns (excluding geometry/timestamp)
        possible_columns = [
            col for col in station_gdf.columns if col not in non_plottable_columns
        ]
        if possible_columns:
            column_dropdown.options = [("Select column", None)] + [
                (col, col) for col in possible_columns
            ]
            column_dropdown.value = None
            column_dropdown.layout.display = ""
            column_dropdown.disabled = False

            with output_area:
                print("Please select a column to plot from the dropdown below.")

    # ----------------------------------------------------------------
    # 11) Callback: when column changes → plot timeseries only
    # ----------------------------------------------------------------
    def on_change_column(change):
        """Draw the chosen column of the cached station frame as a time series."""
        selected_column = change["new"]
        selected_station = station_dropdown.value
        selected_pollutant = pollutant_dropdown.value

        if selected_column is None:
            # Remove any chart left over from a previously chosen column
            _clear_outputs(ts_output)
            with output_area:
                clear_output()
                print("No column selected.")
            return

        with output_area:
            clear_output()
            print(
                f"You have selected:\n"
                f"  • Pollutant: {selected_pollutant.upper()}\n"
                f"  • Station:   {selected_station}\n"
                f"  • Column:    {selected_column}"
            )

        # Show spinner while building and drawing the line chart
        spinner.layout.display = ""
        try:
            station_gdf = state["last_station_gdf"]
            with ts_output:
                clear_output()
                if station_gdf is None or station_gdf.empty:
                    print("Station data not available for plotting.")
                else:
                    _draw_timeseries(station_gdf, selected_column)
        finally:
            # After the line chart is done (or failed), hide spinner
            spinner.layout.display = "none"

    # ----------------------------------------------------------------
    # 12) Wire up callbacks (only for changes of the selected value)
    # ----------------------------------------------------------------
    pollutant_dropdown.observe(on_change_pollutant, names="value")
    station_dropdown.observe(on_change_station, names="value")
    column_dropdown.observe(on_change_column, names="value")

    # ----------------------------------------------------------------
    # 13) Display all widgets (in order)
    # ----------------------------------------------------------------
    display(
        pollutant_dropdown,
        station_dropdown,
        spinner,        # Spinner appears below the dropdowns
        output_area,
        map_output,
        column_dropdown,
        ts_output
    )

    # Return the widgets so the caller can keep references to them:
    return {
        "pollutant": pollutant_dropdown,
        "station": station_dropdown,
        "column": column_dropdown,
        "spinner": spinner,
        "output": output_area,
        "map": map_output,
        "timeseries": ts_output,
    }



In [None]:
# Build and display the explorer UI in this cell. The returned dict of widgets
# (keys: "pollutant", "station", "column", "spinner", "output", "map",
# "timeseries") is kept so later cells can read or drive the widgets.
widgets_dict = launch_air_quality_widget()