In [1]:
import os
import s3fs
import dask
import dash
import requests

from dash import dcc, html
from dash.dependencies import Input, Output
from concurrent.futures import ThreadPoolExecutor, as_completed

import numpy as np
import xarray as xr
import pandas as pd 
import cartopy.crs as ccrs
import ipywidgets as widgets
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import matplotlib.dates as mdates 
import cartopy.feature as cfeature
import matplotlib.patheffects as path_effects


In [2]:
# load in methods for reading, writing, and managing files stored in S3. The connection is made anonamously. 
s3 = s3fs.S3FileSystem(anon=True) 
# List all data folders available from UNSW
s3_bucket_name = "imos-data/"
s3_org_name = "UNSW/"
s3_product = "NRS_extremes/Temperature_DataProducts_v2"
data_folders = s3.ls(f"{s3_bucket_name}{s3_org_name}{s3_product}")
# List all subfolders under the product directory
data_folders

['imos-data/UNSW/NRS_extremes/Temperature_DataProducts_v2/MAI090',
 'imos-data/UNSW/NRS_extremes/Temperature_DataProducts_v2/PH050',
 'imos-data/UNSW/NRS_extremes/Temperature_DataProducts_v2/PH100',
 'imos-data/UNSW/NRS_extremes/Temperature_DataProducts_v2/ROT055']

## Load datasets into Memory (explicit load)

In [47]:
# Initialize an empty dictionary to store datasets
datasets = {}

# Iterate through each subfolder and load .nc files
for folder in data_folders:
    # List files in the current folder
    files_in_folder = s3.ls(folder)
    
    # Filter for .nc files
    nc_files = [file for file in files_in_folder if file.endswith(".nc")]
    
    # Load each .nc file into memory
    for nc_file in nc_files:
        # Open the file using s3fs and load the dataset into memory with xarray
        with s3.open(nc_file, mode='rb') as f:
            ds = xr.open_dataset(f, engine="h5netcdf")
            ds.load()  # Explicitly load the dataset into memory
        
        # Add to the datasets dictionary using the folder name as the key
        datasets[folder] = ds

In [None]:
datasets

## Optimised to variables download 

In [55]:
# Define the variables you want to extract, including "TEMP_PER10", "TEMP_PER90", and "TEMP_MEAN"
variables_to_extract = ["TIME", "TEMP_COLD_SPIKE", "TEMP_HEAT_SPIKE", "TEMP_PER10", "TEMP_PER90", "TEMP_MEAN"]

# Initialize an empty dictionary to store DataFrames
dataframes = {}

# Iterate through each subfolder and process .nc files
for folder in data_folders:
    # List files in the current folder
    files_in_folder = s3.ls(folder)
    
    # Filter for .nc files
    nc_files = [file for file in files_in_folder if file.endswith(".nc")]
    
    # Extract the last part of the folder name to use as the key
    folder_key = folder.rstrip('/').split('/')[-1]
    
    # Load each .nc file into memory and process it
    for nc_file in nc_files:
        # Open the file using s3fs and load the dataset into memory with xarray
        with s3.open(nc_file, mode='rb') as f:
            ds = xr.open_dataset(f, engine="h5netcdf", chunks={})

            # Extract the lat/lon attributes
            lat = ds.attrs.get('geospatial_lat_max')
            lon = ds.attrs.get('geospatial_lon_max')
            
            # Extract only the specified variables and their attributes
            if all(var in ds.variables for var in variables_to_extract):
                extracted_ds = ds[variables_to_extract].load()
                
                # Convert the dataset to a DataFrame
                df = extracted_ds.to_dataframe().reset_index()
                
                # Add lat and lon columns to the DataFrame
                df['lat'] = lat
                df['lon'] = lon
                
                # Calculate the intervals for heat spikes and cold spikes
                df['heat_spike_diff'] = df['TEMP_HEAT_SPIKE'] - df['TEMP_PER90']
                df['cold_spike_diff'] = df['TEMP_PER10'] - df['TEMP_COLD_SPIKE']
                
                # Calculate the interval range between TEMP_MEAN and the percentiles
                df['temp_interval'] = df['TEMP_PER90'] - df['TEMP_MEAN']
                
                # Classify spikes into categories based on the intervals
                def classify_spike(temp_diff, interval):
                    if pd.isna(temp_diff):
                        return None  # No spike
                    elif temp_diff < interval:
                        return 0  # Within 90th percentile
                    elif interval <= temp_diff < 2 * interval:
                        return 1  # 1-2 times the interval
                    elif 2 * interval <= temp_diff < 3 * interval:
                        return 2  # 2-3 times the interval
                    elif 3 * interval <= temp_diff < 4 * interval:
                        return 3  # 3-4 times the interval
                    else:
                        return 4  # 4+ times the interval
                
                # Apply the classification logic to both heat and cold spikes
                df['heat_spike_category'] = df.apply(lambda row: classify_spike(row['heat_spike_diff'], row['temp_interval']), axis=1)
                df['cold_spike_category'] = df.apply(lambda row: classify_spike(row['cold_spike_diff'], row['temp_interval']), axis=1)
                
                # Store the DataFrame in the dictionary using the folder's last part as the key
                if folder_key in dataframes:
                    # If the folder_key already exists, append to the existing DataFrame
                    dataframes[folder_key] = pd.concat([dataframes[folder_key], df], ignore_index=True)
                else:
                    # If the folder_key does not exist, initialize it with the current DataFrame
                    dataframes[folder_key] = df
            else:
                print(f"Missing one or more variables in file: {nc_file}")
            
            # Explicitly close the dataset to free memory
            ds.close()

# Print the keys to verify
print(dataframes.keys())


dict_keys(['MAI090', 'PH050', 'PH100', 'ROT055'])


In [56]:
dataframes

{'MAI090':                      TIME  DEPTH  TEMP_COLD_SPIKE  TEMP_HEAT_SPIKE  \
 0     1944-01-02 12:00:00    2.0              NaN              NaN   
 1     1944-01-02 12:00:00   21.0              NaN              NaN   
 2     1944-01-03 12:00:00    2.0              NaN              NaN   
 3     1944-01-03 12:00:00   21.0              NaN              NaN   
 4     1944-01-04 12:00:00    2.0              NaN              NaN   
 ...                   ...    ...              ...              ...   
 58347 2023-11-16 12:00:00   21.0              NaN              NaN   
 58348 2023-11-17 12:00:00    2.0              NaN              NaN   
 58349 2023-11-17 12:00:00   21.0              NaN              NaN   
 58350 2023-11-18 12:00:00    2.0              NaN              NaN   
 58351 2023-11-18 12:00:00   21.0              NaN              NaN   
 
        TEMP_PER10  TEMP_PER90  TEMP_MEAN    lat    lon  heat_spike_diff  \
 0       14.367753   17.349215  15.805466 -42.52  148.3     

##  Show location of stations in the Climatology directory

Can only be used if the datasets() xarray is being implemented in the download section

In [None]:
# Create a figure and a map with coastlines
fig, ax = plt.subplots(figsize=(12, 12), subplot_kw={'projection': ccrs.PlateCarree()})

# Add coastlines and land features
ax.coastlines(resolution='50m')
ax.add_feature(cfeature.LAND, edgecolor='black', facecolor='lightgrey')

# Set the extent to focus on Australia (bounding box coordinates)
ax.set_extent([110, 160, -45, -10], crs=ccrs.PlateCarree())  # [lon_min, lon_max, lat_min, lat_max]


# Iterate over datasets to plot the "O" at geospatial_lat_max and geospatial_lon_max
for folder, ds in datasets.items():
    # Extract latitude and longitude from the dataset attributes
    lat = ds.attrs.get('geospatial_lat_max')
    lon = ds.attrs.get('geospatial_lon_max')
    
    # Check if the attributes exist before plotting
    if lat is not None and lon is not None:
        # Plot a big "O" at the location
        ax.text(lon, lat, 'O', fontsize=10, color='red', ha='center', va='center',
                transform=ccrs.PlateCarree())

# Set title and show plot
ax.set_title('Locations of Geospatial Maxima')
plt.show()

## Create and interactive date picker with the colour coded spikes

Can only be used if the datasets() xarray is being implemented in the download section

In [14]:
# Function to convert date to "days since 1950-01-01"
def date_to_days_since_1950(date):
    ref_date = pd.to_datetime("1950-01-01")
    delta = date - ref_date
    return delta.days

# Function to update the plot
def update_plot(date):
    # Convert selected date to "days since 1950-01-01"
    target_days = date_to_days_since_1950(pd.to_datetime(date))
    
    # Create a figure and a map with coastlines
    fig, ax = plt.subplots(figsize=(12, 12), subplot_kw={'projection': ccrs.PlateCarree()})
    ax.coastlines(resolution='50m')
    ax.add_feature(cfeature.LAND, edgecolor='black', facecolor='lightgrey')
    ax.set_extent([110, 160, -45, -10], crs=ccrs.PlateCarree())

    # Iterate over datasets to plot markers
    for folder, ds in datasets.items():
        lat = ds.attrs.get('geospatial_lat_max')
        lon = ds.attrs.get('geospatial_lon_max')
        
        if lat is not None and lon is not None:
            # TIME is already in datetime64 format, so no need for timedelta conversion
            time_in_days = (ds['TIME'].values - np.datetime64("1950-01-01")) / np.timedelta64(1, 'D')

            # Find the index for the selected date in days since 1950
            date_idx = np.isclose(time_in_days, target_days, atol=0.5)  # use a tolerance for floating point comparisons
            
            if date_idx.any():
                temp_heat_spike = ds['TEMP_HEAT_SPIKE'].values[date_idx]
                temp_cold_spike = ds['TEMP_COLD_SPIKE'].values[date_idx]
                
                if not pd.isna(temp_heat_spike).all():
                    ax.plot(lon, lat, 'o', markersize=10, color='red', markeredgecolor='black', markeredgewidth=1.5,
                            transform=ccrs.PlateCarree())
                elif not pd.isna(temp_cold_spike).all():
                    ax.plot(lon, lat, 'o', markersize=10, color='blue', markeredgecolor='black', markeredgewidth=1.5,
                            transform=ccrs.PlateCarree())
                else:
                    ax.text(lon, lat, 'O', fontsize=15, color='black', ha='center', va='center',
                            transform=ccrs.PlateCarree())
    
    ax.set_title(f'Heat and Cold Spikes on {date}')
    plt.show()

# Create a date picker widget
date_picker = widgets.DatePicker(
    description='Select Date',
    value=pd.to_datetime("2018-11-30") # Date happens to be a Heatwave and a Cold spike at the same time east and west.
)

# Create an interactive function to trigger the plot update when the date changes
widgets.interactive(update_plot, date=date_picker)



interactive(children=(DatePicker(value=Timestamp('2018-11-30 00:00:00'), description='Select Date', step=1), O…

## Attempt at Bokeh to HTML

Can only be used if the datasets() xarray is being implemented in the download section

In [12]:
from bokeh.plotting import figure, show, output_file
from bokeh.io import output_notebook, curdoc
from bokeh.models import HoverTool, WMTSTileSource, ColumnDataSource
from bokeh.layouts import column
from bokeh.models.widgets import DatePicker
import numpy as np
import pandas as pd

output_notebook()

# Sample dataset for illustration
datasets = {
    "dataset_1": {"attrs": {"geospatial_lat_max": -33.8688, "geospatial_lon_max": 151.2093},
                  "TIME": np.array(['2018-11-30T00:00:00'], dtype='datetime64[ns]'),
                  "TEMP_HEAT_SPIKE": np.array([1]), "TEMP_COLD_SPIKE": np.array([np.nan])},
    "dataset_2": {"attrs": {"geospatial_lat_max": -31.9505, "geospatial_lon_max": 115.8605},
                  "TIME": np.array(['2018-11-30T00:00:00'], dtype='datetime64[ns]'),
                  "TEMP_HEAT_SPIKE": np.array([np.nan]), "TEMP_COLD_SPIKE": np.array([1])}
}

# Function to convert lat/lon to Mercator projection
def lon_lat_to_mercator(lon, lat):
    k = 6378137
    x = lon * (k * np.pi / 180.0)
    y = np.log(np.tan((90 + lat) * np.pi / 360.0)) * k
    return x, y

# Function to convert date to "days since 1950-01-01"
def date_to_days_since_1950(date):
    ref_date = pd.to_datetime("1950-01-01")
    delta = date - ref_date
    return delta.days

# Set up Bokeh figure with custom tile provider
p = figure(x_range=(11000000, 16000000), y_range=(-4500000, -1000000), 
           height=800, width=800,
           x_axis_type="mercator", y_axis_type="mercator")

tile_url = "https://c.tile.openstreetmap.org/{Z}/{X}/{Y}.png"
p.add_tile(WMTSTileSource(url=tile_url))

# Set up data source for interactive updating
source = ColumnDataSource(data=dict(x=[], y=[], color=[], size=[]))

p.circle(x='x', y='y', color='color', size='size', source=source, line_color='black', line_width=1.5)

hover = HoverTool(tooltips=[("Lat", "@y"), ("Lon", "@x")])
p.add_tools(hover)

# Function to update the plot based on the selected date
def update_plot_bokeh_interactive(attr, old, new):
    selected_date = pd.to_datetime(date_picker.value)
    target_days = date_to_days_since_1950(selected_date)
    
    # Clear current data
    new_data = dict(x=[], y=[], color=[], size=[])
    
    # Plot markers based on dataset
    for folder, ds in datasets.items():
        lat = ds['attrs']['geospatial_lat_max']
        lon = ds['attrs']['geospatial_lon_max']
        
        time_in_days = (ds['TIME'] - np.datetime64("1950-01-01")) / np.timedelta64(1, 'D')
        
        date_idx = np.isclose(time_in_days, target_days, atol=0.5)
        
        if date_idx.any():
            temp_heat_spike = ds['TEMP_HEAT_SPIKE'][date_idx]
            temp_cold_spike = ds['TEMP_COLD_SPIKE'][date_idx]
            
            lon_merc, lat_merc = lon_lat_to_mercator(lon, lat)
            
            if not pd.isna(temp_heat_spike).all():
                new_data['x'].append(lon_merc)
                new_data['y'].append(lat_merc)
                new_data['color'].append("red")
                new_data['size'].append(10)
            elif not pd.isna(temp_cold_spike).all():
                new_data['x'].append(lon_merc)
                new_data['y'].append(lat_merc)
                new_data['color'].append("blue")
                new_data['size'].append(10)
    
    # Update data source
    source.data = new_data
    p.title.text = f'Heat and Cold Spikes on {selected_date.date()}'

# Create a DatePicker widget
date_picker = DatePicker(title="Select Date", value="2018-11-30", min_date="2010-01-01", max_date="2022-12-31")
date_picker.on_change('value', update_plot_bokeh_interactive)

# Initial update for the plot
update_plot_bokeh_interactive(None, None, None)

# Layout and display
layout = column(date_picker, p)
curdoc().add_root(layout)
show(layout)


You are generating standalone HTML/JS output, but trying to use real Python
callbacks (i.e. with on_change or on_event). This combination cannot work.

Only JavaScript callbacks may be used with standalone output. For more
information on JavaScript callbacks with Bokeh, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/interaction/js_callbacks.html

Alternatively, to use real Python callbacks, a Bokeh server application may
be used. For more information on building and running Bokeh applications, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/server.html



## Attempt at plotly plot

In [45]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import ipywidgets as widgets

# Initialize a Plotly FigureWidget object
fig = go.FigureWidget()

# Function to update the plot
def update_plot(date):
    # Convert selected date to datetime
    target_date = pd.to_datetime(date)
    
    # Prepare lists to hold marker data
    lons = []
    lats = []
    colors = []
    text = []
    symbols = []

    # Iterate over datasets to gather marker data
    for folder_key, df in dataframes.items():
        # Filter the DataFrame by the selected date
        date_idx = df['TIME'].dt.normalize() == target_date.normalize()

        # If any rows match the date, gather the data for plotting
        if date_idx.any():
            # Iterate over the rows that match the date
            for idx, row in df[date_idx].iterrows():
                lat = row['lat']
                lon = row['lon']
                temp_heat_spike = row['TEMP_HEAT_SPIKE']
                temp_cold_spike = row['TEMP_COLD_SPIKE']

                # Determine the marker color and symbol based on spike data
                if not pd.isna(temp_heat_spike):
                    colors.append('red')
                    text.append('Heat Spike')
                    symbols.append('circle')  # Solid circle for heat spikes
                elif not pd.isna(temp_cold_spike):
                    colors.append('blue')
                    text.append('Cold Spike')
                    symbols.append('circle')  # Solid circle for cold spikes
                else:
                    colors.append('black')
                    text.append('No Data')
                    symbols.append('circle-open')  # Hollow circle for no data
                
                # Add lat/lon to the lists
                lons.append(lon)
                lats.append(lat)

    # Update the traces in the FigureWidget
    with fig.batch_update():
        fig.data = []  # Clear existing traces
        
        fig.add_trace(go.Scattergeo(
            lon=lons,
            lat=lats,
            mode='markers',
            marker=dict(
                size=10,
                color=colors,
                symbol=symbols,  # Use symbols to set marker shapes
                line=dict(
                    width=2,
                    color='black'
                ),
                opacity=0.6
            ),
            text=text,
            hoverinfo='text'
        ))
        
        fig.update_layout(
            title=f'Heat and Cold Spikes on {date}',
            geo=dict(
                scope='world',
                projection_type='mercator',
                center=dict(lat=-27.0, lon=135.0),  # Center around Australia
                projection=dict(
                    type='mercator'
                ),
                showland=True,
                landcolor='lightgrey',
                coastlinecolor='black',
                showocean=True,
                oceancolor='lightblue',
                showlakes=True,
                lakecolor='lightblue',
                showrivers=True,
                lonaxis=dict(
                    range=[100, 160]  # Adjust longitude bounds to cover all of Australia
                ),
                lataxis=dict(
                    range=[-50, -10]  # Adjust latitude bounds to cover all of Australia
                )
            ),
            autosize=True,
            height=800,  # Increase the height of the figure
            width=1000   # Increase the width of the figure
        )

# Create a date picker widget
date_picker = widgets.DatePicker(
    description='Select Date',
    value=pd.to_datetime("2018-11-30")
)

# Create an interactive function to trigger the plot update when the date changes
interactive_plot = widgets.interactive(update_plot, date=date_picker)

# Display the date picker and plot together
display(date_picker, fig)


DatePicker(value=Timestamp('2018-11-30 00:00:00'), description='Select Date', step=1)

FigureWidget({
    'data': [{'hoverinfo': 'text',
              'lat': [-42.52, -42.52, -34.02, -34.02, -34.02, -34.02, -34.02,
                      -34.02, -34.03, -34.03, -34.03, -34.03, -34.03, -34.03,
                      -34.03, -31.93, -31.93, -31.93, -31.93],
              'lon': [148.3, 148.3, 151.25, 151.25, 151.25, 151.25, 151.25,
                      151.25, 151.27, 151.27, 151.27, 151.27, 151.27, 151.27,
                      151.27, 115.45, 115.45, 115.45, 115.45],
              'marker': {'color': [red, red, black, black, black, black, black,
                                   black, black, black, red, red, black, red, red,
                                   blue, blue, black, blue],
                         'line': {'color': 'black', 'width': 2},
                         'opacity': 0.6,
                         'size': 10,
                         'symbol': [circle, circle, circle-open, circle-open,
                                    circle-open, circle-open, circle-

## Attempt to create a time slider instead of drop down

In [None]:
import ipywidgets as widgets
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# Function to convert date to "days since 1950-01-01"
def date_to_days_since_1950(date):
    ref_date = pd.to_datetime("1950-01-01")
    delta = date - ref_date
    return delta.days

# Function to convert "days since 1950-01-01" to a date
def days_since_1950_to_date(days):
    ref_date = pd.to_datetime("1950-01-01")
    return ref_date + pd.to_timedelta(days, unit='D')

# Function to update the plot
def update_plot(days):
    # Convert selected days to a date
    date = days_since_1950_to_date(days)
    
    # Create a figure and a map with coastlines
    fig, ax = plt.subplots(figsize=(12, 12), subplot_kw={'projection': ccrs.PlateCarree()})
    ax.coastlines(resolution='50m')
    ax.add_feature(cfeature.LAND, edgecolor='black', facecolor='lightgrey')
    ax.set_extent([110, 160, -45, -10], crs=ccrs.PlateCarree())

    # Iterate over datasets to plot markers
    for folder, ds in datasets.items():
        lat = ds.attrs.get('geospatial_lat_max')
        lon = ds.attrs.get('geospatial_lon_max')
        
        if lat is not None and lon is not None:
            # TIME is already in datetime64 format, so no need for timedelta conversion
            time_in_days = (ds['TIME'].values - np.datetime64("1950-01-01")) / np.timedelta64(1, 'D')

            # Find the index for the selected date in days since 1950
            date_idx = np.isclose(time_in_days, days, atol=0.5)  # use a tolerance for floating point comparisons
            
            if date_idx.any():
                temp_heat_spike = ds['TEMP_HEAT_SPIKE'].values[date_idx]
                temp_cold_spike = ds['TEMP_COLD_SPIKE'].values[date_idx]
                
                if not pd.isna(temp_heat_spike).all():
                    ax.plot(lon, lat, 'o', markersize=10, color='red', markeredgecolor='black', markeredgewidth=1.5,
                            transform=ccrs.PlateCarree())
                elif not pd.isna(temp_cold_spike).all():
                    ax.plot(lon, lat, 'o', markersize=10, color='blue', markeredgecolor='black', markeredgewidth=1.5,
                            transform=ccrs.PlateCarree())
                else:
                    ax.text(lon, lat, 'O', fontsize=15, color='black', ha='center', va='center',
                            transform=ccrs.PlateCarree())
    
    ax.set_title(f'Heat and Cold Spikes on {date.date()}')
    plt.show()

# Determine the maximum number of days since 1950 based on the most recent dataset's date
latest_date = pd.to_datetime("today")  # Replace with the latest date in your data, or use today's date
max_days = date_to_days_since_1950(latest_date)

# Create a slider with the correct range
date_slider = widgets.IntSlider(
    value=date_to_days_since_1950(pd.to_datetime("2018-11-30")),  # Initial value
    min=0,  # Start from 1950-01-01
    max=max_days,  # Maximum number of days until today or your latest data
    step=1,  # Step in days
    description='Days since 1950',
    layout=widgets.Layout(width='800px')  # Adjust width of the slider
)

# Create an interactive function to trigger the plot update when the slider changes
interactive_plot = widgets.interactive(update_plot, days=date_slider)

# Display the slider and plot without duplicating the slider
widgets.VBox([interactive_plot])


## Atempt to use Plotly with HTML output

In [34]:
# Extract unique dates for dropdown
dates = []
for df in dataframes.values():
    dates.extend(df['TIME'].dt.date.unique())

# Remove duplicates and sort dates
dates = sorted(set(dates))
dates = [pd.Timestamp(date).strftime('%Y-%m-%d') for date in dates]

# Create the figure
fig = go.Figure()

def add_traces_for_date(date_str):
    date = pd.to_datetime(date_str).date()
    
    # Prepare lists to hold marker data
    lons, lats, colors, text, symbols = [], [], [], [], []
    
    for folder_key, df in dataframes.items():
        date_idx = df['TIME'].dt.date == date
        
        if date_idx.any():
            for idx, row in df[date_idx].iterrows():
                lat, lon = row['lat'], row['lon']
                temp_heat_spike = row['TEMP_HEAT_SPIKE']
                temp_cold_spike = row['TEMP_COLD_SPIKE']
                
                if not pd.isna(temp_heat_spike):
                    colors.append('red')
                    text.append('Heat Spike')
                    symbols.append('circle')
                elif not pd.isna(temp_cold_spike):
                    colors.append('blue')
                    text.append('Cold Spike')
                    symbols.append('circle')
                else:
                    colors.append('black')
                    text.append('No Data')
                    symbols.append('circle-open')
                
                lons.append(lon)
                lats.append(lat)
    
    fig.add_trace(go.Scattergeo(
        lon=lons,
        lat=lats,
        mode='markers',
        marker=dict(
            size=10,
            color=colors,
            symbol=symbols,
            line=dict(width=2, color='black'),
            opacity=0.6
        ),
        text=text,
        hoverinfo='text',
        name=f'Data for {date_str}'
    ))

# Add traces for all dates
for date in dates:
    add_traces_for_date(date)

# Create dropdown buttons
date_buttons = [
    {
        "label": date,
        "method": "update",
        "args": [
            {"visible": [trace.name == f'Data for {date}' for trace in fig.data]},
            {"title": f'Heat and Cold Spikes on {date}'}
        ]
    } for date in dates
]

# Update layout with dropdowns
fig.update_layout(
    updatemenus=[
        {
            "buttons": date_buttons,
            "direction": "down",
            "pad": {"r": 10, "t": 10},
            "showactive": True,
            "x": 0.1,
            "xanchor": "left",
            "y": 1.2,
            "yanchor": "top",
            "title": "Select Date"
        }
    ],
    geo=dict(
        scope='world',
        projection_type='mercator',
        center=dict(lat=-27.0, lon=135.0),
        projection=dict(type='mercator'),
        showland=True,
        landcolor='lightgrey',
        coastlinecolor='black',
        showocean=True,
        oceancolor='lightblue',
        lonaxis=dict(range=[100, 160]),
        lataxis=dict(range=[-50, -10])
    ),
    title="Heat and Cold Spikes",
    height=800,
    width=1000
)

# Save the figure as an HTML file
fig.write_html("interactive_map.html", include_plotlyjs='cdn')

# Optionally save the figure as a JSON file (data only, without interactivity)
fig.write_json("interactive_map.json")

KeyboardInterrupt: 

In [57]:
import plotly.graph_objects as go
import pandas as pd

# Assuming dataframes is already loaded and available
# Filter for November 2018
def filter_november_dataframes(dfs):
    filtered_dfs = {}
    for key, df in dfs.items():
        # Ensure TIME is in datetime format
        if not pd.api.types.is_datetime64_any_dtype(df['TIME']):
            df['TIME'] = pd.to_datetime(df['TIME'])
        
        # Filter only for November 2018
        df_november = df[(df['TIME'] >= '2018-11-01') & (df['TIME'] < '2018-12-01')]
        filtered_dfs[key] = df_november
    return filtered_dfs

# Apply the filter to the dataframes
filtered_dataframes = filter_november_dataframes(dataframes)

# Extract unique dates for dropdown (within November 2018)
dates = []
for df in filtered_dataframes.values():
    dates.extend(df['TIME'].dt.date.unique())

# Remove duplicates and sort dates
dates = sorted(set(dates))
dates = [pd.Timestamp(date).strftime('%Y-%m-%d') for date in dates]

# Create the figure
fig = go.Figure()

# Function to assign colors and categories based on deviation from percentiles
def assign_heat_category(deviation, interval):
    if deviation <= interval:
        return 0, 'yellow'
    elif deviation <= 2 * interval:
        return 1, 'orange'
    elif deviation <= 3 * interval:
        return 2, 'red'
    else:
        return 3, 'maroon'

def assign_cold_category(deviation, interval):
    if deviation <= interval:
        return 0, 'turquoise'
    elif deviation <= 2 * interval:
        return 1, 'cyan'
    elif deviation <= 3 * interval:
        return 2, 'royalblue'
    else:
        return 3, 'navy'

# Function to add traces for a specific date
def add_traces_for_date(date_str):
    date = pd.to_datetime(date_str).date()
    
    # Prepare lists to hold marker data
    lons, lats, colors, sizes, text, symbols = [], [], [], [], [], []
    
    for folder_key, df in filtered_dataframes.items():
        date_idx = df['TIME'].dt.date == date
        
        if date_idx.any():
            for idx, row in df[date_idx].iterrows():
                lat, lon = row['lat'], row['lon']
                temp_heat_spike = row['TEMP_HEAT_SPIKE']
                temp_cold_spike = row['TEMP_COLD_SPIKE']
                temp_per90 = row['TEMP_PER90']
                temp_per10 = row['TEMP_PER10']
                
                if not pd.isna(temp_heat_spike):
                    interval = temp_per90 - row['TEMP_MEAN']
                    deviation = temp_heat_spike - temp_per90
                    category, color = assign_heat_category(deviation, interval)
                    colors.append(color)
                    sizes.append(10 + deviation * 2)
                    text.append(f'Heat Spike: +{deviation:.2f}°C above 90th percentile\nCategory: {category}')
                    symbols.append('circle')
                
                elif not pd.isna(temp_cold_spike):
                    interval = row['TEMP_MEAN'] - temp_per10
                    deviation = temp_per10 - temp_cold_spike
                    category, color = assign_cold_category(deviation, interval)
                    colors.append(color)
                    sizes.append(10 + deviation * 2)
                    text.append(f'Cold Spike: -{deviation:.2f}°C below 10th percentile\nCategory: {category}')
                    symbols.append('circle')
                
                else:
                    colors.append('black')
                    sizes.append(8)
                    text.append('No Data')
                    symbols.append('circle-open')
                
                lons.append(lon)
                lats.append(lat)
    
    # Only add a trace if there is data for the date
    if lons and lats:
        fig.add_trace(go.Scattergeo(
            lon=lons,
            lat=lats,
            mode='markers',
            marker=dict(
                size=sizes,
                color=colors,
                symbol=symbols,
                line=dict(width=2, color='black'),
                opacity=0.6
            ),
            text=text,
            hoverinfo='text',
            name=f'Data for {date_str}',
            visible=False  # Start with all traces hidden
        ))

# Add traces for all dates in November 2018
for date in dates:
    add_traces_for_date(date)

# Show only the first date's data by default
if fig.data:
    fig.data[0].visible = True

# Create dropdown buttons for each date
date_buttons = [
    {
        "label": date,
        "method": "update",
        "args": [
            {"visible": [trace.name == f'Data for {date}' for trace in fig.data]},
            {"title": f'Heat and Cold Spikes on {date}'}
        ]
    } for date in dates
]

# Update layout with dropdowns
fig.update_layout(
    updatemenus=[
        {
            "buttons": date_buttons,
            "direction": "down",
            "pad": {"r": 10, "t": 10},
            "showactive": True,
            "x": 0.1,
            "xanchor": "left",
            "y": 1.2,
            "yanchor": "top"
        }
    ],
    geo=dict(
        scope='world',
        projection_type='mercator',
        center=dict(lat=-27.0, lon=135.0),
        projection=dict(type='mercator'),
        showland=True,
        landcolor='lightgrey',
        coastlinecolor='black',
        showocean=True,
        oceancolor='lightblue',
        lonaxis=dict(range=[100, 160]),
        lataxis=dict(range=[-50, -10])
    ),
    title="Heat and Cold Spikes",
    height=800,
    width=1000
)

# Save the figure as an HTML file
fig.write_html("interactive_map.html", include_plotlyjs='cdn')

# Optionally save the figure as a JSON file (data only, without interactivity)
fig.write_json("interactive_map.json")
