In [2]:
import xarray as xr
import geopandas as gpd
import pandas as pd
from shapely.geometry import MultiPoint, Polygon
from keplergl import KeplerGl
import zarr

# Depth level to select from the chlorophyll dataset. Choose one of the 36 available levels:
# [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 80, 90, 100, 125, 150, 175, 200, 225, 250, 275, 300, 350, 400, 450, 500, 550, 600, 700, 800, 900, 1000]
# (units presumably metres - TODO confirm against the dataset metadata)
depth = 0

# Hotspot threshold expressed as a quantile in the range 0 to 1.
# Example: 0.8 keeps, for each month, only the values at or above that month's 80% quantile.
hotspot_concentration = 0.8

In [3]:

# The plankton outputs are published as one ARCO-format zarr store, so the whole dataset
# can be opened directly from this single link - no downloading necessary.
zarr_path = "https://s3.waw3-1.cloudferro.com/emodnet/bluecloud_edito_demo/soca_chla_01_12.zarr"
chlds = xr.open_dataset(zarr_path, engine='zarr')

# Bare expression: displays the dataset summary when it is the last line of a notebook cell.
chlds
def get_WildlifeData():
    '''Load the Great Whale migration data sample together with its spatial extent.

    Return: (whales, extent)
        whales <geodataframe> - records read from input/azores_whales_records.gpkg
            (crs wgs84 according to the original note; not checked here)
        extent <array> - total_bounds of the records, ordered (minx, miny, maxx, maxy)
    '''
    whales = gpd.read_file(r'input/azores_whales_records.gpkg', driver='GPKG')
    return whales, whales.total_bounds

# %%
def get_PhytoHotspot(geodata, depth, hotspot_concentration, zarr_path=None):
    '''Function that gives back the CHL hotspots for every month of 2018 at a specific depth.

    For each month, only the rows whose chlorophyll value is at or above that month's
    `hotspot_concentration` quantile are kept; the twelve monthly results are stacked.

    Input - geodata <geodataframe>: tracking records; their geometries are fed to MultiPoint,
                so point geometries are expected. Defines the area of interest.
            depth <int>: depth level to select; it is matched exactly against the dataset's
                depth coordinate.
            hotspot_concentration <float>: quantile in the range 0 to 1 used as monthly threshold.
            zarr_path <str>: optional zarr store to read instead of the default demo store.
    Output - chl_data <dataframe>: hotspot rows for all months, with the chlorophyll column
                renamed from 'soca_chla' to 'CHLa_mg_L'.
    '''
    if zarr_path is None:
        # Using ARCO format zarr we access all the plankton outputs from one link, no downloading necessary
        zarr_path = "https://s3.waw3-1.cloudferro.com/emodnet/bluecloud_edito_demo/soca_chla_01_12.zarr"

    # 1) Bounding box of animal tracking data, grown by a 0.5 buffer on every side
    bbox = MultiPoint(list(geodata['geometry'])).bounds
    bbox_geom = Polygon([(bbox[0], bbox[1]), (bbox[2], bbox[1]), (bbox[2], bbox[3]), (bbox[0], bbox[3])]).buffer(0.5)
    min_lon, min_lat, max_lon, max_lat = bbox_geom.bounds

    # 2) Reading phytoplankton dataset and filtering
    # The longitude window is widened by a further 10 on each side; latitude is not.
    chl = xr.open_dataset(zarr_path, engine='zarr').sel(depth=depth)
    chl = chl.sel(longitude=slice(min_lon - 10, max_lon + 10)).sel(latitude=slice(min_lat, max_lat))['soca_chla']

    # 3) Keep, month by month, the rows at or above the requested quantile
    monthly_hotspots = []
    for month in range(1, 13):
        month_data = chl.sel(time=f'2018-{month:02d}').to_dataframe().reset_index(drop=False).dropna()
        month_data['time'] = pd.to_datetime(month_data['time'])
        threshold = month_data['soca_chla'].quantile(hotspot_concentration)
        month_data = month_data.loc[month_data['soca_chla'] >= threshold]
        monthly_hotspots.append(month_data.rename(columns={'soca_chla': 'CHLa_mg_L'}))

    return pd.concat(monthly_hotspots, ignore_index=True)

# %%
def plot_PhytoMap(geodata, phytoplankton, MPAs):
    '''Function that returns a map of phytoplankton hotspots, marine wildlife data and MPAs.

    Input - geodata <geodataframe>: whale records; needs a 'timestamp' column.
            phytoplankton <dataframe>: hotspot rows; needs a 'time' column.
            MPAs <geodataframe>: marine protected area polygons.
    return <KeplerGl instance> holding three datasets ('MPAs', 'Great Whales', 'Phytoplankton')
        and a preset configuration. The whale time filter initially spans the first three
        months after the earliest whale timestamp.

    The input frames are copied before the time columns are converted, so the caller's
    objects are left untouched.
    '''
    # Work on copies: the conversions below must not leak into the caller's data
    geodata = geodata.copy()
    phytoplankton = phytoplankton.copy()

    # Time columns are handed to the map as strings
    geodata['timestamp'] = geodata['timestamp'].astype(str)
    phytoplankton['time'] = pd.to_datetime(phytoplankton['time'])
    # Distinct months (e.g. '2018-05'), usable by a month filter on the Phytoplankton dataset
    phytoplankton['month'] = phytoplankton['time'].dt.to_period('M').astype(str)
    phytoplankton['time'] = phytoplankton['time'].astype(str)

    # Initial window of the whale time filter: the first 3 months from the earliest record
    time_format = '%Y-%m-%dT%H:%M:%SZ'
    window_start = pd.to_datetime(geodata['timestamp'].min())
    window_end = window_start + pd.DateOffset(months=3)
    whale_time_window = [window_start.strftime(time_format), window_end.strftime(time_format)]

    config = {
        "version": "v1",
        "config": {
            "visState": {
                "filters": [
                    {
                        "dataId": ["Great Whales"],
                        "id": "time_filter",
                        "name": ["timestamp"],
                        "type": "timeRange",
                        "value": whale_time_window,
                        "enlarged": True,
                        "plotType": "histogram",
                        "animationWindow": "free",
                        "yAxis": None
                    }
                    # Optional: a "multiSelect" filter on the Phytoplankton "month" column
                    # (id "month_filter", e.g. value ['2018-05']) can be added here.
                ],
                "layers": [
                    {
                        "id": "whales_layer",
                        "type": "geojson",
                        "config": {
                            "dataId": "Great Whales",
                            "label": "Great Whales",
                            "color": [18, 147, 154],
                            "columns": {"geojson": "geometry"},
                            "isVisible": True,
                            "visConfig": {
                                "opacity": 0.8,
                                "strokeOpacity": 0.8,
                                "thickness": 0.5,
                                "colorRange": {
                                    "name": "ColorBrewer Paired-8",
                                    "type": "qualitative",
                                    "category": "ColorBrewer",
                                    "colors": [
                                        "#a6cee3", "#1f78b4", "#b2df8a", "#33a02c",
                                        "#fb9a99", "#e31a1c", "#fdbf6f", "#ff7f00"
                                    ]
                                },
                                "radius": 10,
                                "filled": True  # Fill in the whale layer
                            }
                        },
                        "visualChannels": {
                            # Color based on wild_id
                            "colorField": {"name": "wild_id", "type": "string"},
                            # NOTE(review): "quantile" on a string field - "ordinal" may be
                            # the intended scale; kept as the effective original value.
                            "colorScale": "quantile"
                        }
                    },
                    {
                        "id": "phyto_layer",
                        "type": "grid",
                        "config": {
                            "dataId": "Phytoplankton",
                            "label": "Phytoplankton",
                            "color": [221, 178, 124],
                            "columns": {"lat": "latitude", "lng": "longitude"},
                            "isVisible": False,
                            "visConfig": {
                                "opacity": 0.6,
                                "worldUnitSize": 30,
                                "colorRange": {
                                    "name": "ColorBrewer Greens-9",
                                    "type": "singlehue",
                                    "category": "ColorBrewer",
                                    "colors": [
                                        "#f7fcf5", "#e5f5e0", "#c7e9c0", "#a1d99b",
                                        "#74c476", "#41ab5d", "#238b45", "#006d2c", "#00441b"
                                    ]
                                },
                                "coverage": 1,
                                "sizeRange": [0, 500],
                                "elevationScale": 5,
                                "colorAggregation": "average",
                                "sizeAggregation": "count",
                                "enable3d": False
                            }
                        },
                        "visualChannels": {
                            "colorField": {"name": "CHLa_mg_L", "type": "real"},
                            "colorScale": "quantile"
                        }
                    },
                    {
                        "id": "mpas_layer",
                        "type": "geojson",
                        "config": {
                            "dataId": "MPAs",
                            "label": "MPAs",
                            "color": [0, 0, 128],  # Navy blue
                            "columns": {"geojson": "geometry"},
                            "isVisible": True,
                            "visConfig": {
                                "opacity": 0.5,  # More transparent than the whale layer
                                "strokeOpacity": 0.8,
                                "thickness": 0.5,
                                "filled": True,
                                "colorRange": {
                                    "name": "ColorBrewer Blues-9",
                                    "type": "singlehue",
                                    "category": "ColorBrewer",
                                    "colors": [
                                        "#f7fbff", "#deebf7", "#c6dbef", "#9ecae1",
                                        "#6baed6", "#4292c6", "#2171b5", "#08519c", "#08306b"
                                    ]
                                },
                                "radius": 10
                            }
                        },
                        "visualChannels": {
                            # NOTE(review): 'wild_id' is the whale colour field; confirm the
                            # MPAs dataset really has such a column.
                            "colorField": {"name": "wild_id", "type": "string"},
                            "colorScale": "ordinal"
                        }
                    }
                ],
                "interactionConfig": {
                    "tooltip": {
                        "fieldsToShow": {
                            "Great Whales": [
                                {"name": "timestamp", "format": None},
                                {"name": "location_long", "format": None},
                                {"name": "individual_id", "format": None},
                                {"name": "tag_id", "format": None},
                                {"name": "wild_id", "format": None}
                            ],
                            "Phytoplankton": [
                                {"name": "time", "format": None},
                                {"name": "depth", "format": None},
                                {"name": "CHLa_mg_L", "format": None}
                            ]
                        },
                        "compareMode": False,
                        "enabled": True
                    },
                    "brush": {"enabled": False},
                    "geocoder": {"enabled": False},
                    "coordinate": {"enabled": False}
                },
                "layerBlending": "normal",
                "splitMaps": [],
                "animationConfig": {"currentTime": None, "speed": 1}
            },
            "mapState": {
                "bearing": 0,
                "dragRotate": False,
                "latitude": 39.08282277285858,
                "longitude": -47.56469878817516,
                "pitch": 0,
                "zoom": 1.7759365278629409,
                "isSplit": False
            },
            "mapStyle": {
                "styleType": "satellite",
                "visibleLayerGroups": {
                    "label": True,
                    "road": True,
                    "building": True,
                    "water": True,
                    "land": True
                },
                "threeDBuildingColor": [9.665468314072013, 17.18305478057247, 31.1442867897876],
                "mapStyles": {}
            }
        }
    }

    # Create the map: the dataset names must match the "dataId" values used in the config
    kepler_map = KeplerGl(height=800)
    kepler_map.add_data(MPAs, 'MPAs')
    kepler_map.add_data(geodata, 'Great Whales')
    kepler_map.add_data(phytoplankton, 'Phytoplankton')
    kepler_map.config = config

    return kepler_map

# Input datasets


# Main execution: load the inputs, derive the hotspots, build the map and export it
geodata, whales_bbox = get_WildlifeData()

# Read the MPAs and keep only those within the bounding box of the whale migration data
min_x, min_y, max_x, max_y = whales_bbox
MPAs = gpd.read_file(
    'data/MPAs/WDPA_WDOECM_May2024_Public_marine_shp/WDPA_WDOECM_May2024_Public_marine_shp_2/WDPA_WDOECM_May2024_Public_marine_shp-polygons.shp'
).cx[min_x:max_x, min_y:max_y]

phyto_data = get_PhytoHotspot(geodata, depth, hotspot_concentration)
map_instance = plot_PhytoMap(geodata, phyto_data, MPAs)

map_instance.save_to_html(file_name='outputhtml/phyto_map_all_months_MPAs.html')

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to outputhtml/phyto_map_all_months_MPAs.html!
