In [1]:
import io
import time

import geopandas as gpd
import pandas as pd
import requests
from shapely.geometry import Polygon

# SeaBee mission statistics

Query all **raster** datasets from GeoNode, get their **bounding boxes** and assign each mission to a theme (`Seabirds`, `Mammals` or `Habitat`). Then calculate the area of each bounding box and some simple summary statistics.

#### Notes

 * The **bounding boxes always overestimate the area flown** - in some cases by a large amount since the areas surveyed are not rectangular
 * Some of the NIVA missions are duplicated, e.g. for testing and experimentation, so they are counted more than once in the totals below
 * The **totals here are for all flights**. For NIVA missions, there are usually multiple flights in the same area, so the actual ground covered by NIVA is smaller than estimated. Nevertheless, each flight has a different focus and the figures do represent (an upper limit on) the area surveyed

In [2]:
def get_geonode_data(geonode_url, timeout=60, pause=0.5):
    """Download the dataset listing of a GeoNode server as a GeoDataFrame.

    Pages through ``<geonode_url>/api/v2/datasets/`` by following the
    ``links.next`` URL of each response until it is empty.

    Args:
        geonode_url: Str. Base URL of the GeoNode server. A trailing slash is
            tolerated.
        timeout:     Float. Seconds to wait for each HTTP request before
            giving up. Default 60.
        pause:       Float. Seconds to sleep between consecutive page
            requests. Default 0.5.

    Returns:
        GeoDataFrame in EPSG:4326 with one row per dataset and columns
        'name', 'type', 'abstract', 'geometry' (the bounding-box polygon) and
        'area_km2' (area of that polygon, computed in EPSG:3035).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout:   If a request takes longer than 'timeout'.
    """
    records = []

    # Normalise the base URL so "https://host/" does not become "https://host//api/..."
    base_url = geonode_url.rstrip("/")
    url = f"{base_url}/api/v2/datasets/"

    # A session reuses the underlying connection across the paged requests
    with requests.Session() as session:
        while url:
            # Get a page of results. Fail loudly on HTTP errors rather than
            # trying to parse an error page as a dataset listing
            response = session.get(url, timeout=timeout)
            response.raise_for_status()
            data = response.json()

            for dataset in data["datasets"]:
                # The bounding box is the outer ring of 'll_bbox_polygon'
                bbox = dataset["ll_bbox_polygon"]["coordinates"][0]

                records.append(
                    {
                        "name": dataset["name"],
                        "type": dataset["subtype"],
                        "abstract": dataset["abstract"],
                        "geometry": Polygon(bbox),
                    }
                )

            # URL of the next page of results; falsy on the last page
            url = data["links"]["next"]

            # Pause between requests, but not after the final page
            if url:
                time.sleep(pause)

    # Convert the list into a GeoDataFrame
    gdf = gpd.GeoDataFrame(records, crs="EPSG:4326")

    # Reproject to EPSG:3035 (ETRS89-extended / LAEA Europe) to calculate area in km^2
    gdf["area_km2"] = gdf.to_crs("EPSG:3035")["geometry"].area / 10**6

    return gdf


def _read_abstract_field(x, field):
    """Look up one field in the first HTML table embedded in an abstract.

    The first table column is used as the row index and the value is read
    from the column labelled 1, i.e. the table is treated as a two-column
    key/value listing.

    Args:
        x:     Str or null. Abstract text containing an HTML table.
        field: Str. Row label to look up, e.g. 'Theme'.

    Returns:
        The value stored for 'field', or '' if 'x' is null, contains no
        parsable table, or the table has no such row.
    """
    try:
        if pd.isnull(x):
            return ""

        # Wrap the text in StringIO: passing literal HTML straight to
        # read_html is deprecated in recent pandas releases
        tables = pd.read_html(io.StringIO(x), index_col=0)
        return tables[0].loc[field, 1]
    except Exception:
        # Deliberately best-effort: abstracts are free text, so any parse or
        # lookup failure simply means the field is not available
        return ""


def extract_minio_path(x):
    """Parse the MinIO path from the 'abstract', where available.

    Args:
        x: Str or null. Dataset abstract.

    Returns:
        The 'MinIO path' value from the abstract's table, or '' if it cannot
        be found.
    """
    return _read_abstract_field(x, "MinIO path")


def assign_theme(x):
    """Parse the SeaBee 'theme' from the 'abstract', where available.

    Args:
        x: Str or null. Dataset abstract.

    Returns:
        The 'Theme' value from the abstract's table, or '' if it cannot be
        found.
    """
    return _read_abstract_field(x, "Theme")

In [3]:
# Process data
url = r"https://geonode.seabee.sigma2.no"

# Keep the unfiltered download under its own name so the filtering below can
# be re-run or adjusted without querying the server again
datasets_all = get_geonode_data(url)

# Filter and derive the new columns in a single chain. 'assign' returns a new
# frame, which avoids setting columns on a filtered slice
# (SettingWithCopyWarning)
gdf = datasets_all.query("(name != 'coastline') and (type == 'raster')").assign(
    minio_path=lambda df: df["abstract"].apply(extract_minio_path),
    theme=lambda df: df["abstract"].apply(assign_theme),
)
gdf.head()

Unnamed: 0,name,type,abstract,geometry,area_km2,minio_path,theme
0,niva-zosmap-odm-test_olberg-s_202309131545_rgb...,raster,Collected by niva at olberg-s (niva-zosmap-odm...,"POLYGON ((10.13115 59.00596, 10.13115 59.00781...",0.033212,niva-tidy/2023/niva_202309131545_larvik_olberg...,Habitat
1,niva-zosmap-odm-test_olberg-s_202309131223_rgb...,raster,Collected by niva at olberg-s (niva-zosmap-odm...,"POLYGON ((10.13070 59.00481, 10.13070 59.00792...",0.07962,niva-tidy/2023/niva_202309131223_larvik_olberg...,Habitat
2,agder_strandbergholmen_20230524,raster,Collected by NINA at strandbergholmen (agder) ...,"POLYGON ((7.89796 58.05432, 7.89796 58.05622, ...",0.058608,seabirds/2023/agder_strandbergholmen_20230524,Seabirds
3,Team1Dag14_ternholmen_202305281251,raster,Collected by NINA at ternholmen (Team1Dag14) o...,"POLYGON ((5.70259 59.28024, 5.70259 59.28280, ...",0.05206,seabirds/2023/Team1Dag14_ternholmen_202305281251,Seabirds
4,Team1Dag2_knappholmane1_202305161801,raster,Collected by NINA at knappholmane1 (Team1Dag2)...,"POLYGON ((10.73038 59.32310, 10.73038 59.32397...",0.010916,seabirds/2023/Team1Dag2_knappholmane1_20230516...,Seabirds


In [5]:
# Number of datasets and total bounding-box area (km2) for each theme
areas_by_theme = gdf.groupby("theme")["area_km2"]
areas_by_theme.aggregate(["count", "sum"])

Unnamed: 0_level_0,count,sum
theme,Unnamed: 1_level_1,Unnamed: 2_level_1
Habitat,133,59.044068
Mammals,1,0.137753
Seabirds,622,48.741197


In [6]:
# Number of datasets and total bounding-box area (km2) over all themes
area_column = gdf[["area_km2"]]
area_column.aggregate(["count", "sum"])

Unnamed: 0,area_km2
count,756.0
sum,107.923018
