In [1]:
from io import StringIO

import geopandas as gpd
import pandas as pd
import requests
from shapely.geometry import Polygon

# SeaBee mission statistics

Query all **raster** datasets from GeoNode, get their **bounding boxes** and classify each mission as either `Seabirds` or `Habitats`. Then calculate the areas of the bounding boxes (in km²) and some simple summary statistics.

#### Notes

 * The **bounding boxes always overestimate the area flown** - in some cases by a large amount since the areas surveyed are not rectangular
 * Some of the NIVA missions are duplicated, e.g. for testing and experimentation
 * The **totals here are for all flights**. For NIVA missions, there are usually multiple flights in the same area, so the actual ground covered by NIVA is smaller than estimated. Nevertheless, each flight has a different focus and the figures do represent (an upper limit on) the area surveyed

In [2]:
def get_geonode_data(geonode_url, timeout=60):
    """Fetch all datasets from a GeoNode instance and return them as a GeoDataFrame.

    Pages through ``{geonode_url}/api/v2/datasets/`` by following the
    ``links.next`` URL in each response until it is empty.

    Args:
        geonode_url: Base URL of the GeoNode instance (no trailing slash).
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        GeoDataFrame in EPSG:4326 with columns 'name', 'type', 'abstract',
        'geometry' (polygon built from each dataset's 'll_bbox_polygon') and
        'area_km2' (polygon area computed in EPSG:3035, divided by 10**6).

    Raises:
        requests.HTTPError: If any page request returns an error status.
        requests.Timeout: If a request does not complete within 'timeout'.
    """
    # Initialize an empty list to store each dataset's information
    datasets = []

    # Initialize the URL for the first page of results
    url = f"{geonode_url}/api/v2/datasets/"

    while url:
        # Get a page of results. Without a timeout a stalled server would hang
        # the notebook forever, and without the status check an error page
        # would surface later as a confusing JSON/KeyError failure.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()

        for dataset in data["datasets"]:
            # Get the bounding box (outer ring) and convert it into a polygon
            bbox = dataset["ll_bbox_polygon"]["coordinates"][0]
            polygon = Polygon(bbox)

            # Append the dataset's name and polygon to the list
            datasets.append(
                {
                    "name": dataset["name"],
                    "type": dataset["subtype"],
                    "abstract": dataset["abstract"],
                    "geometry": polygon,
                }
            )

        # Get the URL for the next page of results, or None if this is the last page
        url = data["links"]["next"]

    # Convert the list into a GeoDataFrame
    gdf = gpd.GeoDataFrame(datasets, crs="EPSG:4326")

    # Reproject to EPSG:3035 (ETRS89-extended / LAEA Europe) to calculate area in km^2
    gdf["area_km2"] = gdf.to_crs("EPSG:3035")["geometry"].area / 10**6

    return gdf


def extract_minio_path(x):
    """Parse the MinIO path from the 'abstract', where available.

    The abstract is parsed as HTML; the value is read from the first table,
    at the row labelled "MinIO path" (first column used as the index) and
    the column labelled 1.

    Args:
        x: The abstract text (HTML), or a null value.

    Returns:
        The MinIO path, or an empty string if 'x' is null or the path cannot
        be extracted for any reason (no table, missing row, parser error).
    """
    try:
        if pd.isnull(x):
            return ""
        # Wrap the literal HTML in StringIO: passing a raw string to read_html
        # is deprecated (pandas >= 2.1) and could be mistaken for a path/URL.
        tables = pd.read_html(StringIO(x), index_col=0)
        return tables[0].loc["MinIO path", 1]
    except Exception:
        # Deliberately best-effort: many abstracts carry no metadata table.
        return ""


def assign_theme(row):
    """Guess the 'theme' of a layer from its MinIO path or, failing that, its name.

    A MinIO path starting with "seabirds" gives "Seabirds" and one starting
    with "niva" gives "Habitats". Otherwise the layer name is split on
    underscores: exactly three parts gives "Seabirds", anything else
    "Habitats". This is only a rough heuristic.
    """
    minio_path = row["minio_path"]

    # An explicit MinIO path takes priority over the name-based guess
    if minio_path.startswith("seabirds"):
        return "Seabirds"
    if minio_path.startswith("niva"):
        return "Habitats"

    # Fall back to the number of underscore-separated parts in the name
    name_parts = row["name"].split("_")
    return "Seabirds" if len(name_parts) == 3 else "Habitats"

In [3]:
# Process data
url = r"https://geonode.seabee.sigma2.no"

# Keep the unfiltered download under its own name so the filtering below can
# be re-run or inspected without hitting the API again
gdf_all = get_geonode_data(url)

# Keep raster layers only, excluding the 'coastline' layer. The explicit
# copy makes the new columns below safe to assign (no SettingWithCopyWarning)
gdf = gdf_all.query("(name != 'coastline') and (type == 'raster')").copy()
gdf["minio_path"] = gdf["abstract"].apply(extract_minio_path)
gdf["theme"] = gdf.apply(assign_theme, axis="columns")
gdf.head()

Unnamed: 0,name,type,abstract,geometry,area_km2,minio_path,theme
0,oslo_fornebu_202310121354_RGB_80m,raster,RGB mosaic collected by NIVA at fornebu (oslo)...,"POLYGON ((10.63129 59.89283, 10.63129 59.89615...",0.0829,niva-tidy/2023/niva_202310121354_oslo_fornebu_...,Habitats
1,MavicMiniTest_Olberg_20230830_RGB_60m,raster,RGB mosaic collected by Niva at Olberg (Mavicm...,"POLYGON ((10.13078 59.00383, 10.13078 59.00808...",0.131799,niva-tidy/testing/MASSIMAL-NIVA-RGB-60m-MavicM...,Habitats
4,MASSIMAL-NIVA-RGB-60m-MavicMiniTest_Olberg_202...,raster,RGB mosaic collected by NIVA at Olberg (MASSIM...,"POLYGON ((10.13076 59.00387, 10.13076 59.00806...",0.12786,niva-tidy/testing/MASSIMAL-NIVA-RGB-60m-MavicM...,Habitats
5,Fedje_nordtrafikkstasjon_20230531,raster,RGB mosaic collected by NINA at nordtrafikksta...,"POLYGON ((4.69711 60.78068, 4.69711 60.78301, ...",0.063385,seabirds/2023/Fedje_nordtrafikkstasjon_20230531,Seabirds
6,Fedje_Stormark_20230531,raster,RGB mosaic collected by NINA at Stormark (Fedj...,"POLYGON ((4.73222 60.75960, 4.73222 60.76224, ...",0.089404,seabirds/2023/Fedje_Stormark_20230531,Seabirds


In [4]:
# Number of layers and total bounding-box area (km2) for each theme
area_by_theme = gdf.groupby("theme")["area_km2"]
area_by_theme.agg(["count", "sum"])

Unnamed: 0_level_0,count,sum
theme,Unnamed: 1_level_1,Unnamed: 2_level_1
Habitats,19,28.170668
Seabirds,569,45.619627


In [5]:
# Number of layers and total bounding-box area (km2) across all themes
gdf.loc[:, ["area_km2"]].agg(["count", "sum"])

Unnamed: 0,area_km2
count,588.0
sum,73.790294
