In [2]:
import datetime
import multiprocessing
import concurrent
import subprocess
import uuid
from iris.fileformats.pp import load_pairs_from_fields
import numpy as np
import logging
import warnings
import os, sys
import scipy.ndimage as ndimage
from skimage import measure
#from tqdm import tqdm
import pandas as pd
import iris

In [6]:
def _first_coord_date(cube, coord_name):
    '''
    First point of a time-like coordinate as a day-resolution datetime.
    :param cube: cube that may carry the coordinate
    :param coord_name: name of the coordinate to read (e.g. 'time')
    :return: datetime.datetime built from the year/month/day of the first point,
             or np.nan when the cube has no coordinate of that name
    '''
    if not cube.coords(coord_name):
        return np.nan
    coord = cube.coord(coord_name)
    first = coord.units.num2date(coord.points)[0]
    # Hours/minutes are deliberately dropped: only the calendar day is kept.
    return datetime.datetime(first.year, first.month, first.day)


def grid_features(cube, thresholds=None, time_index=0, member=0, threshold_method='geq'):
    '''
    Identify thresholded objects in a 2D cube and tabulate their properties.

    For every threshold a mask is built with ``generate_mask``, its connected
    regions are labelled with ``scipy.ndimage.label`` and one row is produced
    per region. ``generate_mask`` and ``print_progress_bar`` are not defined in
    this cell and must already exist in the notebook namespace.

    :param cube: Lat-lon cube with 'longitude' and 'latitude' coordinates.
                 Cubes whose ``ndim`` is not 2 yield an empty DataFrame.
    :type cube: iris cube
    :param thresholds: iterable of threshold values, each handed to ``generate_mask``
    :type thresholds: iterable
    :param time_index: index stored in the 'TimeInds' column and used as the
                       first component of 'ObjectLabel'
    :type time_index: int
    :param member: member identifier; only used as the second component of
                   'ObjectLabel' (it is not stored in a column of its own)
    :type member: int
    :param threshold_method: passed unchanged to ``generate_mask``
    :type threshold_method: str
    :return: DataFrame of identified objects and their properties, one row per
             object, with columns TimeInds, Date, Forecast_period,
             Forecast_reference_time, Threshold, ObjectLabel, Area, Perimeter,
             GridPoints, Eccentricity, Orientation, Mean, Std, Max, Min,
             Centroid, Polygon, Data_values
    :rtype: Pandas DataFrame
    :raises AssertionError: if ``thresholds`` is None
    '''
    assert thresholds is not None, "Threshold values not found."

    columns = ['TimeInds', 'Date', 'Forecast_period',
               'Forecast_reference_time', 'Threshold', 'ObjectLabel', 'Area',
               'Perimeter',
               'GridPoints', 'Eccentricity', 'Orientation',
               'Mean', 'Std', 'Max', 'Min', 'Centroid', 'Polygon', 'Data_values']
    records = []

    if cube.ndim == 2:
        lons = cube.coord('longitude').points
        lats = cube.coord('latitude').points

        # Missing metadata coordinates become NaN rather than aborting the run
        # (previously a cube without 'time' raised NameError on the first object).
        cube_date = _first_coord_date(cube, 'time')
        forecast_rt = _first_coord_date(cube, 'forecast_reference_time')
        if cube.coords('forecast_period'):
            forecast_p = cube.coord('forecast_period').points[0]
        else:
            forecast_p = np.nan

        for threshold in thresholds:
            # Fresh copy per threshold: generate_mask is defined elsewhere and
            # may modify its input -- TODO confirm, then hoist out of the loop.
            cube_data = cube.data.copy()
            mask = generate_mask(cube_data, threshold, threshold_method)

            # Label each feature in the mask; labels run from 1 to num_features.
            labeled_array, num_features = ndimage.label(mask)

            # Upper bound is inclusive so the last labelled feature is not dropped.
            for feature_num in range(1, num_features + 1):
                print_progress_bar(feature_num, num_features)

                feature_mask = labeled_array == feature_num
                # Masked array exposing only this feature's grid points.
                data_object = np.ma.masked_array(cube_data, ~feature_mask)

                # Shape descriptors come from the label image alone, so no
                # intensity image is passed. The feature is a single connected
                # region, hence exactly one entry in the regionprops list.
                props = measure.regionprops(measure.label(feature_mask))[0]

                try:
                    # Zero-fill outside the feature so the value-weighted
                    # centroid does not depend on SciPy honouring the mask.
                    y, x = ndimage.center_of_mass(data_object.filled(0))
                    centroid = (lons[round(x)], lats[round(y)])
                except (ValueError, IndexError, OverflowError):
                    # NaN/inf centre (zero total weight) or an index off the grid.
                    centroid = (np.nan, np.nan)

                rows, cols = np.nonzero(feature_mask)
                object_lats = lats[rows]
                object_lons = lons[cols]

                records.append({
                    'TimeInds': time_index,
                    'Date': cube_date,
                    'Forecast_period': forecast_p,
                    'Forecast_reference_time': forecast_rt,
                    'Threshold': threshold,
                    'ObjectLabel': f'{time_index}_{member}_{threshold}_{feature_num}',
                    'Area': props.area,
                    'Perimeter': props.perimeter,
                    'GridPoints': len(rows),
                    'Eccentricity': props.eccentricity,
                    'Orientation': props.orientation,
                    'Mean': np.ma.mean(data_object),
                    'Std': np.ma.std(data_object),
                    'Max': np.ma.max(data_object),
                    'Min': np.ma.min(data_object),
                    'Centroid': centroid,
                    # (lon, lat) pair for every grid point of the object.
                    'Polygon': list(zip(object_lons, object_lats)),
                    'Data_values': data_object.compressed(),
                })

    return pd.DataFrame(records, columns=columns)

In [4]:
# Input file for this notebook, kept in one named place so it is easy to spot and change.
FEATURES_FILE = '/scratch/hadpx/SEA_monitoring/processed_SEA_data/mogreps/features/precip/precip_Features_24h_allMember_20241001.nc'

# Despite the plural name, `cubes` is the single cube returned by load_cube.
cubes = iris.load_cube(FEATURES_FILE)

  cubes = _load_collection(uris, constraints, callback).cubes()


In [5]:
# The loaded cube is 4-D; the first two axes are unpacked as member and time counts.
# The trailing two sizes get explicit throwaway names instead of `_`, which
# IPython uses for the last cell output and should not be rebound in a notebook.
nmembers, ntime, _size_2, _size_3 = cubes.shape
print(nmembers, ntime)

36 8


In [10]:
# Build one feature table per (time step, member) 2-D slice of the cube.
# NOTE(review): `thresholds` and `threshold_method` are not defined in any
# visible cell -- they must already exist in the kernel before this cell runs.
frames = []
for i in range(ntime):
    for mem in range(nmembers):
        # Pass the member index through so ObjectLabel values are unique per
        # member; previously every member was labelled with the default 0.
        frames.append(grid_features(cubes[mem, i], thresholds=thresholds, time_index=i,
                                    member=mem, threshold_method=threshold_method))

<DimCoord: realization / (unknown)  [ 0, 1, ..., 34, 35]  shape(36,)>