# Export shapes for QuPath QC

### See [Issue #6](https://github.com/josenimo/openDVP/issues/6) for more details. 

## Scenario 1: from spatialdata

In [1]:
# imports
import spatialdata



In [2]:
# Load the full-resolution SpatialData object from its Zarr store (path relative to this notebook).
sdata = spatialdata.read_zarr("../data/sdata/20241201_1730_sdata_fullres.zarr")

  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
  utils.warn_names_duplicates("var")


In [3]:
# Display the SpatialData summary: its elements and the coordinate systems they belong to.
sdata

SpatialData object, with associated Zarr store: /Users/jnimoca/Jose_BI/1_Pipelines/openDVP/data/sdata/20241201_1730_sdata_fullres.zarr
├── Images
│     └── 'image': DataArray[cyx] (15, 44470, 73167)
├── Labels
│     └── 'mask': DataArray[yx] (44470, 73167)
├── Shapes
│     └── 'Primary_contours': GeoDataFrame shape: (347, 6) (2D shapes)
└── Tables
      └── 'proteomics': AnnData (18, 4531)
with coordinate systems:
    ▸ 'global', with elements:
        image (Images), mask (Labels), Primary_contours (Shapes)

In [4]:
# Let's polygonize the labels: convert the 'mask' labels element into vector shapes
# and store them in the SpatialData object under 'seg_polygons'.
sdata['seg_polygons'] = spatialdata.to_polygons(sdata['mask'])

In [None]:
# Give every polygon a "cellID_<label>" name. QuPath displays this name,
# which might be useful to track individual cells.
cell_labels = sdata['seg_polygons']['label'].astype(int).astype(str)
sdata['seg_polygons']['name'] = "cellID_" + cell_labels

In [58]:
# Important to set the objectType to detection, so that QuPath knows that these are
# detections and not annotations. Detections are much more computationally efficient
# to work with in QuPath.
# This must be a flat 'objectType' column: GeoPandas writes every column as a key of the
# GeoJSON feature's "properties", so a column named 'properties' holding a dict would not
# end up as properties.objectType (and a dict is not broadcast to every row on assignment).
sdata['seg_polygons']['objectType'] = "detection"

In [None]:
# to_polygons creates both an index and a column called 'label'; drop the duplicate column
# (the index is kept). The duplicate might break something for napari interactive viz.
# NOTE: this mutates the element in place, so re-running the cell raises a KeyError.
sdata['seg_polygons'].drop(columns=['label'], inplace=True)

In [64]:
import time

# Prefix the output file with the current time (YYYYmmdd_HHMMSS) so each export gets its own file.
# Named `timestamp` rather than `datetime` to avoid colliding with the stdlib module name.
timestamp = time.strftime("%Y%m%d_%H%M%S")
sdata['seg_polygons'].to_file(f"../data/sdata/{timestamp}_fullres_qupath.geojson", driver="GeoJSON")

  write(


## Scenario 2: from files

In [None]:
# Labeled segmentation mask (.ome.tif) used for the file-based example, relative to this notebook.
path_to_subset_mask = "../data/subset_elements/mask.ome.tif"

In [65]:
# let's try to polygonize the mask
import numpy as np
from shapely.geometry import shape, MultiPolygon
from rasterio.features import shapes
import geopandas as gpd
import skimage.io 

def create_geodataframe_with_multipolygons(array):
    """
    Convert a labeled segmentation mask into a GeoDataFrame with one polygon or
    multipolygon per cell.

    Args:
        array: A 2D labeled segmentation mask, where pixel values represent cell IDs and
            background is 0. Integer or boolean masks whose dtype rasterio cannot
            polygonize directly (e.g. int64, uint32) are cast to int32 when all values
            fit; any other dtype is passed to rasterio unchanged.
    Returns:
        geopandas.GeoDataFrame: A GeoDataFrame with a 'cellId' column (int) and a
        'geometry' column holding a Polygon, or a MultiPolygon when several separate
        regions share the same cell ID. Rows follow the order in which each cell ID is
        first produced by rasterio.
    Raises:
        ValueError: If `array` is not 2-dimensional.
    Caveats:
        Larger than memory arrays will fail
    """
    from collections import defaultdict

    array = np.asarray(array)
    if array.ndim != 2:
        raise ValueError(
            f"Expected a 2D labeled mask, got an array with shape {array.shape}"
        )

    # rasterio.features.shapes documents support for these dtypes only.
    rasterio_dtypes = ("int16", "int32", "uint8", "uint16", "float32")
    if array.dtype.name not in rasterio_dtypes and array.dtype.kind in "biu":
        int32_info = np.iinfo(np.int32)
        # Only cast when it is lossless; otherwise leave the array untouched and let
        # rasterio report the unsupported dtype itself.
        if array.size == 0 or (
            int32_info.min <= array.min() and array.max() <= int32_info.max
        ):
            array = array.astype(np.int32)

    # Group the polygons by cell ID; background (0) is excluded through the mask.
    polygons_by_cell = defaultdict(list)
    for geometry_mapping, value in shapes(array, mask=(array > 0)):
        # The pixel value is converted to a plain int so it can serve as the cell ID.
        polygons_by_cell[int(value)].append(shape(geometry_mapping))

    # A single region stays a Polygon; several regions become one MultiPolygon.
    cell_ids = list(polygons_by_cell)
    geometries = [
        parts[0] if len(parts) == 1 else MultiPolygon(parts)
        for parts in polygons_by_cell.values()
    ]

    # NOTE(review): coordinates come straight from the array (no transform is passed to
    # shapes), so they look like pixel positions rather than lon/lat; EPSG:4326 is kept
    # as in the original — confirm whether a CRS is needed at all.
    gdf = gpd.GeoDataFrame({'cellId': cell_ids, 'geometry': geometries}, crs="EPSG:4326")

    return gdf

In [None]:
# Read the label mask, cast it to int32, then polygonize it into one geometry per cell ID.
mask = skimage.io.imread(path_to_subset_mask).astype(np.int32)
gdf = create_geodataframe_with_multipolygons(mask)

In [None]:
# Order the cells by ID with a fresh 0..n-1 index, then add the columns QuPath reads:
# 'objectType' marks each shape as a detection and 'name' carries "cellID_<id>".
gdf = (
    gdf
    .sort_values(by='cellId')
    .reset_index(drop=True)
    .assign(
        objectType='detection',
        name=lambda frame: "cellID_" + frame['cellId'].astype(str),
    )
)

In [None]:
import time

# Prefix the output file with the current time (YYYYmmdd_HHMMSS) so each export gets its own file.
# Named `timestamp` rather than `datetime` to avoid colliding with the stdlib module name.
timestamp = time.strftime("%Y%m%d_%H%M%S")
# An underscore separates the timestamp from the file stem, matching the Scenario 1 export.
gdf.to_file(f"../data/subset_elements/{timestamp}_mask_detections.geojson", driver='GeoJSON')

Note: the `name` column has to be lower case (`name`, not `Name`).

In [None]:
# we could color them by using classification