# HuBMAP anatomical structure (.zarr)

> Convert the anatomical structure segmentations from JSON polygon annotations to label masks stored as .zarr arrays

In [None]:
# Install zarr and load packages
!pip install -qq zarr
import cv2, zarr, json, gc
import matplotlib.pyplot as plt, numpy as np, pandas as pd
from pathlib import Path
gc.enable()

Settings

In [None]:
# Root folder of the competition data set
path = Path('/kaggle/input/hubmap-kidney-segmentation')

# Tables read from the data set folder: train.csv and the per-image information sheet
train_csv = path / "train.csv"
info_csv = path / "HuBMAP-20-dataset_information.csv"
df_train = pd.read_csv(train_csv)
df_info = pd.read_csv(info_csv)

# Output zarr group; the conversion step stores one mask array per image in it
g_out = zarr.group('/kaggle/working/anatomy')

Conversion

In [None]:
# Rasterize the anatomical-structure polygons of every image listed in
# `df_info` into a uint8 label mask (0 = background, 1 = Cortex, 2 = any other
# structure) and store it in `g_out` under the image id.


def _outer_rings(geometry):
    """Return the outer ring of every polygon in a GeoJSON geometry.

    A GeoJSON ``Polygon`` stores ``[outer_ring, hole, ...]`` while a
    ``MultiPolygon`` stores a list of such polygons. The geometry ``type``
    decides which layout applies; guessing from ``len(coordinates)`` breaks
    for polygons with holes and for multi-polygons with a single member.
    Holes are ignored, as in the previous implementation.
    """
    coords = geometry['coordinates']
    geom_type = geometry.get('type')
    if geom_type == 'MultiPolygon':
        polygons = coords
    elif geom_type == 'Polygon':
        polygons = [coords]
    else:
        # Type missing or unknown: fall back to the nesting-depth heuristic.
        polygons = [coords] if len(coords) == 1 else coords
    # Skip degenerate entries that carry no points at all.
    return [polygon[0] for polygon in polygons if polygon and polygon[0]]


# Build the lookup once instead of scanning the id column for every image.
train_ids = set(df_train['id'])

for _, row in df_info.iterrows():

    print(row)
    idx = Path(row.image_file).stem
    split = 'train' if idx in train_ids else 'test'

    # Open json
    with open(path/split/row.anatomical_structures_segmention_file) as json_file:
        data = json.load(json_file)

    # Empty mask. uint8 is enough for the labels 0..2 and needs a quarter of
    # the memory of int32, so no store/reload/cast round trip is required.
    mask = np.zeros((int(row.height_pixels), int(row.width_pixels)), dtype=np.uint8)

    # Fill array
    for poly in data:
        value = 1 if poly['properties']['classification']['name']=='Cortex' else 2
        for ring in _outer_rings(poly['geometry']):
            # Truncate to integer pixel coordinates (same rounding as int()).
            pts = np.asarray(ring, dtype=np.float64)[:, :2].astype(np.int32)
            # fillPoly handles arbitrary (non-convex) outlines; fillConvexPoly
            # is only correct for convex/monotone contours.
            cv2.fillPoly(mask, [pts], value)

    # Thumbnail for the visual check, taken from the in-memory mask so the
    # full array does not have to be read back from zarr. Nearest-neighbour
    # keeps the label values intact instead of blending them at borders.
    preview = cv2.resize(mask, dsize=(512, 512), interpolation=cv2.INTER_NEAREST)

    g_out[idx] = mask
    # Release the full-resolution mask before the next image is allocated.
    del mask
    gc.collect()
    #print(g_out[idx].info)

    plt.imshow(preview)
    plt.show()