### Image labelling
This notebook acts as a placeholder for the labelling stage. Image labelling was done in Labelbox, using its API to upload the image tiles and to download the resulting labels. The code for both steps is provided below for reference, but the Labelbox configuration parameters and API keys have been removed.

In [None]:
import cv2
import geopandas as gpd
import labelbox as lb
import ndjson
import numpy as np
import os
import requests
import shutil

from config import Config
from core.utils import get_mapping_from_csv

In [8]:
# Instantiate the project configuration; later cells read `label_folder` and
# `sample_catalog_path` from it.
config = Config.Config()

In [None]:
# Labelbox parameters (values intentionally left blank in the published notebook;
# fill them in before running any of the cells below).
lb_api_key_path = ""  # path to a text file whose first line is the Labelbox API key
lb_export_path = ""  # path to the Labelbox export file (read as NDJSON by lb_get_mask)
lb_mapping_path = ""  # path to the CSV passed to get_mapping_from_csv (label name -> pixel value)
lb_project_id = ""  # project id used to look up labels under each export row's 'projects' entry
lb_dataset_id = ""  # id of the Labelbox dataset that receives the uploaded data rows
lb_global_key_prefix = ""  # prefix prepended to every data row's global key
label_folder = config.label_folder  # destination folder for the generated label tiles

In [10]:
# Read the Labelbox API key from the first line of the key file. Surrounding
# whitespace is stripped: reading a line keeps its trailing newline, and the key
# is later sent verbatim in an HTTP Authorization header, where a newline is
# not a legal character.
with open(lb_api_key_path) as file:
    api_key = file.readline().strip()

if not api_key:
    # Fail with an actionable message instead of an opaque error further down.
    raise ValueError(f"No API key found on the first line of '{lb_api_key_path}'.")

client = lb.Client(api_key=api_key)

In [11]:
# Load the sample catalog: one row per sampled image tile, including its
# tile path, map/tile ids, stratum and footprint geometry.
df_sample = gpd.read_file(config.sample_catalog_path)
# Preview the first rows as a quick sanity check of the catalog columns.
df_sample.head(3)

Unnamed: 0,index,tile_id,map_id,col_off,row_off,tile_size,random_number,luc,luc_name,stratum,random_sample,legend_type,nara,tile_name,tile_path,geometry
0,268292,483,60463,4644,2224,256,0.451872,52,Closed evergreen broadleaved forest,Forest,False,3,False,60463_4644_2224.png,../data/processed/image_tiles/60463_4644_2224.png,"POLYGON ((105.16509 18.67607, 105.1651 18.6853..."
1,51221,687,53503,6475,4099,256,0.003091,52,Closed evergreen broadleaved forest,Forest,False,6,False,53503_6475_4099.png,../data/processed/image_tiles/53503_6475_4099.png,"POLYGON ((101.73326 20.60236, 101.73343 20.611..."
2,430102,532,63303,4986,286,256,0.067877,190,Impervious surfaces,Impervious surface,True,1,False,63303_4986_286.png,../data/processed/image_tiles/63303_4986_286.png,"POLYGON ((106.68722 10.74111, 106.67786 10.741..."


## Upload image tiles to Labelbox

In [12]:
def lb_create_asset(row, global_key_prefix):
    """Build the Labelbox data-row payload for a single sampled tile.

    Args:
        row: Record exposing the attributes ``tile_path``, ``map_id``,
            ``tile_id``, ``stratum``, ``legend_type`` and ``nara``
            (e.g. one row of the sample catalog).
        global_key_prefix: String placed at the start of the data row's
            global key.

    Returns:
        dict with the keys ``row_data``, ``global_key``, ``media_type`` and
        ``metadata_fields``. The global key has the form
        ``<prefix>-<map_id>-<tile_id>`` and gets a ``-1`` suffix when
        ``row.nara`` is truthy.
    """
    # Rows flagged as `nara` receive a distinguishing "-1" suffix.
    key_suffix = "-1" if row.nara else ""
    global_key = f"{global_key_prefix}-{row.map_id}-{row.tile_id}{key_suffix}"

    # (schema id, value) pairs; the ids are hard-coded and presumably refer to
    # metadata fields defined in the authors' Labelbox workspace.
    metadata_values = (
        ("clth2go8g003i0713gchv20dr", row.map_id),
        ("clth3t2vm06wr07zf5zdmex1a", row.tile_id),
        ("clth5rbfi02is070h649t0pkk", row.stratum),
        ("clth2g3aw037v071pgp1z5v0r", row.legend_type),
    )
    metadata_fields = [
        {"schema_id": schema_id, "value": value}
        for schema_id, value in metadata_values
    ]

    return {
        "row_data": row.tile_path,
        "global_key": global_key,
        "media_type": "IMAGE",
        "metadata_fields": metadata_fields,
    }

In [None]:
# Build one Labelbox asset payload per row of the sample catalog.
assets = [lb_create_asset(row, lb_global_key_prefix) for _, row in df_sample.iterrows()]
if not assets:
    # Stop early with a clear message rather than submitting an empty upload.
    raise ValueError("The sample catalog is empty: there are no assets to upload.")

# Preview the first payload. A bare expression in the middle of a cell is not
# displayed by Jupyter, so it is printed explicitly.
print(assets[0])

# Bulk add data rows to the dataset
dataset = client.get_dataset(lb_dataset_id)

task = dataset.create_data_rows(assets)
task.wait_till_done()
# Show any per-row upload errors reported by Labelbox.
print(task.errors)

## Create label tiles from Labelbox annotations

In [14]:
def _lb_download_mask(url, headers, image_name, max_attempts=3, timeout=60):
    """Download one Labelbox mask and decode it into a grayscale array.

    Returns the decoded 2-D array, or None (after printing a message) when the
    download did not succeed within `max_attempts` tries or the payload could
    not be decoded as an image.
    """
    for _ in range(max_attempts):
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException:
            # Network-level failure (connection error, timeout, ...): retry.
            continue
        if response.status_code == 200:
            break
    else:
        # Reached only when no attempt returned HTTP 200.
        print(f"Failed to download mask for {image_name} after {max_attempts} attempts.")
        return None

    buffer = np.frombuffer(response.content, dtype="uint8")
    # An empty payload cannot be decoded; treat it like an undecodable image.
    image = cv2.imdecode(buffer, cv2.IMREAD_GRAYSCALE) if buffer.size else None
    if image is None or image.size == 0:
        print(f"Warning: Could not decode image {image_name}. Skipping mask.")
        return None
    return image


def lb_get_mask(lb_export_file, project_id, api_key, class_indices, dst_folder,
                max_attempts=3, timeout=60):
    """Downloads and processes Labelbox annotations to create image labels.

    For every row of the export, the masks of all annotated objects are
    downloaded and merged into a single-channel uint8 label image, which is
    written to `dst_folder` under the file name taken from the row's
    `external_id`. Rows whose label file already exists are skipped.

    Pixels equal to 255 in an object's mask receive that object's class index;
    later objects overwrite earlier ones where masks overlap, and pixels not
    covered by any mask stay 0.

    A mask that cannot be downloaded or decoded is skipped with a printed
    message and the label image is still written from the remaining masks.
    Because existing files are skipped on later runs, delete such a file to
    have it regenerated.

    Args:
        lb_export_file: Path to the Labelbox export file in NDJSON format.
        project_id: Labelbox project id used to look up each row's labels.
        api_key: Labelbox API key, sent as the `Authorization` header when
            downloading masks.
        class_indices: Mapping from annotation name to pixel value.
        dst_folder: Output folder; created (including parents) if missing.
        max_attempts: Number of download attempts per mask.
        timeout: Timeout in seconds for each download request.

    Raises:
        KeyError: If an annotation name is missing from `class_indices` or the
            export row lacks an expected field.
    """
    # Open export json. Change name to your export file if required
    with open(lb_export_file) as f:
        data = ndjson.load(f)

    os.makedirs(dst_folder, exist_ok=True)
    headers = {'Authorization': api_key}

    # Iterate over all images
    for d in data:
        image_path = d['data_row']['external_id']
        image_name = image_path.split("/")[-1]
        print(image_name)

        dst_path = os.path.join(dst_folder, image_name)
        if os.path.exists(dst_path):
            print(f'File {image_name} already processed!')
            continue

        labels = d['projects'][project_id]['labels']
        if not labels:
            # Exported data row without any label: nothing to rasterise.
            print(f"Warning: No labels found for {image_name}. Skipping image.")
            continue

        height = d['media_attributes']['height']
        width = d['media_attributes']['width']
        mask_full = np.zeros((height, width), dtype="uint8")

        # Iterate over all masks (only the first label of the row is used)
        for obj in labels[0]['annotations']['objects']:
            # Extract mask name and mask url
            name = obj['name']
            url = obj['mask']['url']

            cl = class_indices[name]

            image = _lb_download_mask(url, headers, image_name, max_attempts, timeout)
            if image is None:
                # Skip this object entirely so that a mask from a previous
                # object is never reused under the wrong class.
                continue

            # Assign mask index to image-mask
            mask_full[np.nonzero(image == 255)] = cl

        unique = np.unique(mask_full)
        print('The masks of the image are: ')
        print(unique)
        cv2.imwrite(dst_path, mask_full)

In [15]:
# Start from an empty label folder: delete any existing folder together with
# everything in it, then recreate it.
# WARNING: destructive. Previously generated label tiles are removed, so the
# export below downloads every mask again.
if os.path.exists(label_folder):
    shutil.rmtree(label_folder)
os.makedirs(label_folder)

In [16]:
# Build the mapping from Labelbox annotation name to label pixel value; it is
# passed to lb_get_mask as `class_indices`.
# NOTE(review): the column key "lablebox" looks like a misspelling of "labelbox";
# it must match the CSV header exactly, so confirm against the file before changing it.
lb_mapping = get_mapping_from_csv(lb_mapping_path, col_key="lablebox", col_value="pixel")

In [17]:
# Convert the Labelbox export into label tiles: one single-channel image per
# exported data row is written to `label_folder`, with progress printed per tile.
lb_get_mask(
    lb_export_file=lb_export_path,
    project_id=lb_project_id,
    api_key=api_key,
    class_indices=lb_mapping,
    dst_folder=label_folder
    )

59271_3715_2290.png
The masks of the image are: 
[ 2  4  5  6 11 12]
52492_1950_3974.png
The masks of the image are: 
[ 4  6 16]
60424_2572_3320.png
The masks of the image are: 
[4 5 6 7]
63304_5953_6122.png
The masks of the image are: 
[ 1  2  3  4 12 13 15 16]
65511_1892_246.png
The masks of the image are: 
[ 0  6 10 11 12 13]
54483_2578_3845.png
The masks of the image are: 
[2 6 7]
58532_5917_475.png
The masks of the image are: 
[ 2  3  6 13]
56504_583_545.png
The masks of the image are: 
[0 6 7]
65513_5983_2305.png
The masks of the image are: 
[12 16]
61461_4635_2429.png
The masks of the image are: 
[12]
62291_2000_1546.png
The masks of the image are: 
[2 5 9]
64322_3823_4592.png
The masks of the image are: 
[ 6 16]
60512_2924_1130.png
The masks of the image are: 
[ 2  4  5  6 12 13 14 15 16]
57521_4466_281.png
The masks of the image are: 
[ 0  4  5  7 12 16]
61481_2863_4074.png
The masks of the image are: 
[ 2  4  5  6 16]
61463_1040_1296.png
The masks of the image are: 
[ 2  4  5