In [None]:
import os
import pathlib
from os import path

import pandas as pd
import yaml
from label_studio_sdk.client import LabelStudio
from label_studio_sdk.label_interface import LabelInterface
from osgeo import gdal


def find_project_by_title(ls, project_title):
    """Look up a Label Studio project by its exact title.

    Parameters
    ----------
    ls
        Client object exposing `projects.list()` and `projects.get(id)`.
    project_title
        Title that a listed project's `title` attribute has to equal.

    Returns
    -------
    The result of `ls.projects.get` for the first listed project with a matching
    title, or `None` when no listed project matches.
    """
    first_match = next(
        (
            candidate
            for candidate in ls.projects.list()
            if candidate.title == project_title
        ),
        None,
    )
    if first_match is None:
        return None
    # fetch the project again by id rather than returning the listed item
    return ls.projects.get(first_match.id)


# connection settings are taken from the environment (`None` when a variable is unset)
LABEL_STUDIO_URL = os.environ.get("LABEL_STUDIO_URL")
LABEL_STUDIO_API_KEY = os.environ.get("LABEL_STUDIO_API_KEY")
# prefix of the image URLs of the tasks imported into the project
LOCAL_FILES_ROOT = "data/local-files"

In [None]:
# --- inputs ---
# folder with the tiles and CSV file with their train/test split
tile_dir = "../data/raw/sitg-orthophoto-2019-tiles"
split_filepath = "../data/raw/sitg-orthophoto-2019-tiles/split.csv"
# labeling configuration (XML) of the Label Studio project
label_config_filepath = "../data/raw/label-config.xml"

# --- Label Studio project ---
# NOTE(review): the title is spelled "orhtophoto" whereas the folders are spelled
# "orthophoto"; it is kept as is because the project is looked up by this exact title
project_title = "sitg-orhtophoto-2019"
# separate folder set up as local storage of the project, where the training tiles
# are copied (converted to JPEG)
storage_dir = "../data/raw/label-studio/sitg-orthophoto-2019"

# --- tile conversion ---
img_ext = "jpeg"
gdal_options = "-if GTiff -of JPEG"

# --- output ---
# the list of task ids, written just so that the snakemake rule has an output
dst_filepath = "../data/raw/label-studio-init-ids.yml"

In [None]:
# connect to the Label Studio API
ls = LabelStudio(base_url=LABEL_STUDIO_URL, api_key=LABEL_STUDIO_API_KEY)

# reuse the project if one with this title already exists
project = find_project_by_title(ls, project_title)

if project is None:
    # the project does not exist yet: create it from the labeling config file
    with open(label_config_filepath, "r") as f:
        label_config = LabelInterface(f.read().strip("\n"))
    project = ls.projects.create(title=project_title, label_config=label_config.config)
    # create the folder of the local storage; `exist_ok` avoids both the separate
    # existence check and a failure when the folder is already there
    pathlib.Path(storage_dir).mkdir(parents=True, exist_ok=True)
    # register the folder (as an absolute path) as local import storage of the project
    ls.import_storage.local.create(
        title=project_title,
        project=project.id,
        path=path.abspath(storage_dir),
    )

In [None]:
# read the train/test split and convert the training tiles into the local storage
# folder (the output format is set by `gdal_options`, the extension by `img_ext`)
split_df = pd.read_csv(split_filepath)
# the `train` column is used as a boolean mask to keep the training tiles only
img_filename_ser = split_df.loc[split_df["train"], "img_filename"]

# the storage folder is otherwise only created together with a new project, so make
# sure that it exists when the project was already there
pathlib.Path(storage_dir).mkdir(parents=True, exist_ok=True)

for img_filename in img_filename_ser:
    dst_img_filepath = path.join(
        storage_dir, f"{path.splitext(img_filename)[0]}.{img_ext}"
    )
    # skip the tiles that have already been converted: the check has to be done on
    # the destination file, not on the bare tile file name (which is relative to
    # `tile_dir` and therefore never found from the working directory)
    if path.exists(dst_img_filepath):
        continue
    src_img_filepath = path.join(tile_dir, img_filename)
    dst_ds = gdal.Translate(dst_img_filepath, src_img_filepath, options=gdal_options)
    # without GDAL exceptions enabled, a failed conversion returns `None`; fail here
    # rather than importing tasks that point to images that do not exist
    if dst_ds is None:
        raise RuntimeError(
            f"Could not convert {src_img_filepath} to {dst_img_filepath}"
        )
    # release the dataset so that the file is flushed to disk and closed
    del dst_ds

In [None]:
# import one task per training tile to the label studio project
# The image URLs have to point to the folder where the tiles were actually written,
# so its name is taken from `storage_dir` instead of `project_title` (the two are
# spelled differently in the parameters above).
# NOTE(review): assumes that the parent folder of `storage_dir` is the local files
# document root of the Label Studio server - TODO confirm
storage_dirname = path.basename(path.normpath(storage_dir))
task_ids = ls.projects.import_tasks(
    project.id,
    request=[
        {
            # URLs always use "/", so the parts are not joined with `os.path.join`
            "image": (
                f"{LOCAL_FILES_ROOT}/?d={storage_dirname}/"
                f"{path.splitext(img_filename)[0]}.{img_ext}"
            )
        }
        for img_filename in img_filename_ser
    ],
    return_task_ids=True,
)

In [None]:
# write the project id and the ids of the imported tasks to the output YAML file
with open(dst_filepath, "w") as dst:
    init_ids = {"project-id": project.id, "task-ids": task_ids.task_ids}
    yaml.dump(init_ids, stream=dst)