In [1]:
import os

# Changes the current path to find the source files

current_dir = os.getcwd()
while current_dir != os.path.abspath("../src"):
    os.chdir("..")
    current_dir = os.getcwd()

In [2]:
from utils import create_all_folders, Folders

create_all_folders()

In [None]:
# %%html

# For the background of ipywidget

# <style>
# .cell-output-ipywidget-background {
#     background-color: transparent !important;
# }
# :root {
#     --jp-widgets-color: var{--vscode-editor-foreground};
#     --jp-widgets-font-size: var{--vscode-editor-font-size};
# }
# </style>

In [24]:
import os
import random
from typing import List, Dict, Sequence
import json


def get_all_files_iteratively(folder_path: str) -> List[str]:
    """Finds iteratively all the files below the input folder.

    Args:
        folder_path (str): folder to look into.

    Returns:
        List[str]: the list of all the files.
    """
    all_files = []
    for dirpath, dirnames, filenames in os.walk(folder_path):
        for filename in filenames:
            all_files.append(os.path.join(dirpath, filename))
    return all_files


def split_files_into_lists(
    folder_path: str,
    sets_ratios: Sequence[int | float],
    sets_names: List[str],
    random_seed: int | None = None,
) -> Dict[str, List[str]]:
    """Splits files in a folder into multiple lists based on specified ratios.

    Args:
        folder_path (str): path to the folder containing the files.
        sets_ratios (List[int | float]): the proportions for each list.
        sets_names (List[str]): the keys for the dictionary
        random_seed (int | None, optional): a seed for the randomization. Defaults to None.

    Returns:
        Dict[str, List[str]]: a dictionary where each key from the input names is linked
        with a list of files.
    """
    files = get_all_files_iteratively(folder_path)
    total_ratio = sum(sets_ratios)
    ratios = [r / total_ratio for r in sets_ratios]

    if random_seed is not None:
        random.seed(random_seed)
    random.shuffle(files)
    split_indices = [0] * (len(ratios) + 1)
    split_indices[-1] = len(files)
    sum_ratios = 0.0
    for i in range(len(ratios) - 1):
        sum_ratios += ratios[i]
        split_indices[i + 1] = int(round(len(files) * (sum_ratios)))

    files_dict = {}
    for i in range(len(ratios)):
        files_dict[sets_names[i]] = files[split_indices[i] : split_indices[i + 1]]

    return files_dict


def save_splitted_files_into_lists(
    folder_path: str,
    sets_ratios: Sequence[int | float],
    sets_names: List[str],
    save_path: str,
    random_seed: int | None = None,
) -> None:
    files_dict = split_files_into_lists(
        folder_path=folder_path,
        sets_ratios=sets_ratios,
        sets_names=sets_names,
        random_seed=random_seed,
    )
    with open(save_path, "w") as f:
        json.dump(files_dict, f)


folder_path = "../data/images/cropped"
sets_ratios = [3, 1]
sets_names = ["training", "validation"]
save_path = "/home/alexandre/Documents/tree-segmentation/data/others/data_split.json"


files_dict = save_splitted_files_into_lists(
    folder_path, sets_ratios, sets_names, save_path
)

with open(save_path, "r") as f:
    print(json.load(f))

len(l) = 110
len(l) = 36
{'training': ['../data/images/cropped/2023_122000_484000_RGB_hrl/8320_0_8960_640.tif', '../data/images/cropped/2023_122000_484000_RGB_hrl/640_3200_1280_3840.tif', '../data/images/cropped/2023_122000_484000_RGB_hrl/1920_1280_2560_1920.tif', '../data/images/cropped/2023_122000_484000_RGB_hrl/5120_2560_5760_3200.tif', '../data/images/cropped/2023_122000_484000_RGB_hrl/7040_3840_7680_4480.tif', '../data/images/cropped/2023_122000_484000_RGB_hrl/6400_3200_7040_3840.tif', '../data/images/cropped/2023_122000_484000_RGB_hrl/0_640_640_1280.tif', '../data/images/cropped/2023_122000_484000_RGB_hrl/4480_1280_5120_1920.tif', '../data/images/cropped/2023_122000_484000_RGB_hrl/640_1920_1280_2560.tif', '../data/images/cropped/2023_122000_484000_RGB_hrl/4480_4480_5120_5120.tif', '../data/images/cropped/2023_122000_484000_RGB_hrl/3840_640_4480_1280.tif', '../data/images/cropped/2023_122000_484000_RGB_hrl/3840_2560_4480_3200.tif', '../data/images/cropped/2023_122000_484000_RGB_hr

### Get the cropping limits

In [None]:
from utils import create_folder, get_file_base_name, open_json

from data_processing import (
    find_annots_repartition,
    crop_annots_into_limits,
    annots_coordinates_to_local,
    save_annots_per_image,
    crop_all_rgb_and_chm_images_from_annotations_folder,
    ImageData,
    get_image_path_from_full_annotation_path,
    get_cropping_limits,
)
from lidar_preprocessing import (
    download_lidar_names_shapefile,
    get_lidar_files_from_image,
    download_and_remove_overlap_geotiles,
    create_full_lidar,
)
from rgb_preprocessing import download_rgb_image
from chm import compute_chm

In [None]:
# Define tile size and OVERLAP
# TILE_SIZE = 1920  # Size of each tile
# OVERLAP = 480  # Overlap between tiles
TILE_SIZE = 640  # Size of each tile
OVERLAP = 0  # Overlap between tiles

In [None]:
# Annotations file to use:

annotations_file_name = "10"

In [None]:
annotations_path = os.path.join(Folders.FULL_ANNOTS.value, annotations_file_name)

annotations = open_json(annotations_path)

full_image_path_tif = get_image_path_from_full_annotation_path(annotations)

download_rgb_image(full_image_path_tif)

In [None]:
image_data = ImageData(full_image_path_tif)

shapefile_path = download_lidar_names_shapefile()

GEOTILES_OVERLAP = 20
intersection_file_names = get_lidar_files_from_image(
    image_data, shapefile_path, GEOTILES_OVERLAP
)
intersection_file_paths = download_and_remove_overlap_geotiles(
    intersection_file_names, GEOTILES_OVERLAP
)

full_lidar_path, full_lidar_filtered_path = create_full_lidar(
    intersection_file_paths, image_data
)

RESOLUTION = 0.08

resolution = RESOLUTION
full_chm_path = os.path.join(
    Folders.CHM.value,
    f"{round(resolution*100)}cm",
    "unfiltered",
    "full",
    f"{image_data.coord_name}.tif",
)
create_folder(os.path.dirname(full_chm_path))
compute_chm(
    full_lidar_path,
    full_chm_path,
    image_data.width_pixel,
    image_data.height_pixel,
    resolution,
    verbose=True,
)
full_chm_filtered_path = os.path.join(
    Folders.CHM.value,
    f"{round(resolution*100)}cm",
    "filtered",
    "full",
    f"{image_data.coord_name}.tif",
)
create_folder(os.path.dirname(full_chm_filtered_path))
compute_chm(
    full_lidar_path,
    full_chm_filtered_path,
    image_data.width_pixel,
    image_data.height_pixel,
    resolution,
    verbose=True,
)

In [None]:
cropping_limits_x, cropping_limits_y = get_cropping_limits(
    full_image_path_tif, TILE_SIZE, OVERLAP
)
visibility_threshold = 0.2
annots_repartition = find_annots_repartition(
    cropping_limits_x, cropping_limits_y, annotations, visibility_threshold
)
crop_annots_into_limits(annots_repartition)
annots_coordinates_to_local(annots_repartition)

output_image_prefix = get_file_base_name(full_image_path_tif)
annotations_output_directory = os.path.join(
    Folders.CROPPED_ANNOTS.value, output_image_prefix
)
save_annots_per_image(
    annots_repartition, annotations_output_directory, full_image_path_tif
)

images_output_directory = os.path.join(
    Folders.CROPPED_IMAGES.value, output_image_prefix
)
crop_all_rgb_and_chm_images_from_annotations_folder(
    annotations_output_directory, resolution, full_image_path_tif
)

In [None]:
# crop_las(
#     "../data/lidar/unfiltered/full/122000_484000.laz",
#     "../data/lidar/unfiltered/full/122000_484000_cropped.laz",
#     (image_data.coord_box.x_min, image_data.coord_box.x_max  - 0.9*(image_data.coord_box.x_max - image_data.coord_box.x_min)),
#     (
#         image_data.coord_box.y_min
#         + 0.9 * (image_data.coord_box.y_max - image_data.coord_box.y_min),
#         image_data.coord_box.y_max,
#     ),
# )