# Prepare the 10 cm Resolution Dataset for Fine-Tuning the HEIG-VD Model

In [None]:
import os

# Base directory from which every data and config path in this notebook is built.
projsoilsroot = "/proj-soil"

# YAML files passed to the utility scripts through their --config_file option.
_train_config_dir = os.path.join(projsoilsroot, "config/train")
config_train_gt = os.path.join(_train_config_dir, "config-train_gt-10cm.yaml")
config_train_scratch = os.path.join(_train_config_dir, "config-train_scratch-10cm.yaml")

## GT

### 1. Snap to 10cm

In [None]:
import os
import numpy as np
import geopandas as gpd

# Shift every raw shapefile so that the lower-left corner of its bounding box
# lies on a multiple of 0.1 coordinate units, then store it as a GeoPackage.
# NOTE(review): 0.1 units equals 10 cm only if the layer CRS is metric -- confirm.
root = os.path.join(projsoilsroot, "data/GT/20240216/0-0-0-raw/test")
out_folder = os.path.join(projsoilsroot, "data/GT/20240216/1-snapped")

if not os.path.exists(out_folder):
    os.makedirs(out_folder)
    print(f"The directory {out_folder} was created.")

for file in os.listdir(root):
    if not file.endswith(".shp"):
        continue

    gt = gpd.read_file(os.path.join(root, file))

    # Offset between the current bounds and the same bounds rounded to one
    # decimal; only the xmin/ymin components are used for the shift.
    bounds = np.asarray(gt.total_bounds)
    plus_xmin, plus_ymin = (bounds.round(1) - bounds)[:2]

    # Rigid translation: shapes and relative positions are left untouched.
    gt.geometry = gt.geometry.translate(plus_xmin, plus_ymin)

    # Swap only the extension (str.replace would also rewrite ".shp" inside the
    # stem) and name the driver explicitly, as the class-assignment step does,
    # so the output format does not depend on extension inference.
    stem = os.path.splitext(file)[0]
    gt.to_file(os.path.join(out_folder, stem + ".gpkg"), driver="GPKG")


### 2. Assign classes

In [None]:
# Input (snapped layers) and output (cleaned layers) folders of the class-assignment step.
_gt_version_dir = os.path.join(projsoilsroot, "data/GT/20240216")
source_folder = os.path.join(_gt_version_dir, "1-snapped")
out_folder = os.path.join(_gt_version_dir, "2-cleaned")

# Create the output folder on first use and report that it was created.
_out_folder_missing = not os.path.exists(out_folder)
if _out_folder_missing:
    os.makedirs(out_folder)
    print(f"The directory {out_folder} was created.")

In [None]:
# CLASSE_SEN labels in ID order: the label at position i (1-based) gets ID i.
_class_labels = (
    "batiment",              # 1
    "toit_vegetalise",       # 2
    "surface_non_beton",     # 3
    "surface_beton",         # 4
    "eau_bassin",            # 5
    "roche_dure_meuble",     # 6
    "eau_naturelle",         # 7
    "roseliere",             # 8
    "sol_neige",             # 9
    "sol_vegetalise",        # 10
    "surface_riparienne",    # 11
    "sol_divers",            # 12
    "sol_vigne",             # 13
    "sol_agricole",          # 14
    "sol_bache",             # 15
    "sol_serre_temporaire",  # 16
    "serre_permanente",      # 17
)

# Label -> numeric class ID; a missing label (NaN) is mapped to 0.
class_mapping = {np.nan: 0}
class_mapping.update(
    {label: class_id for class_id, label in enumerate(_class_labels, start=1)}
)

# Class IDs that are flagged as soil in the SOIL column.
soil_classes = [
    class_mapping[label]
    for label in (
        "sol_neige",
        "sol_vegetalise",
        "sol_divers",
        "sol_vigne",
        "sol_agricole",
        "sol_bache",
        "sol_serre_temporaire",
    )
]

# Class ID -> package ID (several classes share one package).
package_mapping = {
    0: 0,
    1: 1,    # batiment
    2: 1,    # toit_vegetalise
    3: 2,    # surface_non_beton
    4: 3,    # surface_beton
    5: 4,    # eau_bassin
    6: 5,    # roche_dure_meuble
    7: 6,    # eau_naturelle
    8: 6,    # roseliere
    9: 7,    # sol_neige
    10: 8,   # sol_vegetalise
    11: 8,   # surface_riparienne
    12: 8,   # sol_divers
    13: 9,   # sol_vigne
    14: 10,  # sol_agricole
    15: 10,  # sol_bache
    16: 10,  # sol_serre_temporaire
    17: 11,  # serre_permanente
}

# Class ID -> ID in the reduced 12-class scheme; "merged into" marks classes
# that are folded into another class of that scheme.
cl12_mapping = {
    0: 0,
    1: 1,    # batiment
    2: 1,    # toit_vegetalise, merged into batiment
    3: 2,    # surface_non_beton
    4: 3,    # surface_beton
    5: 5,    # eau_bassin, merged into eau_naturelle
    6: 4,    # roche_dure_meuble
    7: 5,    # eau_naturelle
    8: 6,    # roseliere
    9: 7,    # sol_neige
    10: 8,   # sol_vegetalise
    11: 8,   # surface_riparienne, merged into sol_vegetalise
    12: 9,   # sol_divers
    13: 10,  # sol_vigne
    14: 11,  # sol_agricole
    15: 12,  # sol_bache
    16: 12,  # sol_serre_temporaire, merged into sol_bache
    17: 1,   # serre_permanente, merged into batiment
}


def _class_id(label):
    """Return the numeric class ID for a CLASSE_SEN label.

    Missing labels -- ``None`` or any NaN object, not only the ``np.nan``
    singleton used as a key in ``class_mapping`` -- get the ID stored under
    ``np.nan``. An unknown label raises ``KeyError``.
    """
    # `label != label` is True only for NaN values.
    if label is None or label != label:
        return class_mapping[np.nan]
    return class_mapping[label]


# Add the numeric class columns to every snapped layer found under
# source_folder (sub-folders included) and write the result into out_folder
# as a GeoPackage. All outputs land directly in out_folder.
for folder, _subdirs, files in os.walk(source_folder):
    for file in files:
        if not file.endswith((".shp", ".gpkg")):
            continue

        gt = gpd.read_file(os.path.join(folder, file))

        # Column-wise lookups; the lambdas keep the KeyError on an ID that is
        # missing from a mapping instead of silently producing NaN.
        gt["CLASSE_SEN_ID"] = gt["CLASSE_SEN"].map(_class_id)
        gt["package_id"] = gt["CLASSE_SEN_ID"].map(
            lambda class_id: package_mapping[class_id])
        gt["cl12_id"] = gt["CLASSE_SEN_ID"].map(
            lambda class_id: cl12_mapping[class_id])
        gt["SOIL"] = gt["CLASSE_SEN_ID"].isin(soil_classes)

        # NOTE(review): geometries are not repaired here (a make_valid() call
        # was disabled in the original cell).

        # NOTE(review): missing labels were mapped to 0 above, so this dropna
        # is not expected to remove rows -- confirm whether unlabeled polygons
        # should be kept.
        gt = gt.dropna(subset=["CLASSE_SEN_ID"])

        # Split multi-part geometries and keep only the polygon parts.
        gt = gt.explode(index_parts=False)
        gt = gt.loc[gt["geometry"].geom_type == "Polygon"]

        gt = gt[["CLASSE_SEN", "CLASSE_SEN_ID", "package_id", "cl12_id", "SOIL", "geometry"]]

        # splitext removes exactly one extension. str.rstrip(".shp") strips any
        # trailing '.', 's', 'h' or 'p' characters and so mangles names such as
        # "roads.shp" (-> "road"). For ".gpkg" inputs the name is unchanged.
        stem = os.path.splitext(file)[0]
        gt.to_file(os.path.join(out_folder, stem + ".gpkg"), driver="GPKG")

In [None]:
# Preview the first five rows of the layer held in `gt`.
gt.head(n=5)

### 3. Rasterize

In [None]:
# Shell call: run utilities/rasterize_gt.py, passing the path stored in config_train_gt as --config_file.
! python utilities/rasterize_gt.py --config_file {config_train_gt}

### 4. Cut TIFF to Daniel's predefined grid

In [None]:
# Shell call: run utilities/cut_tiff_to_grid.py, passing the path stored in config_train_gt as --config_file.
! python utilities/cut_tiff_to_grid.py --config_file {config_train_gt}

## Scratch

### 1. RGBI -> RGB

In [None]:
# Shell call: run utilities/rgbi2rgb.py, passing the path stored in config_train_scratch as --config_file.
! python utilities/rgbi2rgb.py --config_file {config_train_scratch}

### 2. Ensure every file has a resolution of 10 cm

In [None]:
# Shell call: run utilities/rescale_tif.py, passing the path stored in config_train_scratch as --config_file.
! python utilities/rescale_tif.py --config_file {config_train_scratch}

### 3. Cut tiff to grid

In [None]:
# Shell call: run utilities/cut_tiff_to_grid.py, passing the path stored in config_train_scratch as --config_file.
! python utilities/cut_tiff_to_grid.py --config_file {config_train_scratch}

## Create Dataset

In [None]:
# Shell call: run utilities/random_split.py, passing the path stored in config_train_gt as --config_file.
! python utilities/random_split.py --config_file {config_train_gt}