# Prepare Dataset for Fine-Tuning of the heig-vd model

In [None]:
import os

# Root of the local proj-soils checkout; every training config is resolved below it.
projsoilsroot = "/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/proj-soils"


def _train_config(stem):
    """Return the path of ``config/train/config-train_<stem>.yaml`` under ``projsoilsroot``."""
    return os.path.join(projsoilsroot, "config", "train", "config-train_" + stem + ".yaml")


# One config per data source (gt / scratch) and resolution (10, 20, 40 cm).
# The variable names are interpolated into the `! python ...` cells further down.
config_train_gt_10cm = _train_config("gt-10cm")
config_train_gt_20cm = _train_config("gt-20cm")
config_train_gt_40cm = _train_config("gt-40cm")
config_train_scratch_10cm = _train_config("scratch-10cm")
config_train_scratch_20cm = _train_config("scratch-20cm")
config_train_scratch_40cm = _train_config("scratch-40cm")

## GT

### 1. Snap to 10cm

In [None]:
import os
import numpy as np
import geopandas as gpd


# Folder with the raw ground-truth shapefiles, and the folder that receives the
# snapped copies as GeoPackages.
root = "/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/GT/20240216/0-0-0-raw"
out_folder = "/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/GT/20240216/0-0-snapped"

# Writing fails when the target folder does not exist yet, so create it up front.
os.makedirs(out_folder, exist_ok=True)

for file in os.listdir(root):
    if not file.endswith(".shp"):
        continue

    gt = gpd.read_file(os.path.join(root, file))

    # Shift the whole layer so that the lower-left corner of its bounding box
    # lands on a multiple of 0.1 CRS units (10 cm if the CRS unit is metres --
    # TODO confirm). Only a translation is applied; shapes are not altered.
    xmin, ymin = gt.total_bounds[:2]
    plus_xmin = np.round(xmin, 1) - xmin
    plus_ymin = np.round(ymin, 1) - ymin
    gt.geometry = gt.geometry.translate(plus_xmin, plus_ymin)

    # Swap only the trailing ".shp" for ".gpkg"; str.replace would also rewrite
    # a ".shp" occurring earlier in the file name.
    out_name = file[:-len(".shp")] + ".gpkg"

    # Name the driver explicitly instead of relying on extension inference,
    # matching how the class-assignment step writes its output.
    gt.to_file(os.path.join(out_folder, out_name), driver="GPKG")


### 2. Assign classes

In [None]:
# Both folders sit side by side in the same ground-truth delivery directory.
_gt_delivery = "/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/GT/20240216"

# Input of the class-assignment step (output folder of the snapping step).
source_folder = _gt_delivery + "/0-0-snapped"
# Destination of the class-annotated layers.
out_folder = _gt_delivery + "/0-cleaned"

In [None]:
# CLASSE_SEN label -> numeric class id. Features without a label (NaN) get id 0.
class_mapping = {
    np.nan: 0,
    "batiment": 1,
    "toit_vegetalise": 2,
    "surface_non_beton": 3,
    "surface_beton": 4,
    "eau_bassin": 5,
    "roche_dure_meuble": 6,
    "eau_naturelle": 7,
    "roseliere": 8,
    "sol_neige": 9,
    "sol_vegetalise": 10,
    "surface_riparienne": 11,
    "sol_divers": 12,
    "sol_vigne": 13,
    "sol_agricole": 14,
    "sol_bache": 15,
    "sol_serre_temporaire": 16,
    "serre_permanente": 17
}

# Class ids that are flagged True in the SOIL column.
soil_classes = [9, 10, 12, 13, 14, 15, 16]

# Class id -> package id (several classes share one package id).
package_mapping = {
    0: 0, 1: 1, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 6, 9: 7, 10: 8,
    11: 8, 12: 8, 13: 9, 14: 10, 15: 10, 16: 10, 17: 11
}

# Class id -> id in the merged 12-class scheme (0 stays 0).
cl12_mapping = {
    0: 0,
    1: 1, # batiment
    2: 1, # toit_vegetalise -> batiment
    3: 2, # surface_non_beton
    4: 3, # surface_beton
    5: 5, # eau_bassin -> eau_naturelle
    6: 4, # roche_dure_meuble
    7: 5, # eau_naturelle
    8: 6, # roseliere
    9: 7, # sol_neige
    10: 8, # sol_vegetalise
    11: 8, # surface_riparienne -> sol_vegetalise
    12: 9, # sol_divers
    13: 10, # sol_vigne
    14: 11, # sol_agricole
    15: 12, # sol_bache
    16: 12, # sol_serre_temporaire -> sol_bache
    17: 1, # serre_permanente -> batiment
    }

# A NaN dictionary key only matches the very same NaN object, so a missing
# label read from disk (None, or a different NaN instance) would raise
# KeyError. Missing labels are therefore handled separately from the string
# labels.
_label_to_id = {
    label: class_id
    for label, class_id in class_mapping.items()
    if isinstance(label, str)
}
_unlabelled_id = class_mapping[np.nan]

# Writing fails when the target folder does not exist yet.
os.makedirs(out_folder, exist_ok=True)

# NOTE: sub-folders are walked but every result goes into the flat
# `out_folder`, so equally named files from different sub-folders overwrite
# each other.
for folder, _subfolders, files in os.walk(source_folder):
    for file in files:
        if not file.endswith((".shp", ".gpkg")):
            continue

        gt = gpd.read_file(os.path.join(folder, file))

        # Fail loudly (KeyError, as a plain dict lookup would) on labels that
        # are not part of the class scheme.
        labels = gt["CLASSE_SEN"]
        unknown = set(labels.dropna().unique()) - set(_label_to_id)
        if unknown:
            raise KeyError(
                f"{file}: unknown CLASSE_SEN value(s): {sorted(map(repr, unknown))}"
            )

        # Vectorised lookups; they also work on layers without any feature.
        # Every feature ends up with an integer id (0 when unlabelled), so no
        # null ids remain that would need dropping afterwards.
        gt["CLASSE_SEN_ID"] = (
            labels.map(_label_to_id).fillna(_unlabelled_id).astype("int64")
        )
        gt["package_id"] = gt["CLASSE_SEN_ID"].map(package_mapping).astype("int64")
        gt["cl12_id"] = gt["CLASSE_SEN_ID"].map(cl12_mapping).astype("int64")
        gt["SOIL"] = gt["CLASSE_SEN_ID"].isin(soil_classes)

        # Geometries are used as read; no validity repair is applied here.

        # Split multi-part geometries into single parts and keep polygons only.
        gt = gt.explode(index_parts=False)
        gt = gt.loc[gt["geometry"].geom_type == "Polygon"]

        gt = gt[["CLASSE_SEN", "CLASSE_SEN_ID", "package_id", "cl12_id", "SOIL", "geometry"]]

        # str.rstrip(".shp") strips any trailing '.', 's', 'h' or 'p' characters
        # (e.g. "parcels.shp" -> "parcel"); remove exactly the suffix instead.
        if file.endswith(".shp"):
            out_name = file[:-len(".shp")] + ".gpkg"
        else:
            out_name = file
        gt.to_file(os.path.join(out_folder, out_name), driver="GPKG")

In [None]:
# Preview the first rows of the layer processed last by the loop above.
gt.head(n=5)

### 3. Rasterize

In [None]:
# Run the GT rasterization script with the 10 cm GT config. The script path is
# relative, so the working directory must be the folder containing `utilities/`.
! python utilities/rasterize_gt.py --config_file {config_train_gt_10cm}

### 4. Rescale

10cm -> 20cm

In [None]:
# Run the TIFF rescaling script with the 20 cm GT config.
! python utilities/rescale_tif.py --config_file {config_train_gt_20cm}

10cm -> 40cm

In [None]:
# Run the TIFF rescaling script with the 40 cm GT config.
! python utilities/rescale_tif.py --config_file {config_train_gt_40cm}

### 5. Cut tiff to Grid

10cm

In [None]:
# Run the grid-cutting script with the 10 cm GT config.
! python utilities/cut_tiff_to_grid.py --config_file {config_train_gt_10cm}

20cm

In [22]:
# Run the grid-cutting script with the 20 cm GT config. The recorded log of this
# cell reports GRID_QUERY = 'depth == 1' and CELL_LENGTH = 512.
! python utilities/cut_tiff_to_grid.py --config_file {config_train_gt_20cm}

[32m2024-03-08 09:47:14.330[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m258[0m - [1mTIFF_FOLDER = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/GT/20240216/3-rasterized-12cl/20cm'[0m
[32m2024-03-08 09:47:14.331[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m259[0m - [1mOUT_FOLDER = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/GT/20240216/4-cut-to-grid-12cl/20cm/512px'[0m
[32m2024-03-08 09:47:14.331[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m260[0m - [1mGRID_PATH = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/heig-vd_finetuned/recursive_grids_max204-8m.gpkg'[0m
[32m2024-03-08 09:47:14.331[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m261[0m - [1mGRID_QUERY = 'depth == 1'[0m
[32m2024-03-08 09:47:14.331[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m262[0m - [1mCELL_LENGTH = 512[0m
[32m2024-03-08 09:47:14.331[0m | [1mI

40cm

In [23]:
# Run the grid-cutting script with the 40 cm GT config. The recorded log of this
# cell reports GRID_QUERY = 'depth == 2' and CELL_LENGTH = 512.
! python utilities/cut_tiff_to_grid.py --config_file {config_train_gt_40cm}

[32m2024-03-08 09:47:19.746[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m258[0m - [1mTIFF_FOLDER = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/GT/20240216/3-rasterized-12cl/40cm'[0m
[32m2024-03-08 09:47:19.746[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m259[0m - [1mOUT_FOLDER = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/GT/20240216/4-cut-to-grid-12cl/40cm/512px'[0m
[32m2024-03-08 09:47:19.746[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m260[0m - [1mGRID_PATH = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/heig-vd_finetuned/recursive_grids_max204-8m.gpkg'[0m
[32m2024-03-08 09:47:19.746[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m261[0m - [1mGRID_QUERY = 'depth == 2'[0m
[32m2024-03-08 09:47:19.746[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m262[0m - [1mCELL_LENGTH = 512[0m
[32m2024-03-08 09:47:19.746[0m | [1mI

## Scratch

### 1. RGBI -> RGB

In [None]:
# Run the RGBI -> RGB conversion script with the 10 cm scratch config.
! python utilities/rgbi2rgb.py --config_file {config_train_scratch_10cm}

### 2. Rescale

10cm -> 10cm

In [None]:
# Run the rescaling script with the 10 cm scratch config.
# NOTE(review): the GT section calls utilities/rescale_tif.py, this cell calls
# utilities/rescale.py -- confirm that this script exists and is the intended one.
! python utilities/rescale.py --config_file {config_train_scratch_10cm}

10cm -> 20cm

In [None]:
# Run the rescaling script with the 20 cm scratch config.
# NOTE(review): the GT section calls utilities/rescale_tif.py, this cell calls
# utilities/rescale.py -- confirm that this script exists and is the intended one.
! python utilities/rescale.py --config_file {config_train_scratch_20cm}

10cm -> 40cm

In [None]:
# Run the rescaling script with the 40 cm scratch config.
# NOTE(review): the GT section calls utilities/rescale_tif.py, this cell calls
# utilities/rescale.py -- confirm that this script exists and is the intended one.
! python utilities/rescale.py --config_file {config_train_scratch_40cm}

### 3. Cut tiff to grid

10cm

In [None]:
# Run the grid-cutting script with the 10 cm scratch config.
! python utilities/cut_tiff_to_grid.py --config_file {config_train_scratch_10cm}

20cm


In [20]:
# Run the grid-cutting script with the 20 cm scratch config. The recorded log of
# this cell reports GRID_QUERY = 'depth == 1' and CELL_LENGTH = 512.
! python utilities/cut_tiff_to_grid.py --config_file {config_train_scratch_20cm}

[32m2024-03-08 09:46:13.924[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m258[0m - [1mTIFF_FOLDER = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/scratch/horizontal_scratch_mosaics-rgb/2-rescaled/20cm'[0m
[32m2024-03-08 09:46:13.925[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m259[0m - [1mOUT_FOLDER = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/scratch/horizontal_scratch_mosaics-rgb/3-cut_to_grid/20cm/512px'[0m
[32m2024-03-08 09:46:13.925[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m260[0m - [1mGRID_PATH = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/heig-vd_finetuned/recursive_grids_max204-8m.gpkg'[0m
[32m2024-03-08 09:46:13.925[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m261[0m - [1mGRID_QUERY = 'depth == 1'[0m
[32m2024-03-08 09:46:13.925[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m262[0m - [1mCELL_LENGTH = 512[0

40cm


In [21]:
# Run the grid-cutting script with the 40 cm scratch config. The recorded log of
# this cell reports GRID_QUERY = 'depth == 2' and CELL_LENGTH = 512.
! python utilities/cut_tiff_to_grid.py --config_file {config_train_scratch_40cm}

[32m2024-03-08 09:46:24.566[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m258[0m - [1mTIFF_FOLDER = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/scratch/horizontal_scratch_mosaics-rgb/2-rescaled/40cm'[0m
[32m2024-03-08 09:46:24.566[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m259[0m - [1mOUT_FOLDER = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/scratch/horizontal_scratch_mosaics-rgb/3-cut_to_grid/40cm/512px'[0m
[32m2024-03-08 09:46:24.566[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m260[0m - [1mGRID_PATH = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/heig-vd_finetuned/recursive_grids_max204-8m.gpkg'[0m
[32m2024-03-08 09:46:24.566[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m261[0m - [1mGRID_QUERY = 'depth == 2'[0m
[32m2024-03-08 09:46:24.566[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m262[0m - [1mCELL_LENGTH = 512[0

## Create Dataset

10cm

In [24]:
# Run the dataset split script with the 10 cm GT config.
# NOTE(review): the recorded log of this cell shows SEED = None together with a
# SPLIT_FILE entry -- confirm that the split is reproducible with these settings.
! python utilities/random_split.py --config_file {config_train_gt_10cm}

[32m2024-03-08 09:50:03.401[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m156[0m - [1mSOURCE_IPT_FOLDER = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/scratch/horizontal_scratch_mosaics-rgb/3-cut_to_grid/10cm/512px'[0m
[32m2024-03-08 09:50:03.401[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m157[0m - [1mSOURCE_TGT_FOLDER = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/GT/20240216/4-cut-to-grid-12cl/10cm/512px'[0m
[32m2024-03-08 09:50:03.402[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m158[0m - [1mTARGET_ROOT = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/datasets.nosync/dataset_12cl_seed6-adjusted_multiscale'[0m
[32m2024-03-08 09:50:03.402[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m159[0m - [1mSEED = None[0m
[32m2024-03-08 09:50:03.402[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m160[0m - [1mSPLIT_FILE = '/Users/nicib

20cm

In [25]:
# Run the dataset split script with the 20 cm GT config.
# NOTE(review): the recorded log of this cell shows SEED = None together with a
# SPLIT_FILE entry -- confirm that the split is reproducible with these settings.
! python utilities/random_split.py --config_file {config_train_gt_20cm}

[32m2024-03-08 09:50:10.880[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m156[0m - [1mSOURCE_IPT_FOLDER = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/scratch/horizontal_scratch_mosaics-rgb/3-cut_to_grid/20cm/512px'[0m
[32m2024-03-08 09:50:10.880[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m157[0m - [1mSOURCE_TGT_FOLDER = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/GT/20240216/4-cut-to-grid-12cl/20cm/512px'[0m
[32m2024-03-08 09:50:10.880[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m158[0m - [1mTARGET_ROOT = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/datasets.nosync/dataset_12cl_seed6-adjusted_multiscale'[0m
[32m2024-03-08 09:50:10.880[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m159[0m - [1mSEED = None[0m
[32m2024-03-08 09:50:10.880[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m160[0m - [1mSPLIT_FILE = '/Users/nicib

40cm

In [26]:
# Run the dataset split script with the 40 cm GT config.
# NOTE(review): the recorded log of this cell shows SEED = None together with a
# SPLIT_FILE entry -- confirm that the split is reproducible with these settings.
! python utilities/random_split.py --config_file {config_train_gt_40cm}

[32m2024-03-08 09:50:13.007[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m156[0m - [1mSOURCE_IPT_FOLDER = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/scratch/horizontal_scratch_mosaics-rgb/3-cut_to_grid/40cm/512px'[0m
[32m2024-03-08 09:50:13.007[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m157[0m - [1mSOURCE_TGT_FOLDER = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/GT/20240216/4-cut-to-grid-12cl/40cm/512px'[0m
[32m2024-03-08 09:50:13.007[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m158[0m - [1mTARGET_ROOT = '/Users/nicibe/Desktop/Job/swisstopo_stdl/soil_fribourg/data/datasets.nosync/dataset_12cl_seed6-adjusted_multiscale'[0m
[32m2024-03-08 09:50:13.007[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m159[0m - [1mSEED = None[0m
[32m2024-03-08 09:50:13.007[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m160[0m - [1mSPLIT_FILE = '/Users/nicib