# Identify buildings from DSM data

## Set up the environment for Google Colab

In [None]:
# Detect whether the notebook runs inside Google Colab by trying to import the
# `google.colab` package; IN_COLAB gates the Colab-only setup cells below.
try:
    import google.colab  # noqa: F401  (imported only to probe availability)

    IN_COLAB = True
except ImportError:
    # Only a failed import means "not in Colab"; any other exception
    # (e.g. KeyboardInterrupt) is deliberately left to propagate.
    IN_COLAB = False

Download the necessary files from the course repository if running in Google Colab.

In [None]:
if IN_COLAB:
    import urllib.request
    import zipfile
    from pathlib import Path
    from shutil import copy, copytree, rmtree

    # Every location used by this cell, gathered in one place.
    url = "https://github.com/sogelink-research/ML-Course/archive/refs/heads/main.zip"
    zip_path = Path("simple_model.zip")
    directory_path = Path(".")
    initial_simple_model_path = Path(
        "ML-Course-main/notebooks/1-introduction_shorter/simple_model"
    )
    simple_model_path = Path("simple_model")
    initial_requirements_path = Path(
        "ML-Course-main/notebooks/1-introduction_shorter/requirements.txt"
    )
    requirements_path = Path("requirements.txt")

    def _copy_if_missing(src, dst):
        """Copy `src` to `dst` unless `dst` already exists."""
        if not Path(dst).exists():
            copy(src, dst)

    def get_files(path: Path, extensions: list[str]):
        """Return the entries directly under `path` matching `*.<ext>` for each extension."""
        return [found for ext in extensions for found in path.glob(f"*.{ext}")]

    # Download the GitHub repository archive and unpack it in place.
    urllib.request.urlretrieve(url, zip_path)
    with zipfile.ZipFile(zip_path, "r") as archive:
        archive.extractall(directory_path)

    # Take the simple model package, keeping any file that is already present.
    copytree(
        initial_simple_model_path,
        simple_model_path,
        copy_function=_copy_if_missing,
        dirs_exist_ok=True,
    )

    # Take the requirements file.
    copy(initial_requirements_path, requirements_path)

    # Clean the rest: the archive, the extracted repository, and any
    # `.py` / `.just` files sitting directly in the working directory.
    zip_path.unlink()
    rmtree(Path("ML-Course-main"))
    for file_path in get_files(directory_path, ["py", "just"]):
        file_path.unlink()

    print("Downloaded the necessary files")

Install the necessary packages if running in Google Colab.

In [None]:
if IN_COLAB:
    # IPython magic (not plain Python): installs the packages listed in the
    # requirements.txt file copied by the previous cell.
    %pip install -r requirements.txt

In [None]:
import datetime


def get_new_model_name() -> str:
    """Build a model name from the current local date and time.

    Returns:
        The current timestamp formatted as ``YYMMDD_HHMMSS``.
    """
    timestamp = datetime.datetime.now()
    return f"{timestamp:%y%m%d_%H%M%S}"

In [None]:
from pathlib import Path
from simple_model.dataloader import ImagesLoader
from simple_model.nn import SegmentationConvolutionalNetwork
from simple_model.dataparse import download_all
from simple_model.bbox import BboxInt
import torch

# --- Configuration -----------------------------------------------------------
# Area of interest, passed to BboxInt in (minx, maxy, maxx, miny) order.
# NOTE(review): the coordinate system and the meaning of the trailing `True`
# flag are defined by simple_model.bbox.BboxInt — confirm there.
minx, maxy, maxx, miny = 120000, 487000, 125000, 482000
bbox = BboxInt(minx, maxy, maxx, miny, True)
image_size = 512
filter_buildings = True
main_data_folder = Path("data")
main_models_folder = Path("models")

main_data_folder.mkdir(parents=True, exist_ok=True)
main_models_folder.mkdir(parents=True, exist_ok=True)

# --- Data --------------------------------------------------------------------
# Fetch the data for the bounding box; download_all returns the dataset folder
# plus the locations of the images and of the masks.
data_folder, images_path, masks_path = download_all(
    bbox, main_data_folder, image_size, filter_buildings
)

image_shape = (image_size, image_size)
nodata = 0

images_loader = ImagesLoader(image_shape=image_shape, nodata=nodata)
images_loader.load_data(images_path, masks_path)
dataloaders = images_loader.get_dataloaders(batch_size=8, train_proportion=0.8)

# --- Model -------------------------------------------------------------------
# Each run gets its own timestamped folder under `models/`.
model_name = get_new_model_name()
model_folder = main_models_folder / model_name
model_folder.mkdir(parents=True, exist_ok=True)
model = SegmentationConvolutionalNetwork(
    image_size=image_shape,
    encoder_channels=[16, 32, 64],
    layers_downsample=2,
    layers_upsample=2,
    input_channels=1,
    model_folder=model_folder,
    data_folders=[data_folder],
)

# Better speed for CPU.
# `torch.compile(model)` returns a new optimized wrapper and leaves `model`
# itself unchanged, so calling it without using the result had no effect.
# `Module.compile()` compiles the module's call in place instead, so the
# methods invoked below run the compiled version.
# The guard keeps the previous (uncompiled) behaviour on PyTorch releases that
# do not provide this method. If compilation is unsupported on your platform,
# remove this call.
# NOTE(review): assumes SegmentationConvolutionalNetwork is a torch.nn.Module.
if hasattr(model, "compile"):
    model.compile()

# --- Training ----------------------------------------------------------------
visualisation_output = model_folder / "visualisation" / "output.png"
model.train_model(
    dataloaders=dataloaders,
    epochs=200,
    visualisation_output=visualisation_output,
    stop_early_after=20,
)

# --- Outputs -----------------------------------------------------------------
# Predictions, metrics and weights are all stored inside the run's folder.
output_folder = model_folder / "output"
model.save_predictions(
    images_loader=images_loader,
    dataloaders=dataloaders,
    output_folder=output_folder,
)
metrics_folder = model_folder / "metrics"
model.save_metrics(
    images_loader=images_loader,
    dataloaders=dataloaders,
    metrics_folder=metrics_folder,
)
model.save_weights()

In [None]:
# model2 = SegmentationConvolutionalNetwork.load_model(model_folder)
# model2.save_predictions(
#     images_loader=images_loader,
#     dataloaders=dataloaders,
#     output_folder=output_folder,
# )