# Imports

In [1]:
import os
import rasterio
import numpy as np
from tqdm.auto import tqdm
from scipy.ndimage import zoom

# Helper Functions

In [None]:
def check_lbl_in_img(lbl_box, img_box):
    """Tell whether the label box lies entirely inside the image box.

    Both arguments only need ``left``, ``bottom``, ``right`` and ``top``
    attributes. Edges that coincide with the image edges count as inside.
    """
    fits_horizontally = (
        lbl_box.left >= img_box.left and lbl_box.right <= img_box.right
    )
    fits_vertically = (
        lbl_box.bottom >= img_box.bottom and lbl_box.top <= img_box.top
    )
    return fits_horizontally and fits_vertically

def read_lbl(lbl_path):
    """Load a label raster from disk.

    Opens ``lbl_path`` with rasterio and returns a ``(data, bounds)`` pair:
    band 1 of the raster and the dataset's ``bounds`` attribute.
    """
    with rasterio.open(lbl_path) as src:
        first_band = src.read(1)
        extent = src.bounds
    return first_band, extent

def read_img(img_path, bounds):
    """Read the part of an image raster covered by ``bounds``.

    ``bounds`` must expose ``left``, ``bottom``, ``right`` and ``top``. They
    are handed to the dataset's ``window`` method, and the resulting window
    is read without selecting a band index.
    """
    with rasterio.open(img_path) as src:
        crop = src.window(
            left=bounds.left,
            bottom=bounds.bottom,
            right=bounds.right,
            top=bounds.top,
        )
        return src.read(window=crop)

def reduce_resolution(rgb_data, lidar_shape):
    """Resample a band-first image to the spatial size given by ``lidar_shape``.

    Args:
        rgb_data: 3-D array indexed as (band, row, column).
        lidar_shape: Target shape; only its first two entries (rows,
            columns) are used.

    Returns:
        Array with the same number of bands as ``rgb_data``, resampled with
        linear interpolation (``order=1``) to ``lidar_shape[0]`` rows by
        ``lidar_shape[1]`` columns.
    """
    row_factor = lidar_shape[0] / rgb_data.shape[1]
    col_factor = lidar_shape[1] / rgb_data.shape[2]

    # zoom takes one factor per axis, so the band axis is simply held at 1;
    # no temporary extra axis has to be inserted and stripped again.
    return zoom(rgb_data, (1, row_factor, col_factor), order=1)

# Constants

In [None]:
# Folders holding the label rasters and the area images they are cut from.
TRAIN_FOLDER = "data/train"
TEST_FOLDER = "data/test"
TRAIN_IMG_PATH = "data/train_area_tobias.tif"
TEST_IMG_PATH = "data/test_area_tobias.tif"

# Read each area image's bounds once; the datasets are closed again as soon
# as the bounds have been captured.
with rasterio.open(TRAIN_IMG_PATH) as train_img:
    TRAIN_BOUNDS = train_img.bounds

with rasterio.open(TEST_IMG_PATH) as test_img:
    TEST_BOUNDS = test_img.bounds

# Output folders for the generated .npz samples.
TRAIN_DEST_FOLDER = "data/processed/train"
TEST_DEST_FOLDER = "data/processed/test"

# Train

In [None]:
# Pair every usable training label with the matching crop of the training
# image and store each pair as one .npz file (arrays "X" and "y").

# np.savez does not create missing directories, so make sure the target exists.
os.makedirs(TRAIN_DEST_FOLDER, exist_ok=True)

# os.listdir returns entries in arbitrary order; sorting keeps the mapping
# from label file to output index reproducible between runs.
train_files = sorted(f for f in os.listdir(TRAIN_FOLDER) if f.endswith('.tif'))
unusable_lbls = 0
i = 1  # index of the next sample to write; skipped labels do not consume one
for train_file in tqdm(train_files):
    train_file_path = os.path.join(TRAIN_FOLDER, train_file)
    y, bbox = read_lbl(train_file_path)
    # Labels that are not fully covered by the image cannot be paired with it.
    if not check_lbl_in_img(bbox, TRAIN_BOUNDS):
        unusable_lbls += 1
        continue
    X_high_res = read_img(TRAIN_IMG_PATH, bbox)
    # Bring the image crop to the label's row/column count.
    X = reduce_resolution(X_high_res, y.shape)
    save_path = os.path.join(TRAIN_DEST_FOLDER, f"train_data_{i}.npz")
    np.savez(save_path, X=X, y=y)
    i += 1
print(f'unusable labels: {unusable_lbls}')

# Test

In [None]:
# Pair every usable test label with the matching crop of the test image and
# store each pair as one .npz file (arrays "X" and "y").

# np.savez does not create missing directories, so make sure the target exists.
os.makedirs(TEST_DEST_FOLDER, exist_ok=True)

# os.listdir returns entries in arbitrary order; sorting keeps the mapping
# from label file to output index reproducible between runs.
test_files = sorted(f for f in os.listdir(TEST_FOLDER) if f.endswith('.tif'))
unusable_lbls = 0
i = 1  # index of the next sample to write; skipped labels do not consume one
for test_file in tqdm(test_files):
    test_file_path = os.path.join(TEST_FOLDER, test_file)
    y, bbox = read_lbl(test_file_path)
    # Labels that are not fully covered by the image cannot be paired with it.
    if not check_lbl_in_img(bbox, TEST_BOUNDS):
        unusable_lbls += 1
        continue
    X_high_res = read_img(TEST_IMG_PATH, bbox)
    # Bring the image crop to the label's row/column count.
    X = reduce_resolution(X_high_res, y.shape)
    save_path = os.path.join(TEST_DEST_FOLDER, f"test_data_{i}.npz")
    np.savez(save_path, X=X, y=y)
    i += 1
print(f'unusable labels: {unusable_lbls}')