In [6]:
import os
import random
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from rasterio.features import rasterize
from shapely.geometry import box
from sklearn.model_selection import train_test_split
import numpy as np
import csv
import logging

# Set the logging level to WARNING to suppress DEBUG messages
logging.getLogger('rasterio').setLevel(logging.WARNING)
# Define paths
shapefile_path = "C:/_GLOBAL/TRAVAIL/OCS/cosia/FLAIR-1-main/FLAIR-1-main/data/parking/osm_parking_final_2.gpkg"
raster_path = "C:/_GLOBAL/TRAVAIL/OCS/cosia/input/tm_ortho2022.vrt"
output_dir = "C:/_GLOBAL/TRAVAIL/OCS/cosia/FLAIR-1-main/FLAIR-1-main/data/parking/dataset2"
tile_size = 512  # Tile size in pixels

# Create output folders
folders = ["train/images", "train/labels", "val/images", "val/labels", "test/images", "test/labels"]
for folder in folders:
    os.makedirs(os.path.join(output_dir, folder), exist_ok=True)

# Load data
gdf = gpd.read_file(shapefile_path) 
with rasterio.open(raster_path) as src:
    raster_crs = src.crs

if gdf.crs != raster_crs:
    gdf = gdf.to_crs(raster_crs)

# Generate tiles based on parking lot polygons
tiles = []
for _, row in gdf.iterrows():
    center = row['geometry'].centroid
    tile = box(
        center.x - tile_size / 2, center.y - tile_size / 2,
        center.x + tile_size / 2, center.y + tile_size / 2
    )
    tiles.append(tile)

# Check for valid data in tile
def has_valid_data(tile_image, no_data_value=0):
    return np.any(tile_image != no_data_value)

# Rasterize geometries
def create_binary_mask(intersection, out_transform, tile_size):
    shapes = ((geom, 1) for geom in intersection.geometry)
    return rasterize(
        shapes=shapes, out_shape=(tile_size, tile_size),
        transform=out_transform, fill=0, dtype="uint8"
    )

# Process a single tile
def process_tile(tile, raster_path, gdf, output_dir, tile_index, split):
    # Create a GeoDataFrame for the current tile
    tile_gdf = gpd.GeoDataFrame({"geometry": [tile]}, crs=raster_crs)
    
    # Perform intersection to check overlap with parking polygons
    intersection = gpd.overlay(tile_gdf, gdf, how='intersection')
    
    if intersection.empty:
        # Log the absence of intersection and skip this tile
        print(f"Tile {tile_index:04d} has no intersection with polygons. Skipping.")
        return None

    # Proceed with raster masking if there is an intersection
    with rasterio.open(raster_path) as src:
        try:
            out_image, out_transform = mask(src, [tile], crop=True)
            out_image = out_image[:, :tile_size, :tile_size]
        except ValueError as e:
            # Handle tiles outside raster bounds
            print(f"Tile {tile_index:04d} is outside raster bounds: {e}")
            return None

    if not has_valid_data(out_image):
        print(f"Tile {tile_index:04d} contains no valid raster data. Skipping.")
        return None

    # Define folder based on split (train/val/test)
    folder_prefix = split if split else "train"
    image_tile_path = os.path.join(output_dir, f"{folder_prefix}/images/tile_{tile_index:04d}.tif")
    label_tile_path = os.path.join(output_dir, f"{folder_prefix}/labels/tile_{tile_index:04d}.tif")

    # Save the image tile
    with rasterio.open(
        image_tile_path, "w", driver="GTiff", height=tile_size,
        width=tile_size, count=src.count, dtype=out_image.dtype,
        crs=raster_crs, transform=out_transform
    ) as dst:
        dst.write(out_image)

    # Save the label tile
    binary_mask = create_binary_mask(intersection, out_transform, tile_size)
    with rasterio.open(
        label_tile_path, "w", driver="GTiff", height=tile_size,
        width=tile_size, count=1, dtype="uint8", crs=raster_crs, transform=out_transform
    ) as dst:
        dst.write(binary_mask, 1)

    return image_tile_path, label_tile_path

# Process tiles sequentially
tile_results = []
for tile_index, tile in enumerate(tiles):
    result = process_tile(tile, raster_path, gdf, output_dir, tile_index, "train")  # Initially assign to train
    if result:
        tile_results.append(result)

# Split dataset into train, val, test
image_tiles, label_tiles = zip(*tile_results)
train_images, temp_images, train_labels, temp_labels = train_test_split(
    image_tiles, label_tiles, test_size=0.3, random_state=42
)
val_images, test_images, val_labels, test_labels = train_test_split(
    temp_images, temp_labels, test_size=0.33, random_state=42
)

# Move the val and test images and labels to the appropriate folders
for val_index, val_image in enumerate(val_images):
    os.rename(val_image, val_image.replace("train", "val"))
    os.rename(val_labels[val_index], val_labels[val_index].replace("train", "val"))

for test_index, test_image in enumerate(test_images):
    os.rename(test_image, test_image.replace("train", "test"))
    os.rename(test_labels[test_index], test_labels[test_index].replace("train", "test"))

# Save CSV files
def write_csv(image_paths, label_paths, csv_path):
    with open(csv_path, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["image", "label"])
        writer.writerows(zip(image_paths, label_paths))

write_csv(train_images, train_labels, os.path.join(output_dir, "train.csv"))
write_csv(val_images, val_labels, os.path.join(output_dir, "val.csv"))
write_csv(test_images, test_labels, os.path.join(output_dir, "test.csv"))


Tile 0055 contains no valid raster data. Skipping.
Tile 0057 contains no valid raster data. Skipping.
Tile 0083 contains no valid raster data. Skipping.
Tile 0244 contains no valid raster data. Skipping.
Tile 0250 contains no valid raster data. Skipping.
Tile 0251 contains no valid raster data. Skipping.
Tile 0336 contains no valid raster data. Skipping.
Tile 0337 contains no valid raster data. Skipping.
Tile 0340 contains no valid raster data. Skipping.
Tile 0341 contains no valid raster data. Skipping.
Tile 0343 contains no valid raster data. Skipping.
Tile 0344 contains no valid raster data. Skipping.
Tile 0347 contains no valid raster data. Skipping.
Tile 0351 contains no valid raster data. Skipping.
Tile 0352 contains no valid raster data. Skipping.
Tile 0358 contains no valid raster data. Skipping.
Tile 0388 contains no valid raster data. Skipping.
Tile 0428 contains no valid raster data. Skipping.
Tile 0430 contains no valid raster data. Skipping.
Tile 0468 contains no valid ras