<a href="https://colab.research.google.com/github/rdronamaps/exiftask/blob/master/Kubernetes_potholes_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import logging
import rasterio
from rasterio.windows import Window
import numpy as np
from PIL import Image
from ultralytics import YOLO
import torch
import geopandas as gpd
from shapely.geometry import box
from google.cloud import storage
import time

# Load the YOLOv8 model once at import time so every tile reuses the same
# instance (replace the path with your custom-trained model).
# NOTE(review): Ultralytics weight files conventionally use a ".pt" suffix;
# confirm that YOLO() accepts this ".pth" path — TODO verify.
model = YOLO('/mnt/models/model.pth')

# Initialize the Google Cloud Storage client used by the bucket helpers below.
# NOTE(review): no credentials are passed here, so the client presumably relies
# on the environment's default credentials — confirm for the deployment.
storage_client = storage.Client()

def setup_logging(tiff_filename):
    """Route root-logger output to a log file dedicated to one TIFF file.

    The log file is ``/tmp/logs/<tiff_filename without extension>.log``; any
    directory component in ``tiff_filename`` is recreated below ``/tmp/logs``.

    ``logging.basicConfig`` is deliberately not used: it does nothing once the
    root logger already has a handler, so only the first file processed would
    ever get its own log (and none at all if anything logged beforehand).
    Instead, the file handler installed by a previous call is removed and
    closed, and a fresh one is attached for this file. Handlers installed by
    other code are left untouched.

    Args:
        tiff_filename: Name (optionally with a folder prefix) of the TIFF
            being processed.
    """
    log_filename = f"/tmp/logs/{os.path.splitext(tiff_filename)[0]}.log"
    os.makedirs(os.path.dirname(log_filename), exist_ok=True)

    root_logger = logging.getLogger()

    # Detach only the handler a previous call to this function installed.
    for handler in list(root_logger.handlers):
        if getattr(handler, "_per_tiff_log", False):
            root_logger.removeHandler(handler)
            handler.close()

    file_handler = logging.FileHandler(log_filename)
    file_handler.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    )
    # Marker used above to recognise this handler on the next call.
    file_handler._per_tiff_log = True
    root_logger.addHandler(file_handler)
    root_logger.setLevel(logging.INFO)

    logging.info(f"Started processing file: {tiff_filename}")

def tile_orthomosaic(orthomosaic_path, tile_size):
    """Cut an orthomosaic raster into square windows for YOLOv8 processing.

    The raster is walked row by row in steps of ``tile_size`` pixels. Every
    window is read with all of its bands and rearranged so the band axis comes
    last. Windows whose values sum to zero are treated as empty and dropped.

    Args:
        orthomosaic_path: Local path of the raster to open with rasterio.
        tile_size: Step between windows and the requested window width/height,
            in pixels.

    Returns:
        A ``(tiles, dataset)`` tuple. ``tiles`` is a list of ``(tile, x, y)``
        entries, where ``x`` and ``y`` are the pixel offsets at which the
        window starts. ``([], None)`` is returned when the file does not exist
        or anything fails while reading it.

        NOTE(review): ``dataset`` is handed back after its ``with`` block has
        exited, so it is presumably closed; callers in this file only read
        ``.transform`` and ``.crs`` from it — confirm those stay available.
    """
    if not os.path.exists(orthomosaic_path):
        logging.error(f"Error: File {orthomosaic_path} does not exist.")
        return [], None

    try:
        with rasterio.open(orthomosaic_path) as dataset:
            n_cols, n_rows = dataset.width, dataset.height

            # Row-major sequence of window origins (x, y).
            origins = (
                (col, row)
                for row in range(0, n_rows, tile_size)
                for col in range(0, n_cols, tile_size)
            )

            kept = []
            for col, row in origins:
                pixels = dataset.read(window=Window(col, row, tile_size, tile_size))
                pixels = np.moveaxis(pixels, 0, -1)  # bands first -> bands last

                # Keep only windows that contain some data.
                if np.sum(pixels) != 0:
                    kept.append((pixels, col, row))

            return kept, dataset

    except Exception as e:
        logging.error(f"Error reading {orthomosaic_path}: {e}")
        return [], None

def run_inference(tiles, model):
    """Apply the YOLOv8 model to every non-empty tile.

    Args:
        tiles: Iterable of ``(tile, x_offset, y_offset)`` entries, where
            ``tile`` is an array and the offsets locate it in the source
            raster.
        model: Callable invoked with a PIL image built from each tile.

    Returns:
        A list of ``(results, x_offset, y_offset)`` tuples, one per tile that
        was processed successfully. Tiles whose values sum to zero are skipped,
        and tiles that raise during conversion or inference are logged and
        skipped.
    """
    outputs = []
    for pixels, x_offset, y_offset in tiles:
        if pixels.sum() != 0:
            try:
                # The tile is cast to 8-bit before being wrapped as an image.
                image = Image.fromarray(pixels.astype(np.uint8))
                outputs.append((model(image), x_offset, y_offset))
            except Exception as e:
                logging.error(f"Error during inference for tile at ({x_offset}, {y_offset}): {e}")

    return outputs

def filter_bounding_boxes_by_area(boxes, area_threshold_sqm, gsd):
    """Keep the bounding boxes whose ground area does not exceed a threshold.

    A box's pixel area is converted to square metres by multiplying with
    ``gsd ** 2`` (``gsd`` being metres per pixel).

    Args:
        boxes: Iterable of tensors, each holding ``xmin, ymin, xmax, ymax`` and
            supporting ``.cpu().numpy()``.
        area_threshold_sqm: Largest area, in square metres, that is retained.
        gsd: Ground sampling distance in metres per pixel.

    Returns:
        A list with the original box objects that pass the filter. If anything
        raises while filtering, the error is logged and an empty list is
        returned (boxes accepted before the failure are discarded).
    """
    kept = []
    try:
        for candidate in boxes:
            left, top, right, bottom = candidate.cpu().numpy()
            ground_area = (right - left) * (bottom - top) * (gsd ** 2)

            # Boxes at or below the threshold are retained.
            if ground_area <= area_threshold_sqm:
                kept.append(candidate)
        return kept

    except Exception as e:
        logging.error(f"Error filtering bounding boxes: {e}")
        return []

def georeference_bounding_boxes(predictions, dataset, tile_size, area_threshold_sqm, gsd):
    """Convert tile-local pixel boxes into geographic corner coordinates.

    For every prediction, the boxes of the first result are filtered by ground
    area, shifted by the tile's offset into full-raster pixel coordinates, and
    mapped through ``dataset.transform``.

    Args:
        predictions: Iterable of ``(results, x_offset, y_offset)`` tuples;
            ``results[0].boxes.xyxy`` supplies the boxes of one tile.
        dataset: Object exposing ``transform``, which is multiplied with an
            ``(x, y)`` pixel pair to obtain geographic coordinates.
        tile_size: Unused; kept so existing callers keep working.
        area_threshold_sqm: Largest box area, in square metres, that is kept.
        gsd: Ground sampling distance in metres per pixel.

    Returns:
        A list of ``(top_left_geo, bottom_right_geo)`` pairs, i.e. the
        transformed ``(xmin, ymin)`` and ``(xmax, ymax)`` corners. An empty
        list is returned when there are no predictions or when any error
        occurs (the error is logged).
    """
    georeferenced_results = []

    if not predictions:
        logging.warning("No predictions to georeference.")
        return georeferenced_results

    try:
        for results, x_offset, y_offset in predictions:
            boxes = results[0].boxes.xyxy  # Get bounding boxes
            kept_boxes = filter_bounding_boxes_by_area(boxes, area_threshold_sqm, gsd)

            for kept_box in kept_boxes:
                # Convert to Python floats before any arithmetic. The values
                # arrive as NumPy scalars in the tensor's dtype; if that is
                # float32, newer NumPy promotion rules keep it through the
                # affine multiplication and truncate large projected
                # coordinates.
                xmin, ymin, xmax, ymax = (float(v) for v in kept_box.cpu().numpy())

                # Tile-local pixels -> full-raster pixels.
                xmin += x_offset
                xmax += x_offset
                ymin += y_offset
                ymax += y_offset

                # Convert pixel coordinates to geographic coordinates
                top_left_geo = dataset.transform * (xmin, ymin)
                bottom_right_geo = dataset.transform * (xmax, ymax)
                georeferenced_results.append((top_left_geo, bottom_right_geo))

        return georeferenced_results

    except Exception as e:
        logging.error(f"Error georeferencing bounding boxes: {e}")
        return []

def save_to_geojson(geo_boxes, dataset, output_path):
    """Write georeferenced boxes to a GeoJSON file as rectangular polygons.

    Args:
        geo_boxes: Sequence of ``((xmin, ymin), (xmax, ymax))`` corner pairs.
        dataset: Object whose ``crs`` attribute is assigned to the output
            GeoDataFrame.
        output_path: Destination path of the GeoJSON file.

    Returns:
        None. Nothing is written when ``geo_boxes`` is empty (a warning is
        logged) or when building/saving the file raises (the error is logged).
    """
    if not geo_boxes:
        logging.warning(f"No georeferenced boxes to save for {output_path}.")
        return

    try:
        polygons = []
        for (xmin, ymin), (xmax, ymax) in geo_boxes:
            polygons.append(box(xmin, ymin, xmax, ymax))

        frame = gpd.GeoDataFrame(geometry=polygons, crs=dataset.crs)
        frame.to_file(output_path, driver="GeoJSON")
        logging.info(f"Saved results to {output_path}")
    except Exception as e:
        logging.error(f"Error saving GeoJSON file: {e}")

def _remove_if_exists(path):
    """Delete ``path``, ignoring the case where it is already gone."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def process_gcs_folder(gcs_input_bucket, gcs_output_bucket, folder_name,
                       tile_size=640, area_threshold_sqm=20, gsd=0.0183):
    """Process every ``.tif`` object under one folder of a GCS bucket.

    Each matching object is downloaded to ``/tmp``, tiled, run through the
    model, georeferenced and written as GeoJSON. The GeoJSON is uploaded to
    ``<folder_name>/<basename>.geojson`` in the output bucket.

    Args:
        gcs_input_bucket: Name of the bucket holding the TIFF files.
        gcs_output_bucket: Name of the bucket receiving the GeoJSON files.
        folder_name: Folder (object-name prefix, without trailing slash).
        tile_size: Tile edge length in pixels passed to the tiling step.
        area_threshold_sqm: Largest detection area, in square metres, kept.
        gsd: Ground sampling distance in metres per pixel.

    Exceptions raised by the download, the upload or the logging setup
    propagate to the caller. The local temporary files are removed either way.
    """
    input_bucket = storage_client.bucket(gcs_input_bucket)
    output_bucket = storage_client.bucket(gcs_output_bucket)

    for blob in input_bucket.list_blobs(prefix=f"{folder_name}/"):
        if not blob.name.endswith(".tif"):
            continue

        local_tiff_path = f"/tmp/{os.path.basename(blob.name)}"
        # splitext swaps only the extension; str.replace would rewrite every
        # ".tif" occurrence in the path.
        local_geojson_path = f"{os.path.splitext(local_tiff_path)[0]}.geojson"

        # A leftover result from an earlier run must not be mistaken for this
        # run's output.
        _remove_if_exists(local_geojson_path)

        try:
            # Download the TIFF file to local storage
            blob.download_to_filename(local_tiff_path)

            setup_logging(blob.name)
            logging.info(f"Processing file: {local_tiff_path}")

            # Process the TIFF file
            tiles, dataset = tile_orthomosaic(local_tiff_path, tile_size=tile_size)
            predictions = run_inference(tiles, model)
            geo_boxes = georeference_bounding_boxes(
                predictions,
                dataset,
                tile_size=tile_size,
                area_threshold_sqm=area_threshold_sqm,
                gsd=gsd,
            )

            # Save GeoJSON to a local file
            save_to_geojson(geo_boxes, dataset, local_geojson_path)

            # save_to_geojson writes nothing when there are no boxes or when
            # saving fails; uploading a missing file would raise and abort the
            # remaining files.
            if os.path.exists(local_geojson_path):
                # Upload GeoJSON back to GCS
                geojson_blob = output_bucket.blob(
                    f"{folder_name}/{os.path.basename(local_geojson_path)}"
                )
                geojson_blob.upload_from_filename(local_geojson_path)
                logging.info(f"Uploaded {local_geojson_path} to {gcs_output_bucket}")
            else:
                logging.warning(f"No GeoJSON produced for {blob.name}; skipping upload.")
        finally:
            # Downloaded rasters and results would otherwise pile up in /tmp.
            _remove_if_exists(local_tiff_path)
            _remove_if_exists(local_geojson_path)

def monitor_and_process_folders(gcs_input_bucket, gcs_output_bucket, poll_interval_seconds=10):
    """Poll a GCS bucket forever and process each newly seen folder once.

    A folder is recognised by an object whose name ends with ``/``; the name
    without trailing slashes is handed to ``process_gcs_folder``. Folders that
    were processed are remembered in memory only, for the lifetime of this
    call.

    NOTE(review): this relies on the bucket containing objects named
    ``<folder>/``; presumably folders whose files were uploaded without such a
    placeholder object are never detected — confirm against how data arrives.

    Args:
        gcs_input_bucket: Name of the bucket to watch.
        gcs_output_bucket: Name of the bucket passed on for results.
        poll_interval_seconds: Pause between two listings of the bucket.

    This function does not return; an exception raised while listing or
    processing propagates to the caller and ends the loop.
    """
    processed_folders = set()
    # Building the bucket handle does not depend on the loop, so do it once.
    input_bucket = storage_client.bucket(gcs_input_bucket)

    while True:
        # List all folders in the GCS input bucket
        for blob in input_bucket.list_blobs():
            if not blob.name.endswith("/"):
                continue

            folder_name = blob.name.rstrip("/")
            if folder_name in processed_folders:
                continue

            logging.info(f"New folder detected: {folder_name}")
            process_gcs_folder(gcs_input_bucket, gcs_output_bucket, folder_name)
            # Recorded only after processing finished without raising.
            processed_folders.add(folder_name)

        time.sleep(poll_interval_seconds)  # Wait before checking for new folders

# Script entry point: start the polling loop, which runs until the process is
# interrupted or an exception escapes from it.
if __name__ == "__main__":
    # Placeholder bucket names; replace them with real buckets before running.
    gcs_input_bucket = "your-input-bucket"
    gcs_output_bucket = "your-output-bucket"
    monitor_and_process_folders(gcs_input_bucket, gcs_output_bucket)
