# Notes

In [2]:
# See https://colab.research.google.com/drive/15LFRMVOfEiF__FswVqTQstZC5ocC6Ur0?usp=sharing for reference cropping functions (they may require modification before use here).

# for each drone_survey:
#     for each row in ground truth:
#         crop rgb_tile from drone_survey
#         save tile to data/raster/tiles/drone_survey/idx.tif


# Setup

In [1]:
import concurrent.futures
import os
import shutil
import threading

import geopandas as gpd
import rasterio as rio
from tqdm import tqdm

# Source

## Download

In [46]:
# List of cloud optimized geotiffs
# Each entry is a gs:// object path; the download cell below copies every
# entry into the local raster directory with `gcloud storage cp`.
aerial_survey_geotiffs = [
    'gs://mpg-aerial-survey/surveys/240731_upperpartridge/processing/dronedeploy/240731_upperpartridge-visible.tif'
]

In [None]:
# Download geotiffs
raster_dir = '../data/raster'

# Make sure the destination exists as a directory so each copy lands inside it.
os.makedirs(raster_dir, exist_ok=True)

for geotiff in aerial_survey_geotiffs:
    gcloud_command = f"gcloud storage cp {geotiff} {raster_dir}"
    # os.system reports failure only through its return value; surface it here
    # instead of letting a missing raster show up later as a confusing read error.
    exit_status = os.system(gcloud_command)
    if exit_status != 0:
        raise RuntimeError(
            f"Download failed (exit status {exit_status}): {gcloud_command}"
        )

## Local resources

In [48]:
# Local copy of the survey raster: same file name as the gs:// object listed
# in aerial_survey_geotiffs, inside the '../data/raster' download directory.
geotiff_path = '../data/raster/240731_upperpartridge-visible.tif'
# Ground-truth point layer; the cropping cell filters it on its 'Presence' column.
geojson_path = '../data/vector/groundtruth.geojson'

# Functions

In [49]:
def bbox_side_len(point, side_len=10):
    """Return the axis-aligned square of side ``side_len`` centred on ``point``.

    Args:
        point: Object exposing ``x``, ``y`` and ``is_empty``; may be ``None``.
        side_len: Edge length of the square, in the same units as the
            point's coordinates.

    Returns:
        ``(minx, miny, maxx, maxy)``, or four ``None`` values when ``point``
        is ``None`` or empty.
    """
    if point is None or point.is_empty:
        # Keep the 4-tuple shape so callers can always unpack the result.
        return (None,) * 4

    radius = side_len / 2
    centre_x, centre_y = point.x, point.y
    return (
        centre_x - radius,
        centre_y - radius,
        centre_x + radius,
        centre_y + radius,
    )

def process_bbox(raster_path, bbox, output_dir, read_lock, write_lock):
    """Crop one bounding box out of a raster and save it as its own GeoTIFF.

    Args:
        raster_path: Path of the source raster, opened with rasterio.
        bbox: ``(minx, miny, maxx, maxy)`` in the raster's coordinate system.
            A tuple whose first element is ``None`` marks an invalid point.
        output_dir: Directory that receives ``crop_{minx}_{miny}.tif``; it is
            created if missing.
        read_lock: Lock held while pixels are read from the source raster.
        write_lock: Lock held while the tile is written to disk.

    Returns:
        ``"Success"`` when a tile was written, otherwise a short message
        explaining why it was skipped. Exceptions are caught and reported as
        ``"Error: <message>"`` so one bad tile does not stop a batch.
    """
    minx, miny, maxx, maxy = bbox
    if minx is None:
        return "Invalid point"

    try:
        with rio.open(raster_path) as src:
            # Check if the bbox is within the raster bounds
            raster_bounds = src.bounds
            if (minx >= raster_bounds.right or maxx <= raster_bounds.left or
                    miny >= raster_bounds.top or maxy <= raster_bounds.bottom):
                return "Bounding box outside raster extent"

            # Clip the window to the raster so that the transform computed
            # below describes the same pixels that are read for boxes that
            # straddle an edge of the raster.
            window = src.window(minx, miny, maxx, maxy).crop(src.height, src.width)

            # Ensure the window has a valid size
            if window.width < 1 or window.height < 1:
                return "Resulting window too small"

            with read_lock:
                src_array = src.read(window=window)

            # If the read array is empty, skip this bbox
            if src_array.size == 0:
                return "Empty array read"

            # Reuse the source profile, overriding only size and georeferencing.
            profile = src.profile.copy()
            profile.update({
                "height": src_array.shape[1],
                "width": src_array.shape[2],
                "transform": rio.windows.transform(window, src.transform)
            })

            output_file = f"{output_dir}/crop_{minx}_{miny}.tif"
            # exist_ok avoids the check-then-create race between worker
            # threads, which previously surfaced as a FileExistsError and a
            # dropped tile.
            os.makedirs(output_dir, exist_ok=True)

            with write_lock:
                with rio.open(output_file, "w", **profile) as dst:
                    dst.write(src_array)

        return "Success"
    except Exception as e:
        return f"Error: {str(e)}"

def do_tiling(raster_path, points_gdf, output_dir, num_workers=4, side_len=10):
    """Crop a square tile around every point of ``points_gdf`` using a thread pool.

    Args:
        raster_path: Path of the source raster handed to ``process_bbox``.
        points_gdf: GeoDataFrame whose ``geometry`` column holds the tile centres.
        output_dir: Directory that receives the cropped tiles.
        num_workers: ``max_workers`` of the thread pool.
        side_len: Tile edge length, in the units of the point coordinates.

    Returns:
        None. Progress is shown with tqdm, and a summary of skipped points and
        of the first ten non-success results is printed.
    """
    bboxes = points_gdf.geometry.apply(lambda point: bbox_side_len(point, side_len=side_len))
    valid_bboxes = [bbox for bbox in bboxes if bbox[0] is not None]

    # Points without a usable geometry never reach process_bbox; report them
    # instead of dropping them silently.
    skipped = len(bboxes) - len(valid_bboxes)
    if skipped:
        print(f"Skipping {skipped} points with missing or empty geometry")

    # One lock pair shared by all workers serialises raster reads and tile writes.
    read_lock = threading.Lock()
    write_lock = threading.Lock()

    errors = []

    # The context manager closes the progress bar even if the pool raises.
    with tqdm(total=len(valid_bboxes), desc="Processing bounding boxes", unit="bbox") as pbar:

        def process_and_update(bbox):
            result = process_bbox(raster_path, bbox, output_dir, read_lock, write_lock)
            pbar.update(1)
            if result != "Success":
                errors.append(result)

        with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
            # list() drains the lazy map so every task has finished before we leave.
            list(executor.map(process_and_update, valid_bboxes))

    if errors:
        print(f"\nEncountered {len(errors)} errors:")
        for error in errors[:10]:  # Print first 10 errors
            print(f"  - {error}")
        if len(errors) > 10:
            print(f"  ... and {len(errors) - 10} more.")

# Crop Tiles

In [43]:
target_crop_sz_meters = 10 # how big to make crops in meters
# os.cpu_count() may return None when the count cannot be determined;
# fall back to a single worker so the value is always a usable integer.
num_cores = os.cpu_count() or 1
print(f'Using {num_cores} cores')

Using 8 cores


In [None]:
# Presence and Absence directories
presence_dir = '../data/raster/tiles/presence'
absence_dir = '../data/raster/tiles/absence'

# Create necessary directories
os.makedirs(presence_dir, exist_ok=True)
os.makedirs(absence_dir, exist_ok=True)

# Read the raster CRS; only the metadata is needed here, so the dataset is
# closed again straight away.
with rio.open(geotiff_path) as src:
    raster_crs = src.crs
print(f"Raster CRS: {raster_crs}")

# Read the GeoJSON file (once) and report its CRS
gdf = gpd.read_file(geojson_path)
print(f"GeoJSON CRS: {gdf.crs}")

# Reproject GeoJSON to match raster if needed, so the bounding boxes built
# around each point are expressed in raster coordinates.
if gdf.crs != raster_crs:
    print(f"Reprojecting from {gdf.crs} to {raster_crs}")
    gdf = gdf.to_crs(raster_crs)

# Filter for presence points (presence == 1)
presence_points = gdf[gdf['Presence'] == 1]
# Filter for absence points (presence == 0)
absence_points = gdf[gdf['Presence'] == 0]

# Process presence points
do_tiling(geotiff_path, presence_points, presence_dir,
          num_workers=num_cores, side_len=target_crop_sz_meters)

# Process absence points
do_tiling(geotiff_path, absence_points, absence_dir,
          num_workers=num_cores, side_len=target_crop_sz_meters)

In [51]:
# Sanity check: list the distinct labels found in the 'Presence' column
presence_values = gdf['Presence'].unique()
print(f"Presence unique values: {presence_values}")

Presence unique values: [1 0]


# Export

In [None]:
# Zip the cropped tiles (../data/raster/tiles) into ../data/processed/tiles.zip.
# The previous shell command archived data/interim/tiles, a directory that no
# cell in this notebook writes to; the tiles are produced under ../data/raster/tiles.
tiles_parent_dir = '../data/raster'
archive_base = '../data/processed/tiles'  # make_archive appends '.zip'

os.makedirs(os.path.dirname(archive_base), exist_ok=True)

# base_dir='tiles' keeps archive entries relative ('tiles/presence/...').
archive_path = shutil.make_archive(
    archive_base, 'zip', root_dir=tiles_parent_dir, base_dir='tiles'
)
print(f"Wrote {archive_path}")