## Imagery Footprints and Grids
This notebook extracts the footprints of the KH-9 images (excluding no-data pixels) as polygons. The polygons are used to avoid double-counting craters where images overlap and as extents for plots. The notebook also creates grids with several cell sizes for each study area; these grids are used to compare aggregated crater counts with the THOR bombing data (see analysis notebooks).

#### Inputs:
* *study_areas -> [study_area] -> rasters*: KH-9 images for each study area (geotiff files)


#### Parameters
* *study_areas*: Names of the study areas
* *grid_sizes*: Grid sizes (in m) to use for splitting up each study area into grid cells


#### Outputs:
* *footprint_path*: KH-9 image-level footprints (geojson files)
* *footprint_path_sa*: KH-9 study area footprints (geojson files)
* *grid_path_sa*: Grid cells covering each study area for specified *grid_sizes* (geojson files)

In [1]:
import numpy as np
import rasterio
import geopandas as gpd
from rasterio import features
from shapely.geometry import box, shape
from utils import load_config, create_dir

In [2]:
def extract_raster_footprint(input_path):
    """Extract the valid-data footprint of a raster as polygons.

    Band 1 of the raster is read, every pixel that differs from the
    dataset's nodata value is marked as valid, and the valid pixels are
    polygonized with ``rasterio.features.shapes``.

    Parameters
    ----------
    input_path : str or path-like
        Path of a raster that can be opened with ``rasterio.open``.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per polygon yielded by ``rasterio.features.shapes``, with a
        single ``geometry`` column in the CRS of the raster.
    """
    with rasterio.open(input_path) as src:
        # read the raster
        array = src.read(1)

        # get a mask of valid entries
        nodata = src.nodata
        if nodata is None:
            # no nodata value declared: every pixel counts as valid
            valid = np.ones(array.shape, dtype=bool)
        elif np.isnan(nodata):
            # NaN never compares equal to itself, so `array != nodata`
            # would flag every pixel as valid; test for NaN explicitly
            valid = ~np.isnan(array)
        else:
            valid = array != nodata
        mask = valid.astype("uint8")

        # convert the raster to polygons to get the exact footprint; passing
        # the mask a second time excludes the invalid (0) pixels
        res = features.shapes(mask, mask=mask, transform=src.transform)
        geometry = [shape(poly) for poly, _ in res]

        df = gpd.GeoDataFrame({"geometry": geometry}, crs=src.crs)
        return df

In [3]:
def extract_study_area_footprints(
        study_area,
        rasters,
        footprint_path,
        footprint_path_sa):
    """Write non-overlapping image footprints and the study area footprint.

    The rasters are processed in the iteration order of ``rasters``. For each
    raster the part of its footprint that is not already covered by the
    previously processed rasters is written to its own GeoJSON file. The
    union of all footprints is written as the study area footprint.

    Parameters
    ----------
    study_area : str
        Name of the study area; substituted into both path templates.
    rasters : dict
        Mapping of raster id to raster path.
    footprint_path : str
        Template for the per-image output path with ``{study_area}`` and
        ``{raster_id}`` placeholders.
    footprint_path_sa : str
        Template for the study area output path with a ``{study_area}``
        placeholder.

    Raises
    ------
    ValueError
        If ``rasters`` is empty, since no footprint can be derived.
    """
    if not rasters:
        raise ValueError(
            f"No rasters given for study area '{study_area}'")

    poly_comb = None
    crs = None

    for raster_id, raster_path in rasters.items():
        print(raster_id)

        # extract the exact raster footprint
        fp = extract_raster_footprint(raster_path)
        fp_path = footprint_path.format(
            study_area=study_area,
            raster_id=raster_id
        )
        crs = fp.crs

        # a footprint may consist of several disconnected polygons; dissolve
        # them so that none is dropped from the overlap handling
        poly = fp.geometry.unary_union

        # remove overlapping parts of footprint
        if poly_comb is None:
            poly_no_overlap = poly
            poly_comb = poly
        else:
            poly_no_overlap = poly.difference(poly_comb)
            # add no_overlap_footprint to the combined footprint
            poly_comb = poly_comb.union(poly_no_overlap)

        # build a fresh single-row frame instead of assigning through .loc,
        # so a multi-part result is stored as one geometry
        fp_no_overlap = gpd.GeoDataFrame(
            {"geometry": [poly_no_overlap]}, crs=fp.crs)

        create_dir(fp_path, is_file=True)
        fp_no_overlap.to_file(fp_path, driver="GeoJSON")

    # the study area footprint is the union of all individual raster footprints
    df_sa = gpd.GeoDataFrame({"geometry": [poly_comb]}, crs=crs)

    fp_sa_path = footprint_path_sa.format(study_area=study_area)
    create_dir(fp_sa_path, is_file=True)
    df_sa.to_file(fp_sa_path, driver="GeoJSON")

In [4]:
def create_grid_within_area(area_path, out_path, grid_size):
    """Write a square grid of cells that lie completely within an area.

    The grid is anchored at the lower-left corner of the area's bounding box
    and stepped by ``grid_size`` in both directions. Only cells that are
    within a polygon of the area file are kept and written to ``out_path``
    as GeoJSON.

    Parameters
    ----------
    area_path : str
        Path of the vector file describing the area.
    out_path : str
        Path of the GeoJSON file to write.
    grid_size : number
        Edge length of a grid cell, in the units of the area's CRS.
    """
    area_gdf = gpd.read_file(area_path)

    # bounding box of all area polygons merged into one geometry
    west, south, east, north = area_gdf.unary_union.bounds

    # lower-left corners of the candidate cells along each axis
    xs = np.arange(west, east, grid_size)
    ys = np.arange(south, north, grid_size)

    # one square per (x, y) corner, covering the whole bounding box
    candidate_cells = [
        box(x0, y0, x0 + grid_size, y0 + grid_size)
        for x0 in xs
        for y0 in ys
    ]
    candidates_gdf = gpd.GeoDataFrame(
        geometry=candidate_cells, crs=area_gdf.crs)

    # keep only the cells that are completely within the area
    joined = gpd.sjoin(
        candidates_gdf, area_gdf, how="inner", predicate="within")
    grid_cells_sa = joined.drop(columns="index_right")

    create_dir(out_path, is_file=True)
    grid_cells_sa.to_file(out_path, driver="GeoJSON")

In [5]:
# Load the project configuration; later cells read the output path templates
# ("footprint_path", "footprint_path_sa", "grid_path_sa") from it
config = load_config("../config.yaml")
# Names of the study areas (keys of the "study_areas" mapping)
study_areas = config.get("study_areas").keys()
# Grid cell sizes for which a grid is created per study area
grid_sizes = config.get("grid_sizes")

In [6]:
%%time
# Extract image-level and study-area-level footprints for every study area
for study_area in study_areas:
    # raster id -> raster path mapping configured for this study area
    area_config = config.get("study_areas").get(study_area)
    rasters = area_config.get("rasters")

    # output path templates; they are filled in by the called function
    image_footprint_template = config.get("footprint_path")
    area_footprint_template = config.get("footprint_path_sa")

    extract_study_area_footprints(
        study_area=study_area,
        rasters=rasters,
        footprint_path=image_footprint_template,
        footprint_path_sa=area_footprint_template,
    )

quang_tri_aft
Directory created: ../data/0_data_processing/quang_tri/footprints
Directory already exists: ../data/0_data_processing/quang_tri
D3C1204-200292A077
Directory created: ../data/0_data_processing/tri_border_area/footprints
D3C1204-200292A078
Directory already exists: ../data/0_data_processing/tri_border_area/footprints
D3C1204-200292A079
Directory already exists: ../data/0_data_processing/tri_border_area/footprints
D3C1204-200292A080
Directory already exists: ../data/0_data_processing/tri_border_area/footprints
D3C1204-200292A081
Directory already exists: ../data/0_data_processing/tri_border_area/footprints
D3C1204-200292A082
Directory already exists: ../data/0_data_processing/tri_border_area/footprints
Directory already exists: ../data/0_data_processing/tri_border_area
CPU times: total: 25min 6s
Wall time: 42min 20s


In [7]:
%%time
# Create one grid per (study area, grid size) combination
for study_area in study_areas:
    for grid_size in grid_sizes:
        print(grid_size)

        # input: study area footprint, output: grid file for this cell size
        area_file = config.get("footprint_path_sa").format(
            study_area=study_area)
        grid_file = config.get("grid_path_sa").format(
            study_area=study_area, grid_size=grid_size)

        create_grid_within_area(
            area_path=area_file,
            out_path=grid_file,
            grid_size=grid_size,
        )

100
Directory created: ../data/0_data_processing/quang_tri/grids
200
Directory already exists: ../data/0_data_processing/quang_tri/grids
400
Directory already exists: ../data/0_data_processing/quang_tri/grids
800
Directory already exists: ../data/0_data_processing/quang_tri/grids
1200
Directory already exists: ../data/0_data_processing/quang_tri/grids
1600
Directory already exists: ../data/0_data_processing/quang_tri/grids
2000
Directory already exists: ../data/0_data_processing/quang_tri/grids
2400
Directory already exists: ../data/0_data_processing/quang_tri/grids
2800
Directory already exists: ../data/0_data_processing/quang_tri/grids
3200
Directory already exists: ../data/0_data_processing/quang_tri/grids
3600
Directory already exists: ../data/0_data_processing/quang_tri/grids
4000
Directory already exists: ../data/0_data_processing/quang_tri/grids
100
Directory created: ../data/0_data_processing/tri_border_area/grids
200
Directory already exists: ../data/0_data_processing/tri_bord