In [None]:
import fiona
import geopandas as gpd
from tqdm.notebook import tqdm
import pandas as pd
import pathlib

In [None]:
# Directory containing the consolidated catchment shapefile; the lookup and coverage CSVs
# produced further down are written into the same directory.
ROOT = pathlib.Path('data/wastewater_catchment_areas_public')

# Read the consolidated catchments and key the rows by their `identifier` column so that they
# can be looked up by identifier later on.
catchments = gpd.read_file(ROOT.joinpath('catchments_consolidated.shp'))
catchments = catchments.set_index('identifier')
print(f'loaded {len(catchments)} catchments')
catchments.head()

In [None]:
# Read the LSOA geometries from the zipped archive and key the rows by the `LSOA11CD` column so
# that they can be looked up by LSOA code later on.
lsoas = gpd.read_file('data/geoportal.statistics.gov.uk/LSOA11_BGC.zip')
lsoas = lsoas.set_index('LSOA11CD')
print(f'loaded {len(lsoas)} LSOAs')
lsoas.head()

In [None]:
# Evaluate the intersection area between LSOAs and catchments.
# First find candidate (catchment, LSOA) pairs with the spatial index of the LSOAs. No predicate
# is passed, so candidates are matched at the envelope level only and may not truly overlap.
# `query_bulk` was deprecated in geopandas 0.13 and removed in 1.0 in favour of `query`, which
# accepts an array of geometries and returns the same pair of positional index arrays. Use
# whichever the installed version provides so the cell survives a fresh environment.
lsoa_sindex = lsoas.sindex
bulk_query = lsoa_sindex.query_bulk if hasattr(lsoa_sindex, 'query_bulk') else lsoa_sindex.query
catchment_idx, lsoa_idx = bulk_query(catchments.geometry)
print(f'found {len(catchment_idx)} intersections between catchments and LSOAs')
print(f'{len(set(lsoa_idx))} of {len(lsoas)} LSOAs intersect at least one catchment (at the '
      'envelope level)')

# Evaluate the proper intersection areas (not just whether they intersect). The indices returned
# by the spatial index are positional, hence `iloc`.
intersection_areas = [catchments.geometry.iloc[i].intersection(lsoas.geometry.iloc[j]).area
                      for i, j in tqdm(zip(catchment_idx, lsoa_idx), total=len(catchment_idx))]

# Package the intersection areas in a dataframe and only retain intersections with non-zero area.
intersections = pd.DataFrame({
    'identifier': catchments.index[catchment_idx],
    'LSOA11CD': lsoas.index[lsoa_idx],
    'intersection_area': intersection_areas,
})
intersections = intersections[intersections.intersection_area > 0]
print(f'retained {len(intersections)} intersections after removing zero areas')

# Persist the lookup first so that the preview below is the last expression of the cell; only the
# last expression is rendered by the notebook, so a `head()` in the middle would be discarded.
intersections.to_csv(ROOT / 'lsoa_catchment_lookup.csv', index=False)
intersections.head()

In [None]:
def _covered_area(lsoa_code, subset):
    """Return the area of one LSOA that lies inside any of its intersecting catchments.

    `subset` holds the rows of `intersections` for `lsoa_code`; its `identifier` column names the
    catchments to consider. The catchments are merged into a single geometry before intersecting
    with the LSOA so that overlapping catchments are not counted twice.
    """
    if len(subset) > 1:
        footprint = catchments.loc[subset.identifier].unary_union
    else:
        # A single catchment needs no union; use its geometry directly.
        footprint = catchments.geometry.loc[subset.identifier.iloc[0]]
    return footprint.intersection(lsoas.geometry.loc[lsoa_code]).area


# Covered area per LSOA code, for every LSOA that has at least one retained intersection.
coverage = pd.Series({
    lsoa_code: _covered_area(lsoa_code, subset)
    for lsoa_code, subset in tqdm(intersections.groupby('LSOA11CD'))
})

# Attach the covered area to the LSOAs (aligned on the LSOA code index); LSOAs without any
# intersection are absent from `coverage` and are filled with zero.
lsoas['area_covered'] = coverage
lsoas['area_covered'] = lsoas['area_covered'].fillna(0)
lsoas['total_area'] = lsoas.area

# Write the total and covered area of every LSOA, indexed by LSOA code.
coverage_columns = ['total_area', 'area_covered']
lsoas[coverage_columns].to_csv(ROOT / 'lsoa_coverage.csv')