# MapReader with masking

In [None]:
# https://github.com/maps-as-data/MapReader/blob/main/worked_examples/geospatial/context_classification_one_inch_maps/Pipeline.ipynb

In [None]:
# Requires the OS OpenData plan (free): https://osdatahub.os.uk/plans
# osdatahub library documentation https://github.com/OrdnanceSurvey/osdatahub
# data docs https://www.ordnancesurvey.co.uk/documents/apis-on-a-page.pdf

### Setup

In [None]:
# pip install osdatahub
# conda install -c conda-forge osdatahub

You'll also need to sign up for an account on the OS Data Hub and get an API key. If you've set up your account and need help getting a key, try the following steps:

1. Navigate to the API Dashboard located on the top navigation bar
2. Go to My Projects
3. Click Create a new project, give your project a name, then click Create project
4. Select Add an API to this project
5. Choose the APIs you would like to use and click Done (Note: osdatahub supports the OS Features, Places, Names, Linked Identifiers, and Downloads APIs)


Free API Key:

Daily Limit: 1,000 API requests per day.
Data Limit per Request: Typically limited to 10,000 features (or 1,000 features per request for certain products like the Features API).
Rate Limiting: Maximum of 10 requests per second

# Code

In [1]:
import ast
import os

import geopandas as gpd
import pandas as pd
from osdatahub import NGD
from osdatahub.extent import Extent
from osdatahub.FeaturesAPI import FeaturesAPI
from pyproj import Transformer
from shapely.geometry import Polygon

In [2]:
class OSDataDownloader:
    """
    Download Ordnance Survey open data for the area covered by a map image.

    The bounding box of each image is looked up in a metadata CSV, reprojected
    to the query CRS, and used as the spatial extent of an OS Features API
    request. Results are returned as a GeoDataFrame and optionally saved.
    """

    def __init__(self, api_key, metadata_csv):
        """
        Initialize the OSDataDownloader class.

        Parameters:
        - api_key: Your OS Data Hub API key.
        - metadata_csv: Path to the metadata CSV file containing bounding box information.
        """
        self.api_key = api_key
        self.metadata_csv = metadata_csv

    def get_png_metadata(self, png_name):
        """
        Extract the metadata for a specific PNG from the metadata CSV file.

        The CSV is expected to have "name", "coordinates" and "crs" columns,
        where "coordinates" holds a Python-literal string such as
        "(xmin, ymin, xmax, ymax)".

        Parameters:
        - png_name: Name of the PNG file (e.g., "map_101433939.png").

        Returns:
        - metadata: Dictionary with the keys "bounding_box", "crs" and "image_name".

        Raises:
        - ValueError: If no row matches png_name, or if the "coordinates" cell
          is not a plain Python literal.
        """
        metadata = pd.read_csv(self.metadata_csv)
        row = metadata[metadata["name"] == png_name]
        if row.empty:
            raise ValueError(f"No metadata found for {png_name}")

        raw_coords = row["coordinates"].iloc[0]
        try:
            # literal_eval only accepts literals (tuples, lists, numbers, ...),
            # so a crafted CSV cell cannot execute arbitrary code as eval() would.
            coords = ast.literal_eval(raw_coords)
        except (ValueError, SyntaxError, TypeError) as exc:
            raise ValueError(
                f"Could not parse coordinates for {png_name}: {raw_coords!r}"
            ) from exc

        return {
            "bounding_box": coords,
            "crs": row["crs"].iloc[0],
            "image_name": row["name"].iloc[0],
        }

    def transform_bounding_box(self, bbox, from_crs="EPSG:4326", to_crs="EPSG:27700"):
        """
        Transforms a bounding box from one CRS to another.

        All four corners are reprojected and the envelope of the results is
        returned. Reprojecting only two opposite corners is not enough, because
        an axis-aligned box in the source CRS is generally not axis-aligned in
        the target CRS, so the two-corner box can miss part of the area.

        Parameters:
        - bbox: Tuple of (xmin, ymin, xmax, ymax) in the source CRS.
        - from_crs: Source CRS (default is EPSG:4326).
        - to_crs: Target CRS (default is EPSG:27700).

        Returns:
        - Transformed bounding box as (xmin, ymin, xmax, ymax).
        """
        # always_xy=True fixes the axis order to (x, y) / (lon, lat) for both CRSs.
        transformer = Transformer.from_crs(from_crs, to_crs, always_xy=True)
        xmin, ymin, xmax, ymax = bbox[0], bbox[1], bbox[2], bbox[3]
        xs, ys = transformer.transform(
            [xmin, xmin, xmax, xmax],
            [ymin, ymax, ymin, ymax],
        )
        return (min(xs), min(ys), max(xs), max(ys))

    def initialize_features_api(self, product_name, extent="GB", crs="EPSG:27700"):
        """
        Initialize the Ordnance Survey FeaturesAPI client for the specified product.

        Parameters:
        - product_name: The open data product to access.
        - extent: Either the string "GB" (predefined Great Britain extent) or a
          bounding box as (xmin, ymin, xmax, ymax).
        - crs: The CRS of the bounding box (default is "EPSG:27700"). Not used
          when extent is "GB".

        Returns:
        - features_api: An initialized FeaturesAPI client.
        """
        # The isinstance guard keeps the comparison a plain bool even when the
        # bounding box is passed as an array-like object.
        if isinstance(extent, str) and extent == "GB":
            extent_obj = Extent.from_predefined("GB")
        else:
            xmin, ymin, xmax, ymax = extent
            # Closed ring: the first vertex is repeated at the end.
            polygon = Polygon(
                [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin), (xmin, ymin)]
            )
            extent_obj = Extent(polygon, crs)

        features_api = FeaturesAPI(
            key=self.api_key, product_name=product_name, extent=extent_obj
        )
        return features_api

    def query_os_data(self, features_api, product_filter=None, limit=None):
        """
        Query Ordnance Survey open data using the FeaturesAPI client and filter by product name.

        Parameters:
        - features_api: An initialized FeaturesAPI client.
        - product_filter: Optional value to match against the "product" column
          of the result.
        - limit: Optional maximum number of features, forwarded as
          features_api.query(limit=...). When None the client's own default is
          used (believed to be 100 features in osdatahub; verify against the
          installed version).

        Returns:
        - gdf: GeoDataFrame with the queried data, or None if the response has
          no features. If product_filter matches nothing, an empty GeoDataFrame
          is returned.

        Raises:
        - KeyError: If product_filter is given but the result has no "product" column.
        """
        response = (
            features_api.query() if limit is None else features_api.query(limit=limit)
        )
        if "features" not in response or not response["features"]:
            print("No data found for this bounding box.")
            return None

        gdf = gpd.GeoDataFrame.from_features(response["features"])
        if "geometry" not in gdf.columns:
            print("The API response does not include a 'geometry' column.")
            return None

        gdf = gdf.set_geometry("geometry")

        # Label the frame with the CRS of the query extent rather than a
        # hard-coded EPSG:4326: the sample output recorded in this notebook shows
        # coordinates in the hundreds of thousands, i.e. projected metres, not
        # degrees. NOTE(review): this assumes the client returns geometries in
        # the extent's CRS -- confirm against the osdatahub documentation.
        extent_crs = getattr(getattr(features_api, "extent", None), "crs", None)
        gdf = gdf.set_crs(extent_crs or "EPSG:4326", allow_override=True)

        if product_filter:
            if "product" not in gdf.columns:
                raise KeyError(
                    "Cannot apply product_filter: the response has no 'product' "
                    f"column (available columns: {list(gdf.columns)})"
                )
            gdf = gdf[gdf["product"] == product_filter]
            if gdf.empty:
                print(
                    f"No data found for product '{product_filter}' within the extent."
                )

        return gdf

    def download_os_data(
        self, png_name, product_name, output_file=None, product_filter=None, limit=None
    ):
        """
        Download Ordnance Survey open data for the specified PNG and product.

        Parameters:
        - png_name: Name of the PNG file.
        - product_name: The OS open data product to query.
        - output_file: Optional path to save the queried data. Paths ending in
          ".geojson" (any case) are written as GeoJSON, anything else as an
          ESRI Shapefile.
        - product_filter: Optional value to match against the "product" column
          of the result.
        - limit: Optional maximum number of features to request (see query_os_data).

        Returns:
        - gdf: GeoDataFrame with the queried data or None if no data is found.
        """
        metadata = self.get_png_metadata(png_name)

        # Use the CRS recorded for this map; fall back to EPSG:4326 when the
        # metadata cell is missing or not a usable string (e.g. NaN).
        source_crs = metadata.get("crs")
        if not isinstance(source_crs, str) or not source_crs.strip():
            source_crs = "EPSG:4326"

        bbox_27700 = self.transform_bounding_box(
            metadata["bounding_box"], from_crs=source_crs, to_crs="EPSG:27700"
        )

        features_api = self.initialize_features_api(
            product_name, extent=bbox_27700, crs="EPSG:27700"
        )
        gdf = self.query_os_data(features_api, product_filter, limit=limit)

        # Skip writing when there is nothing to save (None or an empty frame).
        if gdf is not None and not gdf.empty and output_file:
            # str() lets pathlib.Path objects through; lower() makes the
            # extension check case-insensitive.
            is_geojson = str(output_file).lower().endswith(".geojson")
            gdf.to_file(
                output_file,
                driver="GeoJSON" if is_geojson else "ESRI Shapefile",
            )

        return gdf

## Testing Code

In [7]:
# Initialize downloader.
# The API key is read from the environment so it is never stored in the
# notebook. Set OS_DATA_HUB_API_KEY before starting the kernel.
# NOTE(review): a key was previously hardcoded in this cell; treat it as
# compromised and regenerate it in the OS Data Hub dashboard.
api_key = os.environ.get("OS_DATA_HUB_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the OS_DATA_HUB_API_KEY environment variable to your OS Data Hub API key."
    )

downloader = OSDataDownloader(
    api_key=api_key,
    metadata_csv="./data/6_inch_png/metadata.csv",
)

# Query and download data
gdf = downloader.download_os_data(
    png_name="map_101433939.png",  # this png has data
    # png_name="map_101433990.png",  # this png has no data
    product_name="zoomstack_urban_areas",
    output_file="./built_up_areas.geojson",
)

if gdf is None:
    print("No data returned for the specified bounding box.")
else:
    print("Downloaded data:")
    print(gdf.head())

Downloaded data:
                                            geometry  \
0  MULTIPOLYGON (((479737.59080 127263.65930, 479...   
1  MULTIPOLYGON (((479929.86460 128309.90930, 479...   

                       GmlID  OBJECTID      Type  SHAPE_Length    SHAPE_Area  
0  Zoomstack_UrbanAreas.4935      4935  Regional  19589.185579  3.894567e+06  
1  Zoomstack_UrbanAreas.5012      5012  National  13464.997625  8.670046e+06  
