In [2]:
import requests
import os
import json
from zipfile import ZipFile
import pandas as pd
import geopandas as gpd
import pathlib
from datetime import date

In [3]:
location = "home"

In [4]:
if location == "home":
    folder_location = "C:/Users/Lara/Work/DataDownloads/"
    
if location == "work":
    folder_location = "O:/Data_team/GIS_data_downloads/"

download_location = folder_location + "testData/"
temp_download_location = folder_location + "testDataTemp/"
lookup_location = folder_location + "Lookups/"
item_url_file = "OS-API_URL.csv"
boundary_file = folder_location + "CountyBoundary.shp"

In [5]:
boundary_gdf = gpd.read_file(boundary_file)

In [6]:
# Read the csv file
item_url_df = pd.read_csv(lookup_location + item_url_file)
item_url_df.head(3)

Unnamed: 0,Dataset,DatasetProductName,URL,Format,Source
0,MiniScale,MiniScale,https://api.os.uk/downloads/v1/products/MiniSc...,"Zip file (containing EPS, Illustrator and TIFF...",OS
1,1:250 000 Scale Colour Raster,250kScaleColourRaster,https://api.os.uk/downloads/v1/products/250kSc...,TIFF-LZW,OS
2,Boundary-Line,BoundaryLine,https://api.os.uk/downloads/v1/products/Bounda...,GeoPackage,OS


In [7]:
# get list of OS open data products available to download https://docs.os.uk/os-apis/accessing-os-apis/os-downloads-api/technical-specification/opendata-products
url = 'https://api.os.uk/downloads/v1/products'

response = requests.get(url)
product_list = response.json()

In [10]:
#Loop through each dataset name in lookup
for dataset in ["OpenMapLocal"]:#item_url_df.DatasetProductName.tail(5):
    print(dataset)
    # Get metadata for dataset
    dataset_details_list = [d for d in product_list if d['id'] in [dataset]]
    metadata = {"Version":dataset_details_list[0]['version'],
                "Date downloaded":date.today().strftime('%m/%d/%Y')} 

    # Product download https://docs.os.uk/os-apis/accessing-os-apis/os-downloads-api/technical-specification/download-an-opendata-product
    productId = dataset
    url_product = f"https://api.os.uk/downloads/v1//products/{productId}/downloads"
    response_product = requests.get(url_product)
    dataset_product_list = response_product.json()
    print(dataset_product_list)
    # Set format required from external csv lookup (some datasets have multiple)
    format = item_url_df[item_url_df.DatasetProductName==dataset]['Format'].item()
    # Set source for creating folder structure
    source = item_url_df[item_url_df.DatasetProductName==dataset]['Source'].item()

    # Loop through each product in dataset list
    for value in dataset_product_list:
        #Only interested in specified formats/ areas
        if value['format'] == format and value['area'] in ["GB"]:
            # Set variables 
            dataset_url = value['url']
            temp_folder_name = f"{temp_download_location}"
            folder_name_original = f"{download_location}Original/{source}/{dataset}/"
            folder_name_wilts = f"{download_location}Wiltshire/{source}/{dataset}/"
            file_name = f"{dataset}_{value['area']}"
            
            print(dataset_url)
            try:
                response = requests.get(dataset_url)
                    # Check if request worked
                if response.status_code == 200:
                    #Create folder if not already there
                    if not os.path.exists(folder_name_original):
                        os.makedirs(folder_name_original)
                    # if not os.path.exists(folder_name_wilts):
                    #     os.makedirs(folder_name_wilts)
                    # Create zip file in temp location
                    with open(temp_folder_name+file_name+".zip", mode="wb") as file:
                        file.write(response.content)
                    # Extract zip and move to permanent location
                    with ZipFile(temp_folder_name+file_name+".zip", 'r') as z_object:
                        z_object.extractall(path=folder_name_original)
                    # Write out metadata
                    with open(folder_name_original+"metadata.json", mode="w") as file:
                        json.dump(metadata,file)
                    
                    ## Export wiltshire data
                    if value['format'] == "GeoPackage":
                        # Get list of spatial files downloaded
                        data_loc = pathlib.Path(f"{download_location}Original/{source}/{dataset}")
                        shp_list = list(data_loc.rglob("*.gpkg"))
                        # Loop through each spatial file
                        for file in shp_list:
                            # Check folder exists, and if not create it
                            wilts_file_location = pathlib.Path(str(file).replace("Original", "Wiltshire"))
                            if not os.path.exists(wilts_file_location.parent):
                                os.makedirs(wilts_file_location.parent)
                            # Remove file if it already exists as geopackage writes onto existing file
                            if os.path.exists(wilts_file_location):
                                os.remove(wilts_file_location)
                            # Loop through each layer in file, clip to wilts and write to new location
                            gpd_layers = gpd.list_layers(file)
                            count = 0
                            for i, layer in enumerate(gpd_layers.name):
                                gdf = gpd.read_file(file, layer = layer)
                                gdf_wilts = gpd.clip(gdf, boundary_gdf)
                                # Check that geodataframe contains data
                                if gdf_wilts.empty == False:
                                    gdf_wilts.to_file(wilts_file_location,layer = layer, driver = "GPKG", mode = "w" if count==0 else "a")
                                    count=+1
                            
                            with open(folder_name_wilts+"metadata.json", mode="w") as file:
                                json.dump(metadata,file)
                    
                    
                        
                else:
                    print(f"Request failed for {dataset}")
            except Exception as error:
                print(f"Error occured for {dataset}:", error)
           

OpenMapLocal
[{'md5': '1ad088efa748287080fe01e24ee92613', 'size': 1322829, 'url': 'https://api.os.uk/downloads/v1/products/OpenMapLocal/downloads?area=HP&format=ESRI%C2%AE+Shapefile&redirect', 'format': 'ESRI® Shapefile', 'area': 'HP', 'fileName': 'opmplc_essh_hp.zip'}, {'md5': '7a635c511f981458cfd61f11cb70c27a', 'size': 110506, 'url': 'https://api.os.uk/downloads/v1/products/OpenMapLocal/downloads?area=HT&format=ESRI%C2%AE+Shapefile&redirect', 'format': 'ESRI® Shapefile', 'area': 'HT', 'fileName': 'opmplc_essh_ht.zip'}, {'md5': '3f2c7318339ff357e4ce448831d5b624', 'size': 11568106, 'url': 'https://api.os.uk/downloads/v1/products/OpenMapLocal/downloads?area=HU&format=ESRI%C2%AE+Shapefile&redirect', 'format': 'ESRI® Shapefile', 'area': 'HU', 'fileName': 'opmplc_essh_hu.zip'}, {'md5': '5af818cdbb90da0884623d4b7278564e', 'size': 64652, 'url': 'https://api.os.uk/downloads/v1/products/OpenMapLocal/downloads?area=HW&format=ESRI%C2%AE+Shapefile&redirect', 'format': 'ESRI® Shapefile', 'area': '