In [1]:
import requests
import os
import json
from zipfile import ZipFile
import pandas as pd
import geopandas as gpd
import pathlib

In [2]:
# Which machine the notebook is running on. The next cell uses this to pick the
# base data folder; the values it handles are "home" and "work".
location = "home"

In [3]:
# Resolve the base data folder for the machine selected by `location`.
# An unrecognised value is rejected here with a clear message; previously it
# left `folder_location` undefined and surfaced as a NameError further down.
if location == "home":
    folder_location = "C:/Users/Lara/Work/DataDownloads/"
elif location == "work":
    folder_location = "G:/_Projects/DataDownloads/"
else:
    raise ValueError(
        f"Unknown location {location!r}; expected 'home' or 'work'"
    )

# Folder the extracted datasets are written under.
download_location = folder_location + "testData/"
# Folder the downloaded zip archives are written to before extraction.
temp_download_location = folder_location + "testDataTemp/"
# Folder holding the lookup csv files.
lookup_location = folder_location + "Lookups/"
# Lookup csv naming each product to download, with its format and source.
item_url_file = "OS-API_URL.csv"
# Shapefile read into `boundary_gdf` in the next cell.
boundary_file = folder_location + "CountyBoundary.shp"

In [4]:
# Load the boundary shapefile configured above with geopandas.
boundary_gdf = gpd.read_file(boundary_file)

In [5]:
# Load the product lookup table (one row per dataset to download) and
# preview its first three rows.
item_url_df = pd.read_csv(f"{lookup_location}{item_url_file}")
item_url_df.head(3)

Unnamed: 0,Dataset,DatasetProductName,URL,Format,Source
0,MiniScale,MiniScale,https://api.os.uk/downloads/v1/products/MiniSc...,"Zip file (containing EPS, Illustrator and TIFF...",OS
1,1:250 000 Scale Colour Raster,250kScaleColourRaster,https://api.os.uk/downloads/v1/products/250kSc...,TIFF-LZW,OS
2,Boundary-Line,BoundaryLine,https://api.os.uk/downloads/v1/products/Bounda...,ESRI® Shapefile,OS


In [6]:
# get list of OS open data products available to download https://docs.os.uk/os-apis/accessing-os-apis/os-downloads-api/technical-specification/opendata-products
url = 'https://api.os.uk/downloads/v1/products'

# A timeout stops a stalled connection from hanging the kernel indefinitely.
response = requests.get(url, timeout=60)
# Fail loudly on an HTTP error instead of storing an error payload as the
# product list, which would only break later in the download loop.
response.raise_for_status()
product_list = response.json()

In [7]:
# Disabled example: request a single product download directly by product id,
# area and format (here BuiltUpAreas / GB / GeoPackage). Kept for reference only.
# url = "https://api.os.uk/downloads/v1/products/BuiltUpAreas/downloads?area=GB&format=GeoPackage&redirect"
# response = requests.get(url)

In [None]:
# Download every product named in the lookup table.
#
# For each product: find its version in `product_list`, ask the API which
# downloads exist, keep those whose format matches the lookup and whose area is
# in `required_areas`, save each archive to the temp folder, extract it under
# "<download_location>Original/<source>/<dataset>/" and write the product
# version to metadata.json next to the extracted files.
#
# Failures are reported and the loop moves on to the next item (best effort).

# Seconds to wait on the server before giving up on a request.
request_timeout = 60
# Archives can be hundreds of MB, so they are streamed to disk in 1 MiB pieces
# rather than held in memory in full.
download_chunk_size = 1024 * 1024
# Area codes to keep; downloads for any other area are ignored.
required_areas = ["GB", "SP", "ST", "SU", "SO"]

#Loop through each dataset name in lookup (blank names are skipped)
for dataset in item_url_df.DatasetProductName.dropna().unique():
    print(dataset)
    # Get metadata for dataset
    dataset_details_list = [d for d in product_list if d['id'] == dataset]
    if not dataset_details_list:
        # Indexing [0] on an empty list used to abort the whole run.
        print(f"{dataset} not found in OS product list - skipping")
        continue
    print(dataset_details_list[0]['version'])
    metadata = {"version": dataset_details_list[0]['version']}

    # Product download https://docs.os.uk/os-apis/accessing-os-apis/os-downloads-api/technical-specification/download-an-opendata-product
    productId = dataset
    url_product = f"https://api.os.uk/downloads/v1/products/{productId}/downloads"
    try:
        response_product = requests.get(url_product, timeout=request_timeout)
        response_product.raise_for_status()
        dataset_product_list = response_product.json()
    except (requests.RequestException, ValueError) as error:
        # Covers network errors, HTTP error statuses and a non-JSON body.
        print(f"Could not list downloads for {dataset}: {error}")
        continue

    # Formats/source come from the external csv lookup. A product may appear on
    # more than one lookup row, so collect every requested format instead of
    # calling .item(), which raises as soon as there are two rows.
    lookup_rows = item_url_df[item_url_df.DatasetProductName == dataset]
    required_formats = set(lookup_rows['Format'])
    # Source is used for creating the folder structure (first row wins).
    source = lookup_rows['Source'].iloc[0]

    # Loop through each product in dataset list
    for value in dataset_product_list:
        print(value)
        # Only interested in specified formats/ areas
        if value['format'] not in required_formats or value['area'] not in required_areas:
            continue

        # Set variables
        dataset_url = value['url']
        temp_folder_name = f"{temp_download_location}"
        folder_name_original = f"{download_location}Original/{source}/{dataset}/"
        folder_name_wilts = f"{download_location}Wiltshire/{source}/{dataset}/"
        file_name = f"{dataset}_{value['area']}"
        temp_zip_path = temp_folder_name + file_name + ".zip"

        print(dataset_url)
        try:
            with requests.get(dataset_url, stream=True, timeout=request_timeout) as response:
                # Check if request worked
                if response.status_code != 200:
                    print(f"Request failed for {dataset}")
                    continue
                # Create folders if not already there. The temp folder is
                # included: it was never created before, so the first write
                # failed on a fresh machine.
                os.makedirs(temp_folder_name, exist_ok=True)
                os.makedirs(folder_name_original, exist_ok=True)
                os.makedirs(folder_name_wilts, exist_ok=True)
                # Create zip file in temp location
                with open(temp_zip_path, mode="wb") as file:
                    for chunk in response.iter_content(chunk_size=download_chunk_size):
                        file.write(chunk)
            # Extract zip into the permanent location
            with ZipFile(temp_zip_path, 'r') as z_object:
                z_object.extractall(path=folder_name_original)
            # Write out metadata
            with open(folder_name_original + "metadata.json", mode="w") as file:
                json.dump(metadata, file)

            # TODO: export Wiltshire data into folder_name_wilts (not implemented yet)
        except Exception as error:
            # Best effort: report the cause and carry on with the next download.
            # Unlike a bare `except:`, this lets KeyboardInterrupt stop the run.
            print(f"Error occurred for {dataset}: {error!r}")
           

MiniScale
2025-01
{'md5': '2ee5e66b2579c65d7fe6bd5370de186a', 'size': 311023804, 'url': 'https://api.os.uk/downloads/v1/products/MiniScale/downloads?area=GB&format=Zip+file+%28containing+EPS%2C+Illustrator+and+TIFF-LZW%29&redirect', 'format': 'Zip file (containing EPS, Illustrator and TIFF-LZW)', 'area': 'GB', 'fileName': 'minisc_gb.zip'}
https://api.os.uk/downloads/v1/products/MiniScale/downloads?area=GB&format=Zip+file+%28containing+EPS%2C+Illustrator+and+TIFF-LZW%29&redirect
250kScaleColourRaster
2025-06
{'md5': '919da89a6a46bfdee0bb59c450e4539a', 'size': 134123650, 'url': 'https://api.os.uk/downloads/v1/products/250kScaleColourRaster/downloads?area=GB&format=TIFF-LZW&redirect', 'format': 'TIFF-LZW', 'area': 'GB', 'fileName': 'ras250_gb.zip'}
https://api.os.uk/downloads/v1/products/250kScaleColourRaster/downloads?area=GB&format=TIFF-LZW&redirect
BoundaryLine
2025-05
{'md5': 'b3d8bd455c55fb34e15f334cc1e4b164', 'size': 781845741, 'url': 'https://api.os.uk/downloads/v1/products/Boundar