In [14]:
import requests
import os
import json
from zipfile import ZipFile
import pandas as pd
import geopandas as gpd
from datetime import datetime, timezone, date

In [2]:
# Which machine the notebook is running on; the next cell checks this for
# "home" or "work" to choose the base data folder.
location = "home"

In [3]:
# Pick the machine-specific base folder. An unrecognised `location` fails
# here with a clear message instead of a NameError on `folder_location` below.
if location == "home":
    folder_location = "C:/Users/Lara/Work/DataDownloads/"
elif location == "work":
    folder_location = "O:/Data_team/GIS_data_downloads/"
else:
    raise ValueError(
        f"Unknown location {location!r}; expected 'home' or 'work'"
    )

# Folders and files derived from the base folder (all end with "/" so they
# can be concatenated directly with file names).
download_location = folder_location + "testData/"
temp_download_location = folder_location + "testDataTemp/"
lookup_location = folder_location + "Lookups/"
item_url_file = "ItemURL_ArcGIS_6monthUpdates.csv"
boundary_file = folder_location + "CountyBoundary.shp"


In [4]:
# Load the boundary layer; it is the mask passed to gpd.clip when each
# downloaded dataset is cut down later in the notebook.
# NOTE(review): the data being clipped is reprojected to EPSG:27700 first --
# confirm this shapefile is in the same CRS.
boundary_gdf = gpd.read_file(boundary_file)

In [5]:
# Load the lookup of datasets and their service URLs, then preview the first rows
item_url_path = f"{lookup_location}{item_url_file}"
item_url_df = pd.read_csv(item_url_path)
item_url_df.head(3)

Unnamed: 0,Dataset,Owner,URL
0,Local Nature Reserves (England),NE,https://services.arcgis.com/JJzESW51TqeY9uat/a...
1,National Nature Reserves (England),NE,https://services.arcgis.com/JJzESW51TqeY9uat/a...
2,Priority Habitats Inventory (England),NE,https://services.arcgis.com/JJzESW51TqeY9uat/a...


In [None]:
# Download each dataset in the lookup from its ArcGIS FeatureServer layer,
# store the full extract and a Wiltshire clip as GeoPackages, and write a
# small metadata file with the last-edit and download dates.

batch_size = 2000      # records requested per page; the server may return fewer
request_timeout = 120  # seconds; stops a stalled connection hanging the notebook


def _fetch_all_features(query_url, batch_size=2000, timeout=120):
    """Page through an ArcGIS FeatureServer ``/query`` endpoint.

    Parameters
    ----------
    query_url : str
        Layer query URL taken from the lookup.
    batch_size : int
        Number of records requested per page.
    timeout : float
        Per-request timeout in seconds.

    Returns
    -------
    list of dict
        The GeoJSON features from every page, in the order returned.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems or timeouts.
    RuntimeError
        On a non-200 status, or a 200 response carrying an ``error`` payload.
    ValueError
        If a response body is not valid JSON.
    """
    params = {
        "where": "1=1",
        "outFields": "*",
        "f": "geojson",
        "resultOffset": 0,
        "resultRecordCount": batch_size,
    }
    features = []

    # Keeps running until the server stops returning features
    while True:
        response = requests.get(query_url, params=params, timeout=timeout)
        if response.status_code != 200:
            raise RuntimeError(
                f"Request failed with status code: {response.status_code}"
            )

        page = response.json()
        # ArcGIS reports many failures as HTTP 200 with an "error" object;
        # treating that as "no more data" would silently truncate the download.
        if "error" in page:
            raise RuntimeError(f"Server returned an error: {page['error']}")

        page_features = page.get("features") or []
        if not page_features:
            return features  # No more data

        features.extend(page_features)
        # Advance by what was actually received: if the server caps pages
        # below batch_size, stepping by batch_size would skip records.
        params["resultOffset"] += len(page_features)


# NOTE(review): .tail(2) limits the run to the last two lookup rows -- confirm
# whether the full lookup should be processed.
lookup_rows = item_url_df.tail(2)

# Walk the three columns together so each row supplies its own name and owner
# (no per-URL lookup that would fail if a URL appeared twice).
for url, dataset, source in zip(lookup_rows.URL, lookup_rows.Dataset, lookup_rows.Owner):
    print(url)
    # Get url that contains details for dataset
    url_details = url.replace("/query?", "?f=json")

    try:
        features = _fetch_all_features(url, batch_size=batch_size, timeout=request_timeout)
    except (requests.exceptions.RequestException, RuntimeError, ValueError) as e:
        # Never write a partial download as if it were the complete dataset.
        print("Error during request:", e)
        print(f"Skipping {dataset}: download incomplete")
        continue

    print(f"Total features retrieved: {len(features)}")
    if not features:
        # Nothing to reproject or clip.
        print(f"Skipping {dataset}: no features returned")
        continue

    # Create folders if not already there (temporary and permanent storage)
    original_dir = f"{download_location}Original/{source}/{dataset}"
    wiltshire_dir = f"{download_location}Wiltshire/{source}/{dataset}"
    os.makedirs(temp_download_location, exist_ok=True)
    os.makedirs(original_dir, exist_ok=True)
    os.makedirs(wiltshire_dir, exist_ok=True)

    # Save to a GeoJSON file
    temp_geojson = f"{temp_download_location}{dataset}.geojson"
    with open(temp_geojson, "w") as f:
        json.dump({
            "type": "FeatureCollection",
            "features": features
        }, f)

    # Need to read geojson back in then write out the gpkg, as arcgis does not read geojson
    gdf = gpd.read_file(temp_geojson)

    # Convert to BNG as geojson defaults to EPSG: 4326
    gdf_27700 = gdf.to_crs(epsg=27700)

    # Write out original file
    # NOTE(review): this writes the un-reprojected frame; only the clip below
    # uses the EPSG:27700 copy -- confirm that is intended.
    gdf.to_file(f"{original_dir}/{dataset}.gpkg")

    # Clip to Wilts and write out
    gdf_wilts = gpd.clip(gdf_27700, boundary_gdf)
    gdf_wilts.to_file(f"{wiltshire_dir}/{dataset}.gpkg")

    # Download dataset details
    try:
        response_details = requests.get(url_details, timeout=request_timeout)
        details = response_details.json() if response_details.status_code == 200 else None
    except (requests.exceptions.RequestException, ValueError) as e:
        print("Error during request:", e)
        details = None

    if details is not None:
        # The edit timestamp is epoch milliseconds; some services omit it.
        date_last_edit = (details.get("editingInfo") or {}).get("dataLastEditDate")
        if date_last_edit is None:
            print(f"No last edit date available for {dataset}")
            last_edited = None
        else:
            dt = datetime.fromtimestamp(date_last_edit / 1000, tz=timezone.utc)
            last_edited = dt.strftime("%d/%m/%Y")

        # Both dates use day/month/year so the file is unambiguous.
        metadata = {"Date last edited": last_edited,
                    "Date downloaded": date.today().strftime("%d/%m/%Y")}
        with open(f"{download_location}Original/{source}/{dataset}_metadata.json", mode="w") as file:
            json.dump(metadata, file)
    else:
        print(f"Request failed for details of {dataset}")



https://services.arcgis.com/JJzESW51TqeY9uat/arcgis/rest/services/SSSI_Impact_Risk_Zones_England/FeatureServer/0/query?
Total features retrieved: 103922
Date last edited: 25/09/2025
2025-10-01
https://services-eu1.arcgis.com/WIfgdJeDbrZU1cnA/arcgis/rest/services/Ancient%20Tree%20Inventory%20(ATI)/FeatureServer/0/query?
Total features retrieved: 97914
Date last edited: 23/09/2025
2025-10-01
