In [33]:
import requests
import os
import json
from zipfile import ZipFile
import pandas as pd
import geopandas as gpd
from datetime import datetime, timezone, date

In [34]:
location = "work"

In [35]:
if location == "home":
    folder_location = "C:/Users/Lara/Work/DataDownloads/"
    
if location == "work":
    folder_location = "O:/Data_team/GIS_data_downloads/"

download_location = folder_location + "testData/"
temp_download_location = folder_location + "testDataTemp/"
lookup_location = folder_location + "Lookups/"
item_url_file = "ItemURL_ArcGIS_6monthUpdates.csv"
boundary_file = folder_location + "CountyBoundary.shp"


In [36]:
boundary_gdf = gpd.read_file(boundary_file)

In [37]:
# Read the csv file 
item_url_df = pd.read_csv(lookup_location + item_url_file)
item_url_df.head(1)

Unnamed: 0,Dataset,Owner,URL
0,Local Nature Reserves (England),NE,https://services.arcgis.com/JJzESW51TqeY9uat/a...


In [44]:
#Loop through each dataset name in lookup
for url in ["https://services.arcgis.com/JJzESW51TqeY9uat/arcgis/rest/services/Priority_Habitats_Inventory_England/FeatureServer/0/query?"]:#item_url_df.URL:
    print(url)
    # Get url that contains details for dataset
    url_details = url.replace("/query?", "?f=json")
    
    # Get dataset name and source from lookup
    dataset = item_url_df[item_url_df.URL==url]['Dataset'].item() 
    source = item_url_df[item_url_df.URL==url]['Owner'].item()

    # Parameters
    batch_size = 2000  # Adjust to the max allowed (e.g., 4000 if needed)

    params = {
        "where": "1=1",
        "outFields": "*",
        "f": "geojson",
        "resultOffset": 0,
        #"resultRecordCount": batch_size,  # You can go up to the server max, in this case 4000
    }

    features = []
    
    # Keeps running until server stops returning features (for files that are too large so require pagination)
    #print(f"Requesting records starting at offset {params['resultOffset']}")
    try:
        response = requests.get(url, params=params, stream = True)
        # Check if response is successful
        if response.status_code != 200:
            print(f"Request failed with status code: {response.status_code}")
            break
        
        # Download in chunks in case its large file
        with open(f"{temp_download_location}{dataset}.geojson", mode="wb") as file:
            for chunk in response.iter_content(chunk_size=batch_size):
                file.write(chunk)
    
    except requests.exceptions.RequestException as e:
        print("Error during request:", e)
        break
   
    # Create folder if not already there for permanent data storage
    if not os.path.exists(f"{download_location}Original/{source}/{dataset}"):
        os.makedirs(f"{download_location}Original/{source}/{dataset}")
    if not os.path.exists(f"{download_location}Wiltshire/{source}/{dataset}"):
        os.makedirs(f"{download_location}Wiltshire/{source}/{dataset}")

    # Need to read geojson back in then write out the gpkg, as arcgis does not read geojson
    gdf = gpd.read_file(f"{temp_download_location}{dataset}.geojson")

    # Convert to BNG as geojson defaults to EPSG: 4326
    gdf_27700 = gdf.to_crs(epsg=27700)

    if os.path.exists(f"{download_location}Original/{source}/{dataset}/{dataset}.gpkg"):
                    os.remove(f"{download_location}Original/{source}/{dataset}/{dataset}.gpkg")
    # Write out original file
    gdf.to_file(f"{download_location}Original/{source}/{dataset}/{dataset}.gpkg")

    # Clip to Wilts and write out
    gdf_wilts = gpd.clip(gdf_27700, boundary_gdf)
    gdf_wilts.to_file(f"{download_location}Wiltshire/{source}/{dataset}/{dataset}.gpkg")

    # Download dataset details
    response_details = requests.get(url_details)
    if response_details.status_code == 200:
        details = response_details.json()
        date_last_edit = details['editingInfo']['dataLastEditDate']
        dt = datetime.fromtimestamp(date_last_edit / 1000, tz=timezone.utc)
        metadata = {"Date last edited":dt.strftime("%d/%m/%Y"),
                    "Date downloaded":date.today().strftime('%m/%d/%Y')} 
        with open(f"{download_location}Original/{source}/{dataset}/metadata.json", mode="w") as file:
            json.dump(metadata,file)
    else:
        print(f"Request failed for details of {dataset}")



https://services.arcgis.com/JJzESW51TqeY9uat/arcgis/rest/services/Priority_Habitats_Inventory_England/FeatureServer/0/query?


In [30]:
url = "https://services.arcgis.com/JJzESW51TqeY9uat/arcgis/rest/services/Special_Protection_Areas_England/FeatureServer/0/query?"
params =  {
        "where": "1=1",
        "outFields": "*",
        "f": "geojson",
        "resultOffset": 0,  # You can go up to the server max, in this case 4000
    }
response = requests.get(url,  stream = True, params = params)
with open("test.geojson", mode="wb") as file:
    for chunk in response.iter_content(chunk_size=10 * 1024):
        file.write(chunk)

In [26]:
params =  {
        "where": "1=1",
        "outFields": "*",
        "f": "geojson",
          # You can go up to the server max, in this case 4000
    }
url = "https://environment.data.gov.uk/spatialdata/sites-of-special-scientific-interest-england/ogc/features/v1/collections"
response = requests.get(url)

In [27]:
info = response.content

In [28]:
json_data = json.loads(info.decode('utf-8'))

In [32]:
json_data['collections']

[{'id': 'Sites_of_Special_Scientific_Interest_England',
  'title': 'Sites_of_Special_Scientific_Interest_England',
  'description': None,
  'extent': {'spatial': {'crs': 'http://www.opengis.net/def/crs/OGC/1.3/CRS84',
    'bbox': [[-7.053840553627759,
      49.86262197062069,
      2.0581712684399815,
      55.810663626083226]]}},
  'links': [{'href': 'https://environment.data.gov.uk/geoservices/datasets/ba8dc201-66ef-4983-9d46-7378af21027e/ogc/features/v1/collections/Sites_of_Special_Scientific_Interest_England/items?f=text%2Fhtml',
    'rel': 'items',
    'type': 'text/html',
    'title': 'Sites_of_Special_Scientific_Interest_England items as text/html'},
   {'href': 'https://environment.data.gov.uk/geoservices/datasets/ba8dc201-66ef-4983-9d46-7378af21027e/ogc/features/v1/collections/Sites_of_Special_Scientific_Interest_England/items?f=application%2Fvnd.google-earth.kml%2Bxml',
    'rel': 'items',
    'type': 'application/vnd.google-earth.kml+xml',
    'title': 'Sites_of_Special_Scie

In [17]:
response.json.to_html('test.html')

AttributeError: 'function' object has no attribute 'to_html'

In [None]:
https://environment.data.gov.uk/spatialdata/sites-of-special-scientific-interest-england/ogc/features/v1

JSONDecodeError: Expecting value: line 1 column 1 (char 0)