### Start by querying the Overpass API to get data on Amsterdam's green areas.
The city needs to be divided into bounding boxes to work around the API's restrictions.

In [None]:
import requests
import json
import time
from datetime import datetime

# Amsterdam bounding box (approximate)
# [south, west, north, east]
AMSTERDAM_BBOX = [52.278, 4.728, 52.431, 5.079]

def create_grid(bbox, rows=2, cols=3):
    """Split a [south, west, north, east] box into rows x cols equal cells.

    Returns a list of dicts ordered row by row, starting at the southern
    edge and moving west to east within a row. Each dict carries an 'id'
    of the form "cell_<row>_<col>" and a 'bbox' in the same
    [south, west, north, east] order as the input.
    """
    south, west, north, east = bbox
    cell_height = (north - south) / rows
    cell_width = (east - west) / cols

    return [
        {
            'id': f"cell_{row}_{col}",
            'bbox': [
                south + row * cell_height,        # south
                west + col * cell_width,          # west
                south + (row + 1) * cell_height,  # north
                west + (col + 1) * cell_width,    # east
            ],
        }
        for row in range(rows)
        for col in range(cols)
    ]

def build_overpass_query(bbox, date_str=None, features='parks_only'):
    """Return the Overpass QL text for one bounding box.

    bbox is [south, west, north, east]. When date_str is truthy it is added
    as a [date:"..."] setting.

    features options:
    - 'parks_only': just parks
    - 'parks_gardens': parks and gardens
    - anything else (documented as 'all'): all green features
    """
    south, west, north, east = bbox
    bbox_str = f"{south},{west},{north},{east}"

    # Pick the (key, value) tag pairs to query for.
    if features == 'parks_only':
        tags = [('leisure', 'park')]
    elif features == 'parks_gardens':
        tags = [('leisure', 'park'), ('leisure', 'garden')]
    else:  # all
        tags = [
            ('leisure', 'park'),
            ('leisure', 'garden'),
            ('landuse', 'grass'),
            ('landuse', 'forest'),
            ('natural', 'wood'),
            ('natural', 'tree'),
        ]

    # One nwr statement per tag, separated the way the union body is laid out.
    elements = '\n  '.join(
        f'nwr["{key}"="{value}"]({bbox_str});' for key, value in tags
    )

    settings = '[out:json][timeout:600]'
    if date_str:
        settings += f'[date:"{date_str}"]'

    return f'{settings};\n(\n  {elements}\n);\nout tags geom;'

def fetch_overpass_data(query, max_retries=3):
    """Fetch data from the Overpass API, retrying on failure.

    Args:
        query: Overpass QL text, sent as the 'data' form field of a POST.
        max_retries: maximum number of attempts.

    Returns:
        The decoded JSON body of the first HTTP 200 response, or None when
        every attempt failed.
    """
    url = "https://overpass-api.de/api/interpreter"

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        wait_time = 30  # default pause before the next attempt

        try:
            print(f"  Attempt {attempt + 1}/{max_retries}...")
            response = requests.post(url, data={'data': query}, timeout=900)

            if response.status_code == 200:
                return response.json()

            if response.status_code == 429:
                # Back off longer on each successive rate-limit response.
                wait_time = 60 * (attempt + 1)
                if is_last_attempt:
                    print("  Rate limited.")
                else:
                    print(f"  Rate limited. Waiting {wait_time}s...")
            else:
                print(f"  Error {response.status_code}: {response.text[:500]}")

        except requests.exceptions.Timeout:
            print(f"  Timeout on attempt {attempt + 1}")
        except Exception as e:
            # Best-effort fetch: report any other failure (connection error,
            # undecodable body, ...) and move on to the next attempt.
            print(f"  Error: {str(e)}")

        # Only pause when another attempt follows; sleeping after the final
        # failure would just delay returning None to the caller.
        if not is_last_attempt:
            time.sleep(wait_time)

    return None

def save_data(data, filename):
    """Save data to filename as indented UTF-8 JSON.

    The parent directory is created first when it does not exist, so a
    missing output folder does not raise FileNotFoundError after the data
    has already been fetched. Non-ASCII characters are written as-is
    (ensure_ascii=False).
    """
    import os  # local import so this function does not rely on a file-level import

    parent_dir = os.path.dirname(filename)
    if parent_dir:  # empty when filename has no directory component
        os.makedirs(parent_dir, exist_ok=True)

    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
    print(f"  Saved to {filename}")

def main():
    """Fetch green-area elements for every grid cell and snapshot date.

    For each entry in `years`, queries each grid cell of AMSTERDAM_BBOX in
    turn, tags every returned element with the id of the cell it came from,
    and writes the combined result to
    ../data/amsterdam_green_<year>_<features>.json.

    An element returned for more than one cell is kept only once, under the
    first cell that returned it. Cells whose fetch failed are listed under
    'failed_cells' in the saved file so an incomplete snapshot can be told
    apart from a complete one.
    """
    # Create grid (2x3 = 6 cells)
    cells = create_grid(AMSTERDAM_BBOX, rows=2, cols=3)

    # Years to fetch
    years = {
        '2020': '2020-01-01T00:00:00Z',
        '2024': '2024-01-01T00:00:00Z'
    }

    # Choose features: 'parks_only', 'parks_gardens', or 'all'
    features = 'parks_only'  # Change this if needed

    print(f"Fetching data for {len(cells)} grid cells x {len(years)} time points")
    print(f"Features: {features}")
    print(f"Total queries: {len(cells) * len(years)}\n")

    # Fetch data for each cell and year
    for year, date_str in years.items():
        print(f"\n{'='*60}")
        print(f"YEAR: {year} ({date_str})")
        print(f"{'='*60}\n")

        all_elements = []
        seen_keys = set()   # (type, id) of elements already collected this year
        failed_cells = []   # ids of cells for which no data came back

        for cell in cells:
            cell_id = cell['id']
            print(f"\nProcessing {cell_id} for {year}...")
            print(f"  BBox: {cell['bbox']}")

            query = build_overpass_query(cell['bbox'], date_str, features)

            # Fetch data
            data = fetch_overpass_data(query)

            if data and 'elements' in data:
                print(f"  Retrieved {len(data['elements'])} elements")
                duplicates = 0
                for elem in data['elements']:
                    # An element overlapping a cell border comes back from
                    # each adjacent cell; keep only the first copy.
                    elem_id = elem.get('id')
                    if elem_id is not None:
                        key = (elem.get('type'), elem_id)
                        if key in seen_keys:
                            duplicates += 1
                            continue
                        seen_keys.add(key)
                    # Add cell_id to each element for reference
                    elem['grid_cell'] = cell_id
                    all_elements.append(elem)
                if duplicates:
                    print(f"  Skipped {duplicates} elements already retrieved from another cell")
            else:
                print("  Failed to retrieve data")
                failed_cells.append(cell_id)

            # Be nice to the API
            print("  Waiting 5 seconds before next request...")
            time.sleep(5)

        # Save combined data
        output = {
            'timestamp': datetime.now().isoformat(),
            'year': year,
            'date_query': date_str,
            'features': features,
            'num_elements': len(all_elements),
            'failed_cells': failed_cells,
            'elements': all_elements
        }

        filename = f"../data/amsterdam_green_{year}_{features}.json"
        save_data(output, filename)

        print(f"\n{year} Summary: {len(all_elements)} total elements")
        if failed_cells:
            print(f"  WARNING: no data for cells: {', '.join(failed_cells)}")

    print("\n" + "="*60)
    print("COMPLETE!")
    print("="*60)

if __name__ == "__main__":
    # Print an example query first so it can be checked by hand.
    print("EXAMPLE QUERY FOR TESTING (Cell 0_0, Year 2020, PARKS ONLY):")
    print("="*60)
    cells = create_grid(AMSTERDAM_BBOX, rows=2, cols=3)
    example_query = build_overpass_query(cells[0]['bbox'], '2020-01-01T00:00:00Z', features='parks_only')
    print(example_query)
    print("="*60)
    print("\nCopy the query above and test it at: https://overpass-turbo.eu/")
    # The previous message claimed main() was commented out although it was
    # being called; say what actually happens.
    print("\nRunning the full fetch now (comment out the main() call to only print the example query).")

    # Run the full fetch. Kept inside the guard so that importing this file
    # does not fire off network requests.
    main()

EXAMPLE QUERY FOR TESTING (Cell 0_0, Year 2020, PARKS ONLY):
[out:json][timeout:600][date:"2020-01-01T00:00:00Z"];
(
  nwr["leisure"="park"](52.278,4.728,52.3545,4.845);
);
out tags geom;

Copy the query above and test it at: https://overpass-turbo.eu/

To run the full script, uncomment the line below:
# main()
Fetching data for 6 grid cells x 2 time points
Features: parks_only
Total queries: 12


YEAR: 2020 (2020-01-01T00:00:00Z)


Processing cell_0_0 for 2020...
  BBox: [52.278, 4.728, 52.3545, 4.845]
  Attempt 1/3...
  Retrieved 32 elements
  Waiting 5 seconds before next request...

Processing cell_0_1 for 2020...
  BBox: [52.278, 4.845, 52.3545, 4.962]
  Attempt 1/3...
  Retrieved 112 elements
  Waiting 5 seconds before next request...

Processing cell_0_2 for 2020...
  BBox: [52.278, 4.962, 52.3545, 5.079]
  Attempt 1/3...
  Error 504: <?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict

### Now adding forests and "wooded areas" (i.e. aggregates of trees)

In [None]:
# Amsterdam bounding box (approximate)
# [south, west, north, east]
AMSTERDAM_BBOX = [52.278, 4.728, 52.431, 5.079]

def create_grid(bbox, rows=2, cols=3):
    """Cut a [south, west, north, east] bounding box into a rows x cols grid.

    The result is a flat list of cells, southernmost row first and west to
    east inside each row. Every cell is a dict with an 'id'
    ("cell_<row>_<col>") and its own 'bbox' as [south, west, north, east].
    """
    south, west, north, east = bbox
    lat_step = (north - south) / rows
    lon_step = (east - west) / cols

    # Grid lines: rows + 1 latitudes and cols + 1 longitudes.
    lat_edges = [south + k * lat_step for k in range(rows + 1)]
    lon_edges = [west + k * lon_step for k in range(cols + 1)]

    grid = []
    for r in range(rows):
        for c in range(cols):
            bounds = [lat_edges[r], lon_edges[c], lat_edges[r + 1], lon_edges[c + 1]]
            grid.append({'id': f"cell_{r}_{c}", 'bbox': bounds})
    return grid

def build_overpass_query(bbox, date_str=None, features='parks_only'):
    """Assemble the Overpass QL query for a single bounding box.

    bbox is [south, west, north, east]; a truthy date_str is emitted as a
    [date:"..."] setting.

    features options:
    - 'parks_only': just parks
    - 'wood_forest': wooded areas and forests
    - 'parks_gardens': parks and gardens
    - anything else (documented as 'all'): all green features
    """
    south, west, north, east = bbox
    area = f"({south},{west},{north},{east});"

    parks = 'nwr["leisure"="park"]'
    gardens = 'nwr["leisure"="garden"]'
    woods = 'nwr["natural"="wood"]'
    forests = 'nwr["landuse"="forest"]'

    presets = (
        ('parks_only', [parks]),
        ('wood_forest', [woods, forests]),
        ('parks_gardens', [parks, gardens]),
    )
    everything = [
        parks,
        gardens,
        'nwr["landuse"="grass"]',
        forests,
        woods,
        'nwr["natural"="tree"]',
    ]
    # First preset whose name equals `features`; unknown names mean "all".
    selectors = next(
        (chosen for name, chosen in presets if features == name),
        everything,
    )

    statements = '\n  '.join(selector + area for selector in selectors)
    date_setting = f'[date:"{date_str}"]' if date_str else ''

    return (
        '[out:json][timeout:600]' + date_setting
        + ';\n(\n  ' + statements + '\n);\nout tags geom;'
    )

def fetch_overpass_data(query, max_retries=3):
    """Fetch data from the Overpass API, retrying on failure.

    Args:
        query: Overpass QL text, sent as the 'data' form field of a POST.
        max_retries: maximum number of attempts.

    Returns:
        The decoded JSON body of the first HTTP 200 response, or None when
        every attempt failed.
    """
    url = "https://overpass-api.de/api/interpreter"

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        wait_time = 30  # default pause before the next attempt

        try:
            print(f"  Attempt {attempt + 1}/{max_retries}...")
            response = requests.post(url, data={'data': query}, timeout=900)

            if response.status_code == 200:
                return response.json()

            if response.status_code == 429:
                # Back off longer on each successive rate-limit response.
                wait_time = 60 * (attempt + 1)
                if is_last_attempt:
                    print("  Rate limited.")
                else:
                    print(f"  Rate limited. Waiting {wait_time}s...")
            else:
                print(f"  Error {response.status_code}: {response.text[:500]}")

        except requests.exceptions.Timeout:
            print(f"  Timeout on attempt {attempt + 1}")
        except Exception as e:
            # Best-effort fetch: report any other failure (connection error,
            # undecodable body, ...) and move on to the next attempt.
            print(f"  Error: {str(e)}")

        # Only pause when another attempt follows; sleeping after the final
        # failure would just delay returning None to the caller.
        if not is_last_attempt:
            time.sleep(wait_time)

    return None

def save_data(data, filename):
    """Save data to filename as indented UTF-8 JSON.

    The parent directory is created first when it does not exist, so a
    missing output folder does not raise FileNotFoundError after the data
    has already been fetched. Non-ASCII characters are written as-is
    (ensure_ascii=False).
    """
    import os  # local import so this function does not rely on a file-level import

    parent_dir = os.path.dirname(filename)
    if parent_dir:  # empty when filename has no directory component
        os.makedirs(parent_dir, exist_ok=True)

    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
    print(f"  Saved to {filename}")

def main():
    """Fetch wood/forest elements for every grid cell and snapshot date.

    For each entry in `years`, queries each grid cell of AMSTERDAM_BBOX in
    turn, tags every returned element with the id of the cell it came from,
    and writes the combined result to
    ../data/amsterdam_green_<year>_<features>.json.

    An element returned for more than one cell is kept only once, under the
    first cell that returned it. Cells whose fetch failed are listed under
    'failed_cells' in the saved file so an incomplete snapshot can be told
    apart from a complete one.
    """
    # Create grid (2x3 = 6 cells)
    cells = create_grid(AMSTERDAM_BBOX, rows=2, cols=3)

    # Years to fetch
    years = {
        '2020': '2020-01-01T00:00:00Z',
        '2024': '2024-01-01T00:00:00Z'
    }

    # Choose features: 'parks_only', 'wood_forest', 'parks_gardens', or 'all'
    features = 'wood_forest'  # Change this if needed

    print(f"Fetching data for {len(cells)} grid cells x {len(years)} time points")
    print(f"Features: {features}")
    print(f"Total queries: {len(cells) * len(years)}\n")

    # Fetch data for each cell and year
    for year, date_str in years.items():
        print(f"\n{'='*60}")
        print(f"YEAR: {year} ({date_str})")
        print(f"{'='*60}\n")

        all_elements = []
        seen_keys = set()   # (type, id) of elements already collected this year
        failed_cells = []   # ids of cells for which no data came back

        for cell in cells:
            cell_id = cell['id']
            print(f"\nProcessing {cell_id} for {year}...")
            print(f"  BBox: {cell['bbox']}")

            query = build_overpass_query(cell['bbox'], date_str, features)

            # Fetch data
            data = fetch_overpass_data(query)

            if data and 'elements' in data:
                print(f"  Retrieved {len(data['elements'])} elements")
                duplicates = 0
                for elem in data['elements']:
                    # An element overlapping a cell border comes back from
                    # each adjacent cell; keep only the first copy.
                    elem_id = elem.get('id')
                    if elem_id is not None:
                        key = (elem.get('type'), elem_id)
                        if key in seen_keys:
                            duplicates += 1
                            continue
                        seen_keys.add(key)
                    # Add cell_id to each element for reference
                    elem['grid_cell'] = cell_id
                    all_elements.append(elem)
                if duplicates:
                    print(f"  Skipped {duplicates} elements already retrieved from another cell")
            else:
                print("  Failed to retrieve data")
                failed_cells.append(cell_id)

            # Be nice to the API
            print("  Waiting 5 seconds before next request...")
            time.sleep(5)

        # Save combined data
        output = {
            'timestamp': datetime.now().isoformat(),
            'year': year,
            'date_query': date_str,
            'features': features,
            'num_elements': len(all_elements),
            'failed_cells': failed_cells,
            'elements': all_elements
        }

        filename = f"../data/amsterdam_green_{year}_{features}.json"
        save_data(output, filename)

        print(f"\n{year} Summary: {len(all_elements)} total elements")
        if failed_cells:
            print(f"  WARNING: no data for cells: {', '.join(failed_cells)}")

    print("\n" + "="*60)
    print("COMPLETE!")
    print("="*60)

if __name__ == "__main__":
    # Print an example query first so it can be checked by hand.
    print("EXAMPLE QUERY FOR TESTING (Cell 0_0, Year 2020, WOOD & FOREST):")
    print("="*60)
    cells = create_grid(AMSTERDAM_BBOX, rows=2, cols=3)
    example_query = build_overpass_query(cells[0]['bbox'], '2020-01-01T00:00:00Z', features='wood_forest')
    print(example_query)
    print("="*60)
    print("\nCopy the query above and test it at: https://overpass-turbo.eu/")
    # The previous message claimed main() was commented out although it was
    # being called; say what actually happens.
    print("\nRunning the full fetch now (comment out the main() call to only print the example query).")

    # Run the full fetch. Kept inside the guard so that importing this file
    # does not fire off network requests.
    main()

EXAMPLE QUERY FOR TESTING (Cell 0_0, Year 2020, WOOD & FOREST):
[out:json][timeout:600][date:"2020-01-01T00:00:00Z"];
(
  nwr["natural"="wood"](52.278,4.728,52.3545,4.845);
  nwr["landuse"="forest"](52.278,4.728,52.3545,4.845);
);
out tags geom;

Copy the query above and test it at: https://overpass-turbo.eu/

To run the full script, uncomment the line below:
# main()
Fetching data for 6 grid cells x 2 time points
Features: wood_forest
Total queries: 12


YEAR: 2020 (2020-01-01T00:00:00Z)


Processing cell_0_0 for 2020...
  BBox: [52.278, 4.728, 52.3545, 4.845]
  Attempt 1/3...
  Retrieved 1518 elements
  Waiting 5 seconds before next request...

Processing cell_0_1 for 2020...
  BBox: [52.278, 4.845, 52.3545, 4.962]
  Attempt 1/3...
  Error 504: <?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
  <meta http-eq

### some data was obtained through other means

- download from CBS Open Data StatLine
- email correspondence with Onderzoek en Statistiek (O&S) of the city of Amsterdam

specifically:

- buurten.csv file containing a mapping of codenames from 2015 to 2022 for Amsterdam's buurten, the wijk they belong to, as well as coordinates of their centroids and geometry of the polygon they're represented by on a map
- the files Kerncijfers_wijken_en_buurten_2024_without_income_data.csv, Kerncijfers_wijken_en_buurten_2023_income.csv, Kerncijfers_wijken_en_buurten_2020, containing socio-economic data on Amsterdam's neighborhoods. Note that because the datasets contain no income values for 2024, I'm substituting them with the income values from the previous year (2023)
- the files proximity_to_facilities_2020.csv and proximity_to_facilities_2024.csv, containing, for each buurt, the distance to and the number of a variety of store types and train stations