In [1]:
!pip install requests pandas tqdm
#requests - to make API calls

Defaulting to user installation because normal site-packages is not writeable


# Testing out the Overpass API

In [3]:
import requests
import pandas as pd
import time
from tqdm import tqdm

# Overpass API URL
OVERPASS_URL = "http://overpass-api.de/api/interpreter"

# Query to fetch all country boundaries
QUERY = """
[out:json];
relation["boundary"="administrative"]["admin_level"="2"];
out center;
"""

def fetch_osm_data():
    """Fetches country data from OpenStreetMap Overpass API."""
    print("Fetching data from Overpass API...")
    start_time = time.time()
    response = requests.get(OVERPASS_URL, params={"data": QUERY})
    elapsed_time = time.time() - start_time
    print(f"Data fetched in {elapsed_time:.2f} seconds.")

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error: {response.status_code}")
        return None

def parse_osm_data(data):
    """Parses OSM JSON response into structured country data."""
    print("Processing country data...")
    start_time = time.time()
    country_data = []
    
    for element in tqdm(data["elements"], desc="Processing Countries"):
        tags = element.get("tags", {})
        country = {
            "osm_id": element["id"],
            "population": tags.get("population", "N/A"),  
            "source": tags.get("source", "N/A"),  
            "place": tags.get("place", "N/A"),  
            "is_in": tags.get("is_in", "N/A"),  
            "name": tags.get("name", "Unknown"),  
            "name_ps": tags.get("name:ps", "N/A"),  
            "longitude": element.get("center", {}).get("lon", "N/A"),  
            "latitude": element.get("center", {}).get("lat", "N/A"),  
            "geom_type": element["type"]
        }
        country_data.append(country)
    
    elapsed_time = time.time() - start_time
    print(f"Data processed in {elapsed_time:.2f} seconds.")
    
    return country_data

def save_to_csv(data, filename="(Courtney)_osm_countries.csv"):
    """Saves parsed data to a CSV file."""
    print("Saving data to CSV...")
    start_time = time.time()
    df = pd.DataFrame(data)
    df.to_csv(filename, index=False)
    elapsed_time = time.time() - start_time
    print(f"CSV saved as {filename} in {elapsed_time:.2f} seconds.")

# Run script
print("Script started...")
total_start_time = time.time()

osm_data = fetch_osm_data()
if osm_data:
    parsed_data = parse_osm_data(osm_data)
    save_to_csv(parsed_data)

total_elapsed_time = time.time() - total_start_time
print(f"Script completed in {total_elapsed_time:.2f} seconds.")



Script started...
Fetching data from Overpass API...
Data fetched in 9.46 seconds.
Processing country data...


Processing Countries: 100%|██████████| 243/243 [00:00<00:00, 380162.58it/s]

Data processed in 0.00 seconds.
Saving data to CSV...
CSV saved as (Courtney)_osm_countries.csv in 0.00 seconds.
Script completed in 9.56 seconds.





# Try to get all countries' data from World pop

In [2]:
# import required packages

import os
import pandas as pd
import numpy as np
import requests
import time

# read in country metadata
print(os.getcwd())

metadata = pd.read_excel('../Plan-EO_Country_meta-data.xlsx')

/sfs/ceph/standard/Plan-EO_Storage/Capstone-25


In [3]:
metadata = metadata[['Name', 'ISO2', 'ISO3']]

In [4]:
metadata.head()

Unnamed: 0,Name,ISO2,ISO3
0,Afghanistan,AF,AFG
1,Algeria,DZ,DZA
2,Angola,AO,AGO
3,Argentina,AR,ARG
4,Armenia,AM,ARM


In [6]:
def get_settlements(country_code):
    # Overpass API query
    query = f"""
    [out:csv(::id, name, place, population, ::lat, ::lon)][timeout:600];
        area["ISO3166-1"="{country_code}"]->.country;
    (
      node["place"](area.country);
      way["place"](area.country);
      relation["place"](area.country);
    );
    out;
    """
    
    # headers
    
    headers = {
        'User-Agent': 'Plan-EO_Pipeline/1.0 (cwp5xyj@virginia.edu)'
    }
    
    
    # Overpass API URL
    url = 'http://overpass-api.de/api/interpreter'
    
    # Send request
    response = requests.get(url, params={'data': query})
    
    # Check for errors
    if response.status_code != 200:
        raise Exception(f'Error {response.status_code}: {response.text}')
    
    
    
    return response

def convert_to_csv(settlements_json):
    settlements = []
    for element in settlements_json['elements']:
        osm_id = element['id']
        name = element['tags'].get('name')
        place_type = element['tags'].get('place')
        lat = element.get('lat')
        lon = element.get('lon')
        population = element['tags'].get('population')
        
        settlements.append({'osm_id': osm_id, 'name': name, 'place': place_type, 'latitude': lat, 'longitude': lon, 'population': population})
    
    # Convert to DataFrame
    df = pd.DataFrame(settlements)
    
    return df

In [10]:
%%time

test = get_settlements("DZ")
test
test_csv = convert_to_csv(test)

TypeError: 'Response' object is not subscriptable

In [8]:
test_csv

NameError: name 'test_csv' is not defined