In [22]:
import pandas as pd
import requests, csv

In [23]:
import requests
import pandas as pd

# Overpass API URL
overpass_url = "http://overpass-api.de/api/interpreter"

# Upper bound (seconds) on how long to wait for the server. requests applies no
# timeout by default, so a stalled connection would otherwise hang this cell.
overpass_timeout = 180

# Query for general and grocery stores in Aberdeen.
# NOTE(review): area[name="Aberdeen"] selects every OSM area carrying that name.
# The recorded output of this cell contains coordinates such as (8.48, -13.28)
# and (35.16, -79.42), which are not in Great Britain, yet every row is labelled
# country 'GB' below. Consider constraining the area to GB before trusting
# that column.
overpass_query = """
[out:json];
area[name="Aberdeen"]->.searchArea;
(
  node["shop"="general"](area.searchArea);
  way["shop"="general"](area.searchArea);
  relation["shop"="general"](area.searchArea);
  
  node["shop"="supermarket"](area.searchArea);
  way["shop"="supermarket"](area.searchArea);
  relation["shop"="supermarket"](area.searchArea);
  
  node["shop"="convenience"](area.searchArea);
  way["shop"="convenience"](area.searchArea);
  relation["shop"="convenience"](area.searchArea);
  
  node["shop"="greengrocer"](area.searchArea);
  way["shop"="greengrocer"](area.searchArea);
  relation["shop"="greengrocer"](area.searchArea);
);
out center;
"""

# Send request to Overpass API (raises requests.Timeout if the server stalls)
response = requests.get(
    overpass_url,
    params={'data': overpass_query},
    timeout=overpass_timeout,
)

# Check if request is successful
if response.status_code == 200:
    data = response.json()

    # List to store shop information
    shops = []

    # Iterate over elements from response; untagged elements are skipped
    for element in data.get('elements', []):
        if "tags" in element:
            # Prefer the element's own lat/lon and fall back to the 'center'
            # object. Compare against None instead of relying on truthiness so
            # that a legitimate 0.0 coordinate is kept.
            center = element.get('center', {})
            lat = element.get('lat')
            if lat is None:
                lat = center.get('lat', '')
            lon = element.get('lon')
            if lon is None:
                lon = center.get('lon', '')

            # Create shop dictionary
            shop = {
                'input_id': element['id'],
                'link': f"https://www.openstreetmap.org/{element['type']}/{element['id']}",
                'title': element['tags'].get('name', ''),
                'category': element['tags'].get('shop', ''),
                'plus_code': '',  # Optionally calculate plus code
                'latitude': lat,
                'longitude': lon,
                'complete_address': '',  # Optionally use reverse geocoding to get address
                'country': 'GB'  # Hardcoded country for now
            }
            shops.append(shop)

    # Create DataFrame
    df_shops = pd.DataFrame(shops)

    # Output the DataFrame
    print(df_shops)

else:
    # df_shops is not defined on this path; only the HTTP status is reported
    print(f"Error: {response.status_code}")


      input_id                                           link  \
0    841886993   https://www.openstreetmap.org/node/841886993   
1   2286129084  https://www.openstreetmap.org/node/2286129084   
2   2550225472  https://www.openstreetmap.org/node/2550225472   
3   2550278882  https://www.openstreetmap.org/node/2550278882   
4   2553882493  https://www.openstreetmap.org/node/2553882493   
..         ...                                            ...   
75   975535950    https://www.openstreetmap.org/way/975535950   
76  1019712166   https://www.openstreetmap.org/way/1019712166   
77  1034823336   https://www.openstreetmap.org/way/1034823336   
78  1308915320   https://www.openstreetmap.org/way/1308915320   
79  1316058269   https://www.openstreetmap.org/way/1316058269   

                 title     category plus_code   latitude   longitude  \
0           China Town  supermarket             8.475162  -13.283362   
1        Harris Teeter  supermarket            35.156214  -79.416801   
2  

In [24]:
import csv

# Function to extract city/town/village names from CSV data
def extract_place_names(csv_file, column='place23nm'):
    """Return the non-empty values of one column of a CSV file.

    Parameters
    ----------
    csv_file : str or path-like
        Path of the CSV file. The file is decoded as latin-1 and its first
        row is treated as the header.
    column : str, optional
        Header name of the column to read. Defaults to 'place23nm'.

    Returns
    -------
    list of str
        The column's non-empty values in file order, duplicates included.
        The list is empty when the file is missing or the column is absent,
        and may be partial if reading fails part-way through.

    Notes
    -----
    Problems are reported with print() instead of being raised, so callers
    always receive a list.
    """
    place_names = []
    try:
        with open(csv_file, mode='r', newline='', encoding='latin-1') as file:
            reader = csv.DictReader(file)

            # row.get() never raises KeyError, so a missing column can only be
            # detected from the header. fieldnames is None for an empty file.
            if column not in (reader.fieldnames or []):
                print(f"Error: The column '{column}' does not exist in the CSV file.")
                return place_names

            for row in reader:
                place_name = row.get(column)
                if place_name:  # Skip blank cells and short rows (None)
                    place_names.append(place_name)
    except FileNotFoundError:
        print(f"Error: The file {csv_file} was not found.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    return place_names

# Path of the place-name CSV, given relative to the current working directory
csv_file_path = './IPN_GB_2024.csv'

# Read the place names out of that file (an empty list comes back on failure)
place_names = extract_place_names(csv_file=csv_file_path)



In [25]:
# Collapse duplicate names, put the survivors in sorted order, and report how
# many distinct names remain.
place_names = sorted(set(place_names))
print(len(place_names))

62689


In [26]:
import requests
import concurrent.futures
import pandas as pd
import threading
import time

# Number of submitted places after which the CSV checkpoint is rewritten
csv_save_interval = 200

# Endpoint that receives every Overpass query
overpass_url = "http://overpass-api.de/api/interpreter"

# Shop records gathered by all worker threads; extend/read only under shops_lock
shops = []
shops_lock = threading.Lock()

# Escape a value for use inside a double-quoted Overpass QL string literal
def _escape_overpass_string(value):
    """Backslash-escape backslashes and double quotes in ``value``."""
    return value.replace('\\', '\\\\').replace('"', '\\"')


# Build the Overpass QL text that lists the grocery-type shops of one area
def _build_overpass_query(place):
    """Return the query for general/supermarket/convenience/greengrocer shops.

    NOTE(review): area[name=...] matches on the name alone, so identically
    named areas outside Great Britain are searched as well even though the
    records are labelled country 'GB'. Consider constraining the area.
    """
    safe_place = _escape_overpass_string(place)
    return f"""
    [out:json];
    area[name="{safe_place}"]->.searchArea;
    (
      node["shop"="general"](area.searchArea);
      way["shop"="general"](area.searchArea);
      relation["shop"="general"](area.searchArea);

      node["shop"="supermarket"](area.searchArea);
      way["shop"="supermarket"](area.searchArea);
      relation["shop"="supermarket"](area.searchArea);

      node["shop"="convenience"](area.searchArea);
      way["shop"="convenience"](area.searchArea);
      relation["shop"="convenience"](area.searchArea);

      node["shop"="greengrocer"](area.searchArea);
      way["shop"="greengrocer"](area.searchArea);
      relation["shop"="greengrocer"](area.searchArea);
    );
    out center;
    """


# Convert one tagged Overpass element into the flat record stored in `shops`
def _element_to_shop(element):
    """Return the shop dictionary for ``element`` (which must have 'tags')."""
    # Prefer the element's own lat/lon and fall back to its 'center' object.
    # Compare against None rather than truthiness so 0.0 is preserved.
    center = element.get('center', {})
    lat = element.get('lat')
    if lat is None:
        lat = center.get('lat', '')
    lon = element.get('lon')
    if lon is None:
        lon = center.get('lon', '')

    tags = element['tags']
    return {
        'input_id': element['id'],
        'link': f"https://www.openstreetmap.org/{element['type']}/{element['id']}",
        'title': tags.get('name', ''),
        'category': tags.get('shop', ''),
        'plus_code': '',  # Optionally calculate plus code
        'latitude': lat,
        'longitude': lon,
        'complete_address': '',  # Optionally use reverse geocoding to get address
        'country': 'GB'  # Hardcoded country for now
    }


# Function to query Overpass API for a specific place
def query_overpass(place, timeout=180):
    """Fetch the shops of one named area and add them to the shared list.

    Parameters
    ----------
    place : str
        Area name to search; it is escaped before being placed in the query.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so a stalled connection cannot block a
        worker thread forever. Defaults to 180 seconds.

    Returns
    -------
    int
        Number of tagged elements appended to the module-level ``shops``
        list. 0 is returned for a non-200 response or when any exception
        occurs; both cases are reported with print() rather than raised.
    """
    print("Querying Overpass for: " + place)

    try:
        # Send request to Overpass API
        response = requests.get(
            overpass_url,
            params={'data': _build_overpass_query(place)},
            timeout=timeout,
        )

        if response.status_code != 200:
            print(f"Error: {response.status_code} for place: {place}")
            return 0

        elements = response.json().get('elements', [])
        # The printed figure counts every returned element, tagged or not
        print(f"Data returned for {place}: {len(elements)} shops")

        place_shops = [_element_to_shop(element) for element in elements if "tags" in element]

        # Append to the global shop list (thread-safe)
        with shops_lock:
            shops.extend(place_shops)

        return len(place_shops)
    except Exception as e:
        print(f"Exception for place {place}: {e}")
        return 0

# Function to save the shop data to a CSV file
def save_to_csv(filename='shops_data.csv'):
    """Dump every record in the shared ``shops`` list to ``filename`` as CSV.

    The file is written without an index column and replaces any existing
    file of that name. ``shops_lock`` is held for the whole operation so the
    list is not extended while it is being serialised.
    """
    with shops_lock:
        pd.DataFrame(shops).to_csv(filename, index=False)
        print(f"Data saved to {filename}")

# Main function to run the Overpass queries concurrently
def run_queries_concurrently(place_names, num_workers=10):
    """Query Overpass for every place on a thread pool, checkpointing to CSV.

    Places are submitted in order. After every ``csv_save_interval``
    submissions the function waits for that batch to finish and rewrites
    'shops_data.csv'; a final write happens once all places are done.

    Parameters
    ----------
    place_names : iterable of str
        Names handed one by one to ``query_overpass``. Any iterable works,
        including generators.
    num_workers : int, optional
        Size of the thread pool. Defaults to 10.
        NOTE(review): confirm this level of concurrency is acceptable for
        the Overpass server being queried.

    Raises
    ------
    Whatever interrupts the run (for example KeyboardInterrupt) is re-raised
    after queued queries are cancelled and the results gathered so far have
    been written. Requests already in flight at that moment still complete,
    and their results may be missing from that last write.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
        pending = []
        try:
            for submitted, place in enumerate(place_names, start=1):
                pending.append(executor.submit(query_overpass, place))

                # Checkpoint: let the current batch drain, then rewrite the CSV
                if submitted % csv_save_interval == 0:
                    concurrent.futures.wait(pending)
                    save_to_csv('shops_data.csv')
                    pending = []  # Start collecting the next batch

            # Let the last, possibly partial, batch finish
            concurrent.futures.wait(pending)
        except BaseException:
            # Drop queries that have not started so pool shutdown is prompt
            for future in pending:
                future.cancel()
            raise
        finally:
            # Runs on success and on interruption, so partial progress since
            # the last checkpoint is never silently discarded
            save_to_csv('shops_data.csv')

# Example usage: run the crawl over the `place_names` list, which must already
# exist in the namespace (it is built by the CSV-loading cells earlier on).
if __name__ == "__main__": # Only start the crawl when executed as the main module
    run_queries_concurrently(place_names)


Querying Overpass for: A' Chill
Querying Overpass for: A' Chrìon Làraich
Querying Overpass for: A' Glas Pheighinn
Querying Overpass for: A' Mhointeach
Querying Overpass for: A' Phairce Dhubh
Querying Overpass for: A'Chorpaich
Querying Overpass for: Aaron's Hill
Querying Overpass for: Ab Kettleby
Querying Overpass for: Ab Lench
Querying Overpass for: Abaty Cwm-hir
Data returned for A' Glas Pheighinn: 0 shopsData returned for A' Chill: 0 shops
Data returned for A' Phairce Dhubh: 0 shops
Data returned for A' Mhointeach: 0 shops
Data returned for A'Chorpaich: 0 shops
Data returned for A' Chrìon Làraich: 0 shops
Querying Overpass for: Abbas Combe
Querying Overpass for: Abbas and Templecombe
Querying Overpass for: Abbas, Compton
Querying Overpass for: Abbas, Itchen
Querying Overpass for: Abberley

Querying Overpass for: Abberton
Data returned for Aaron's Hill: 0 shopsData returned for Abaty Cwm-hir: 0 shops
Data returned for Ab Lench: 0 shops
Querying Overpass for: Abberwick
Querying Overpas

KeyboardInterrupt: 

In [55]:
# Number of shop records currently held in the module-level `shops` list
len(shops)

0