# Create dataset for the Travelling Salesperson Problem

Create dataset of all municipalities (gemeentes) in Belgium.

## setup

In [1]:
import requests
import json



## Fetch Data from Overpass API

Get all village-like places in Belgium.
It is open data, so it is messy.

In [2]:

# Overpass QL query: inside the area tagged ISO3166-1=BE with admin_level=2,
# select every node whose `place` tag is town, city, village or municipality,
# and return the result as JSON.
query = """
    [out:json][timeout:50];
    area["ISO3166-1"="BE"][admin_level=2]->.searchArea;
    (
        node["place"="town"](area.searchArea);
        node["place"="city"](area.searchArea);
        node["place"="village"](area.searchArea);
        node["place"="municipality"](area.searchArea);
    );
    out center;
    """

In [3]:
def fetch_data_from_overpass(timeout=60):
    """POST the module-level ``query`` to the Overpass API and return the parsed JSON.

    Parameters
    ----------
    timeout : float or tuple, optional
        Passed straight to ``requests.post``. Bounds how long the client waits
        for the server to connect / send data, so a stalled server cannot hang
        the notebook kernel indefinitely. Defaults to 60 seconds, which is
        above the 50 second limit requested inside the query itself.

    Returns
    -------
    dict
        The decoded JSON body of the response.

    Raises
    ------
    Exception
        If the server answers with any status code other than 200.
    requests.exceptions.RequestException
        Propagated from ``requests`` on connection problems or when the
        timeout expires.
    """
    print("Fetching data...")
    overpass_url = "https://overpass-api.de/api/interpreter"

    # Without an explicit timeout, requests waits forever on an unresponsive server.
    response = requests.post(overpass_url, data={'data': query}, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch data: HTTP Status {response.status_code}")
    return response.json()  # This is the raw JSON data

takes about 18s

In [4]:
# Network call to the Overpass API; the raw response is kept in `data`.
data = fetch_data_from_overpass()

Fetching data...


## Convert Data to GeoJSON

In [5]:
def convert_to_geojson(data):
    """Convert an Overpass JSON response into a GeoJSON FeatureCollection of points.

    Parameters
    ----------
    data : dict
        Overpass response; must contain an ``'elements'`` list.

    Returns
    -------
    dict
        ``{"type": "FeatureCollection", "features": [...]}`` with one Point
        feature per element. Each feature carries the element's ``name`` tag
        (``"Unnamed"`` when absent) and ``place`` tag (``"Unknown"`` when absent).

    Raises
    ------
    KeyError
        If ``data`` has no ``'elements'`` key, or an element has coordinates
        neither at its top level nor under ``'center'``.
    """
    features = []
    for element in data['elements']:
        tags = element.get("tags", {})
        # Elements that carry top-level lat/lon are used as-is. Because the
        # query asks for `out center`, elements without top-level coordinates
        # are looked up under their `center` entry instead.
        if 'lat' in element and 'lon' in element:
            point = element
        else:
            point = element.get('center', element)
        feature = {
            "type": "Feature",
            "properties": {
                "name": tags.get("name", "Unnamed"),
                "place": tags.get("place", "Unknown")
            },
            "geometry": {
                "type": "Point",
                # GeoJSON positions are ordered [longitude, latitude]
                "coordinates": [point['lon'], point['lat']]
            }
        }
        features.append(feature)
    return {
        "type": "FeatureCollection",
        "features": features
    }

In [6]:
geojson_data = convert_to_geojson(data)
# Number of point features produced from the Overpass elements
print(len(geojson_data['features']))

3366


Save the result to a file,
because the upstream data might change.

In [7]:
def writeToFile(data, filename):
    """Serialise ``data`` as JSON into ``filename`` and report the path on stdout.

    Parameters
    ----------
    data : JSON-serialisable object (here: a GeoJSON dict).
    filename : path of the file to create or overwrite.
    """
    with open(filename, 'w') as handle:
        handle.write(json.dumps(data))
    print(f"Data saved to {filename}")

# Set to True to (re)write 'villages.geojson' from the freshly fetched data.
# While False, the existing file on disk is left untouched.
toWriteOrnotToWrite = False
if toWriteOrnotToWrite:
    writeToFile(geojson_data, 'villages.geojson')

read stored villages from data file

In [8]:
def loadGeoJsonFile(filename):
    """Load a GeoJSON FeatureCollection from ``filename`` and print its feature count.

    Parameters
    ----------
    filename : path of the GeoJSON file to read.

    Returns
    -------
    dict
        The parsed JSON document; must contain a ``'features'`` list.

    Raises
    ------
    KeyError
        If the document has no ``'features'`` key.
    """
    # JSON is UTF-8 by specification. Decoding explicitly keeps non-ASCII
    # place names intact on platforms whose default encoding is not UTF-8.
    with open(filename, 'r', encoding='utf-8') as file:
        feature_collection = json.load(file)

    # Print the number of features
    num_features = len(feature_collection['features'])
    print("Number of features:", num_features)

    return feature_collection
    
# Work from the stored file rather than the in-memory Overpass response
file_name = 'villages.geojson'

feature_collection = loadGeoJsonFile(file_name)

Number of features: 3366


## Filter actual municipalities

We have 3366 village-like places

this is too large of a dataset to work with (for now)

Let us reduce it to the 581 municipalities

I got this list of municipalities from Wikipedia

In [9]:
file_path = 'municipalityNames.txt'

# One municipality name per line. The names contain accented characters, so
# decode explicitly instead of relying on the platform default encoding
# (assumes the list is stored as UTF-8 -- TODO confirm). Blank lines are
# skipped so they cannot end up as an empty "name".
with open(file_path, 'r', encoding='utf-8') as file:
    gemeentes = [line.strip() for line in file if line.strip()]

print(len(gemeentes))

581


Filter the feature collection down to the features whose name exactly matches a municipality name (keeping the first feature per name).

In [10]:
def getFilteredMunicipalities(collection=None, names=None):
    """Keep the first feature for every name that appears in the municipality list.

    Parameters
    ----------
    collection : dict, optional
        GeoJSON FeatureCollection to filter. Defaults to the notebook-level
        ``feature_collection``.
    names : iterable of str, optional
        Names to keep. Defaults to the notebook-level ``gemeentes`` as it is
        at call time.

    Returns
    -------
    dict
        A new FeatureCollection holding, in original order, the first feature
        whose ``properties['name']`` equals each requested name. The number of
        kept features is also printed.
    """
    if collection is None:
        collection = feature_collection
    if names is None:
        names = gemeentes

    # Set lookup instead of scanning the name list once per feature
    wanted_names = set(names)
    included_names = set()
    filtered_features = []
    for feature in collection['features']:
        feature_name = feature['properties']['name']
        # Exact, case-sensitive match on the full name. Only the first feature
        # seen for a given name is kept; later duplicates are ignored.
        if feature_name in wanted_names and feature_name not in included_names:
            filtered_features.append(feature)
            included_names.add(feature_name)

    # Create a new GeoJSON FeatureCollection with the filtered features
    filtered_geojson = {
        "type": "FeatureCollection",
        "features": filtered_features
    }

    print(len(filtered_features))
    return filtered_geojson

# First pass: match against the names loaded from 'municipalityNames.txt' only
filtered_geojson = getFilteredMunicipalities()

511


It seems that we have a match for 511 out of 581 municipalities

In [11]:
# Extract names from the filtered GeoJSON features
filtered_names = [feature['properties']['name'] for feature in filtered_geojson['features']]

# Find gemeentes not in the filtered GeoJSON.
# A set gives constant-time lookups instead of scanning the list for every name.
matched_names = set(filtered_names)
missing_gemeentes = [name for name in gemeentes if name not in matched_names]

# Print missing gemeentes
print(len(missing_gemeentes))
print(missing_gemeentes)

70
['Brussel', 'Luik', 'Schaarbeek', 'Namen', 'Sint-Jans-Molenbeek', 'Bergen', 'Elsene', 'Ukkel', 'Doornik', 'Sint-Lambrechts-Woluwe', 'Moeskroen', 'Sint-Gillis', 'Sint-Pieters-Woluwe', 'Eigenbrakel', 'Waver', 'Oudergem', 'Aarlen', 'Aat', 'Nijvel', 'Zinnik', 'Tubeke', 'Ronse', 'Sint-Joost-ten-Node', 'Gembloers', 'Sint-Agatha-Berchem', 'Watermaal-Bosvoorde', "'s-Gravenbrakel", 'Hoei', 'Lessen', 'Sint-Genesius-Rode', 'Komen-Waasten', 'Wezet', "Fontaine-l'Evêque", 'Hannuit', 'Bastenaken', 'Genepiën', 'Borgworm', 'Geldenaken', 'Edingen', 'Graven', 'Blégny', 'Frasnes-lez-Anvaing', 'Ecaussines', 'Kelmis', 'Jurbeke', 'Court-Saint-Étienne', 'Blieberg', 'Steenput', 'Kasteelbrakel', 'Perwijs', 'Bitsingen', 'Habay', 'Le Roeulx', 'Opzullik', 'Weismes', 'Terhulpen', 'Bevekom', 'Itter', 'Hastière', 'Etalle', 'Elzele', 'Limburg', 'Thimister-Clermont', 'Voeren', 'Oerle', 'Vloesberg', 'Lijsem', 'Bever', 'Spiere-Helkijn', 'Mesen']


They have a different name in the feature list than in the Wikipedia list.

I matched them manually to this second list of gemeente names

- I should probably fix the names in the features list

In [12]:
file_path = 'municipalityNamesExtra.txt'

# Second, manually matched list of names, one per line. Decoded explicitly
# because the names may contain accented characters (assumes the file is
# stored as UTF-8 -- TODO confirm); blank lines are skipped.
with open(file_path, 'r', encoding='utf-8') as file:
    gemeentes2 = [line.strip() for line in file if line.strip()]

# Only append names that are not already present, so re-running this cell
# does not keep growing `gemeentes`.
known_names = set(gemeentes)
gemeentes = gemeentes + [name for name in gemeentes2 if name not in known_names]

# `gemeentes` now holds the original names plus the manually matched ones
print(len(gemeentes))

651


In [13]:
# Second pass: `gemeentes` now also contains the names from 'municipalityNamesExtra.txt'
municipalities = getFilteredMunicipalities()

581


In [15]:
# Set to True to write the filtered municipalities to 'municipalities.geojson'
# (this overwrites an existing file of that name).
toWriteOrnotToWrite = True
if toWriteOrnotToWrite:
    writeToFile(municipalities, 'municipalities.geojson')

Data saved to municipalities.geojson
