In [1]:
from pymongo import MongoClient
import requests
import json
import os
import datetime as dt

Setting up the MongoDB.

In [2]:
# Connect to the local MongoDB server and select the database and the
# collection that the rest of the notebook works on.
client = MongoClient(host="localhost", port=27017)
db = client.get_database("data_base_OSM")
collection = db.get_collection("bicycle_amenities")

Extracting the data of needed amenities from the OSM JSON file and loading it into the MongoDB.

In [None]:
def extract_amenity(name_input_file, amenity_list, name_output_file):
    """Filter OSM nodes by amenity type and write them to a JSON file.

    Reads ``<name_input_file>.json``, keeps every entry of its ``"nodes"``
    list whose ``"amenity"`` value is contained in ``amenity_list``, wraps
    each kept entry as ``{"node": entry}`` and writes the resulting list to
    ``<name_output_file>.json``. Nothing is read or written when the output
    file already exists.

    Args:
        name_input_file: Path of the input file without the ``.json`` suffix.
        amenity_list: Amenity values to keep.
        name_output_file: Path of the output file without the ``.json`` suffix.

    Raises:
        OSError: If the input file cannot be opened.
        KeyError: If the input JSON has no ``"nodes"`` key.
    """
    output_path = name_output_file + '.json'

    # os.path.exists also works when the name carries a directory part; a
    # lookup in os.listdir() only ever matches bare names in the cwd.
    if os.path.exists(output_path):
        print("Filtered data already exists!")
        return

    with open(name_input_file + ".json", 'r', encoding='utf-8') as file:
        data = json.load(file)

    individual_nodes = [
        {'node': node}
        for node in data['nodes']
        if node.get('amenity') in amenity_list
    ]

    with open(output_path, 'w', encoding='utf-8') as output_file:
        json.dump(individual_nodes, output_file, indent=2)

    # Report the file that was actually written, not a fixed name.
    print(f"Filtered data has been written to '{output_path}'")


def load_mongo_db(file_name):
    """Replace the contents of the global ``collection`` with a JSON file.

    Reads ``<file_name>.json`` first and only then drops the collection, so
    a missing or malformed file no longer leaves the collection empty. A
    JSON list is inserted with ``insert_many``; any other JSON value is
    inserted with ``insert_one``.

    Any exception is caught and reported on stdout; nothing is raised.

    Args:
        file_name: Path of the JSON file without the ``.json`` suffix.
    """
    try:
        # Read the JSON data before touching the database.
        with open(file_name + '.json', 'r', encoding='utf-8') as file:
            data = json.load(file)

        collection.drop()
        print("Collection has been droped!")

        if isinstance(data, list):
            # insert_many rejects an empty list, so skip the call in that case.
            if data:
                collection.insert_many(data)
        else:
            collection.insert_one(data)
        print("Data has been loaded to MongoDB!")

    except Exception as e:
        print(f"An error occurred: {e}")
        
    
# Amenity values that are kept when filtering the OSM export.
amenities = [
    "bicycle_parking",
    "bicycle_rental",
    "bicycle_repair_station",
    "compressed_air",
    "drinking_water",
    "shelter",
]

# Write the filtered nodes to 'amenity_filtered.json' (skipped if it exists).
extract_amenity('osm-output', amenities, 'amenity_filtered')

# NOTE(review): this loads a different file than the one written just above
# — confirm that '../data/amenities_2023-11-17.json' is the intended input.
load_mongo_db('../data/amenities_2023-11-17')

Adding cantons to the documents by using the OSM API.

In [None]:
def get_canton(lat, lon):
    """Reverse-geocode a coordinate to its ``state`` name via Nominatim.

    Sends a reverse-geocoding request to the public OSM Nominatim endpoint
    and returns the ``address.state`` field of the JSON response.

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.

    Returns:
        The ``state`` string from the response, or ``"No canton found"`` when
        the request fails, returns an HTTP error status, or the response
        carries no ``address.state`` entry.
    """
    try:
        response = requests.get(
            'https://nominatim.openstreetmap.org/reverse',
            params={
                'format': 'json',
                'lat': lat,
                'lon': lon,
                'zoom': 18,
                'addressdetails': 1,
            },
            # Without a timeout a stalled connection would block forever.
            timeout=10,
        )
        response.raise_for_status()
        return response.json()['address']['state']
    # `except Exception` keeps the best-effort fallback but, unlike a bare
    # `except:`, lets KeyboardInterrupt through so a long run can be stopped.
    except Exception:
        print("Error: Could not get canton from OSM API")
        return "No canton found"


def add_kanton_to_db(delay_seconds=1.0):
    """Store the canton of every document in the global ``collection``.

    For each document the coordinates in ``node.lat`` / ``node.lon`` are
    converted to float, resolved with ``get_canton`` and the result is
    written to ``node.canton``.

    Args:
        delay_seconds: Pause between two consecutive lookups. The default of
            one second follows the public Nominatim usage policy (at most
            one request per second). Pass ``0`` to disable the pause.
    """
    import time

    # Fetch only the needed fields up front: the loop is slow (one HTTP call
    # per document), so no server-side cursor has to stay open meanwhile.
    docs = list(collection.find({}, {'node.lat': 1, 'node.lon': 1}))

    for counter, doc in enumerate(docs):
        if counter and delay_seconds > 0:
            time.sleep(delay_seconds)

        lat = float(doc['node']['lat'])  # String to float
        lon = float(doc['node']['lon'])
        canton = get_canton(lat, lon)
        collection.update_one({'_id': doc['_id']}, {'$set': {'node.canton': canton}})
        print(f"{counter}, added canton '{canton}' to document with id '{doc['_id']}'")

    print("Cantons have been added to the database!")
    
# Slow step: performs one reverse-geocoding request per stored document.
add_kanton_to_db()

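Some POIs lie outside Switzerland; these are deleted. Note that the canton list below contains only the German names, so the translation cell further down has to be run before this one; otherwise POIs whose canton was returned as e.g. 'Vaud', 'Ticino' or 'Genève' would be deleted as well.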

In [17]:
# German names of the 26 Swiss cantons, in alphabetical order.
cantons = [
    "Aargau",
    "Appenzell Ausserrhoden",
    "Appenzell Innerrhoden",
    "Basel-Landschaft",
    "Basel-Stadt",
    "Bern",
    "Freiburg",
    "Genf",
    "Glarus",
    "Graubünden",
    "Jura",
    "Luzern",
    "Neuenburg",
    "Nidwalden",
    "Obwalden",
    "Schaffhausen",
    "Schwyz",
    "Solothurn",
    "St. Gallen",
    "Tessin",
    "Thurgau",
    "Uri",
    "Waadt",
    "Wallis",
    "Zug",
    "Zürich",
]


def delete_wrong_POIs(cantons, wrong_POIs=None):
    """Delete every document whose ``node.canton`` is not in ``cantons``.

    The distinct canton values of the affected documents are collected
    first and printed after the deletion. Because the filter uses ``$nin``,
    documents without a ``node.canton`` field are matched and deleted too.

    Args:
        cantons: List of canton names that are allowed to stay.
        wrong_POIs: Unused; kept only for backward compatibility. It is now
            optional so the function can be called with the canton list alone.
    """
    outside_filter = {"node.canton": {"$nin": cantons}}
    wrong_POIs = collection.distinct("node.canton", outside_filter)
    collection.delete_many(outside_filter)
    print(f"Deleted documents with wrong location: '{wrong_POIs}'")


# Remove all documents whose canton is not one of the names in `cantons`.
delete_wrong_POIs(cantons)

Deleted documents with canton '['Baden-Württemberg', 'Vorarlberg', 'Trentino-Alto Adige/Südtirol', 'Lombardia', 'Auvergne-Rhône-Alpes', 'No canton found', 'Grand Est', 'Bourgogne-Franche-Comté', "Valle d'Aosta / Vallée d'Aoste"]'


Cantons that were returned with a French, Italian or multilingual name are renamed to their German name.

In [25]:
# Maps the canton name as stored in the database to its German name.
canton_translation = dict([
    ("Vaud", "Waadt"),
    ("Graubünden/Grischun/Grigioni", "Graubünden"),
    ("Bern/Berne", "Bern"),
    ("Valais/Wallis", "Wallis"),
    ("Neuchâtel", "Neuenburg"),
    ("Ticino", "Tessin"),
    ("Genève", "Genf"),
    ("Fribourg/Freiburg", "Freiburg"),
])


def translate_canton(canton_translation):
    """Rename canton values in the global ``collection``.

    For every ``old name -> new name`` pair of the mapping, all documents
    whose ``node.canton`` equals the old name get the new name, and one
    confirmation line per pair is printed.

    Args:
        canton_translation: Mapping from the stored canton name to its
            replacement.
    """
    for stored_name in canton_translation:
        german_name = canton_translation[stored_name]
        collection.update_many(
            {"node.canton": stored_name},
            {"$set": {"node.canton": german_name}},
        )
        print(f"Updated documents with canton '{stored_name}' to '{german_name}'")


# Apply every rename of `canton_translation` to the stored documents.
translate_canton(canton_translation)

Updated documents with canton 'Vaud' to 'Waadt'
Updated documents with canton 'Graubünden/Grischun/Grigioni' to 'Graubünden'
Updated documents with canton 'Bern/Berne' to 'Bern'
Updated documents with canton 'Valais/Wallis' to 'Wallis'
Updated documents with canton 'Neuchâtel' to 'Neuenburg'
Updated documents with canton 'Ticino' to 'Tessin'
Updated documents with canton 'Genève' to 'Genf'
Updated documents with canton 'Fribourg/Freiburg' to 'Freiburg'


Adding a GeoJSON location to the documents for spatial queries.

In [39]:
def add_location_to_documents():
    """Add a GeoJSON point to every document of the global ``collection``.

    The values in ``node.lat`` and ``node.lon`` are converted to float and
    stored under ``node.location`` as a GeoJSON ``Point``.
    """
    for entry in collection.find():
        node = entry['node']
        latitude = float(node['lat'])
        longitude = float(node['lon'])

        # Build the GeoJSON object; the order is longitude, then latitude.
        point = {"type": "Point", "coordinates": [longitude, latitude]}

        # Update the document in the database.
        collection.update_one({"_id": entry['_id']}, {"$set": {"node.location": point}})

    print("Location field added to all documents.")
    
# Write the `node.location` GeoJSON point to every stored document.
add_location_to_documents()

Location field added to all documents.


Enrichment of the data with compressed air stations in the city of Zurich.

In [33]:
# Read the source FeatureCollection; the context manager closes the file
# handle, which a bare open() inside json.load() would leave open.
with open("../data/taz.velopumpstationen_p.json", "r", encoding="utf-8") as source_file:
    data = json.load(source_file)

def transform_feature(feature):
    """Convert one GeoJSON feature into the notebook's ``{"node": ...}`` form.

    Args:
        feature: Mapping with ``properties.id1`` and ``geometry.coordinates``;
            the coordinates are read as ``[lon, lat]``.

    Returns:
        ``{"node": {...}}`` with a fixed amenity ``"compressed_air"`` and
        canton ``"Zürich"``, the id and the coordinates as strings, and a
        GeoJSON point that reuses the feature's coordinate list.
    """
    identifier = str(feature["properties"]["id1"])
    coordinates = feature["geometry"]["coordinates"]

    node = {
        "amenity": "compressed_air",
        "id": identifier,
        "lat": str(coordinates[1]),
        "lon": str(coordinates[0]),
        "canton": "Zürich",
        "location": {"type": "Point", "coordinates": coordinates},
    }
    return {"node": node}

def transform_json(input_data):
    """Transform every feature of a FeatureCollection.

    Args:
        input_data: Mapping whose ``"features"`` entry is an iterable of
            features accepted by ``transform_feature``.

    Returns:
        A list with one transformed document per feature, in input order.
    """
    transformed_features = []
    for feature in input_data["features"]:
        transformed_features.append(transform_feature(feature))
    return transformed_features

# Transformation durchführen
# Run the transformation on the loaded FeatureCollection.
transformed_json = transform_json(data)

print(transformed_json[0])

# Write through a context manager so the output file is flushed and closed
# deterministically instead of relying on garbage collection.
with open("../data/compressed_air_transformed.json", "w", encoding="utf-8") as target_file:
    json.dump(transformed_json, target_file)

{'node': {'amenity': 'compressed_air', 'id': '1011', 'lat': '47.3917066531', 'lon': '8.5185306277', 'canton': 'Zürich', 'location': {'type': 'Point', 'coordinates': [8.5185306277, 47.3917066531]}}}


Scrape data on municipalities (Gemeinden) with more than 10,000 inhabitants from Wikipedia.

In [ ]:
import data_preprocessing_tools.webScraper as ws

# Create the project scraper for Swiss communes.
scraper = ws.SwissCommuneScraper()

# Save the scraped data to disk only when fetch_data() returns a truthy value.
if scraper.fetch_data():
    scraper.save_data('../../data/raw_data/grosse_gemeinden_data.json')
    
def load_data_in_db(data):
    """Insert documents into the ``bike_ways`` collection of ``data_base_OSM``.

    Args:
        data: Sequence of documents (dicts) to insert. When it is empty or
            ``None`` nothing is inserted, because ``insert_many`` rejects an
            empty list.
    """
    if not data:
        print("No documents to insert into database")
        return

    # The context manager closes this dedicated client once the insert is
    # done, so no connection is left open after the call.
    with MongoClient("mongodb://localhost:27017/") as mongo_client:
        bike_ways = mongo_client["data_base_OSM"]["bike_ways"]
        bike_ways.insert_many(data)
    print("Inserted documents into database")
    
# NOTE(review): this runs even when fetch_data() above returned a falsy
# value — confirm that scraper.data is usable in that case.
load_data_in_db(scraper.data)

Fetch the cycleway length of each municipality and store it in the `bike_ways` collection.

In [None]:
import data_preprocessing_tools.cycleWays as cw

# The municipality documents live in "bike_ways" (filled by load_data_in_db).
# The global `collection` still refers to "bicycle_amenities", so the target
# collection is selected explicitly here.
bike_ways_collection = db["bike_ways"]

gemeinden = bike_ways_collection.distinct("Gemeinde")

for gemeinde in gemeinden:
    bicycle_ways = cw.BicycleWays(gemeinde)
    cycleway_length_km = bicycle_ways.total_cycleway_length
    print(f"Gesamtlänge der Fahrradwege in {gemeinde}: {cycleway_length_km} km")
    bike_ways_collection.update_one(
        {"Gemeinde": gemeinde},
        {"$set": {"Fahrradwege in km": cycleway_length_km}},
    )