### Notebook purpose
- Connect to the Overpass API with overpy
- Extract the center coordinates of all hiking trails within Switzerland
- Convert the data into a pandas DataFrame
- Create a table in a SQL database (hosted on Microsoft Azure)
- Store the coordinates in the SQL database

In [12]:
# Import required libraries
import json
import os
import urllib
import urllib.parse

import overpy
import pandas as pd
import pymssql
import pyodbc
from sqlalchemy import create_engine

In [13]:
# Overpass endpoint used for all requests in this notebook (custom instance, not the overpy default)
overpass_endpoint = "http://overpass.osm.ch/api/interpreter"
api = overpy.Overpass(url=overpass_endpoint)

# Overpass QL query selecting hiking route relations:
#   - route=hiking, network=lwn
#   - name must not match "fixme" (case-insensitive)
#   - osmc:symbol must match one of the listed trail-marker patterns
#   - restricted to a bounding box around Switzerland (south, west, north, east)
# 'out center tags' returns the tags plus a single center coordinate per relation
# instead of the full member geometry.
query = """
[out:json];
relation
["route"="hiking"]
["name"!~"fixme", i]
["network"="lwn"]
["osmc:symbol"~"yellow::yellow_diamond|red:white:red_bar|yellow:white:yellow_diamond|blue:white:blue_bar"]
(45.8899, 6.0872, 47.8085, 10.4921);
out center tags;
"""

# Send the query to the Overpass server and keep the parsed response
result = api.query(query)

# Timestamp of the API call; the same value is attached to every row of the DataFrame
timestamp_apicall = pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")

# Column layout of the resulting table (also used when no relation qualifies,
# so that an empty result still yields a DataFrame with the expected columns)
trail_columns = ['timestamp_apicall', 'id', 'name', 'lat', 'lon']

# One dictionary per hiking relation; each dictionary becomes one DataFrame row.
# Deliberately not named `list`/`dict`, which would shadow the built-ins for all later cells.
trail_records = []

# Iterate over all relations returned by the Overpass query
for relation in result.relations:

    # Extract relevant tags (each is None when the tag is absent)
    org_name = relation.tags.get('name')
    org_from = relation.tags.get('from')
    org_to = relation.tags.get('to')

    # If the original name is not available, construct it from 'from' and 'to'
    if not org_name and org_from and org_to:
        fix_name = f"{org_from} - {org_to}"
    else:
        fix_name = org_name

    # Center coordinates of the relation, as delivered by 'out center'.
    # These can be None when the response carries no center for a relation,
    # so they must be checked before the numeric comparison below.
    lat = relation.center_lat
    lon = relation.center_lon

    # Skip relations without a usable name or without center coordinates
    if not fix_name or lat is None or lon is None:
        continue

    # Keep only relations with positive latitude and longitude
    if lat > 0 and lon > 0:
        trail_records.append({
            'timestamp_apicall': timestamp_apicall,
            'id': relation.id,
            'name': fix_name,
            'lat': lat,
            'lon': lon
        })

# Once all data is processed, create the DataFrame
df_wanderwege = pd.DataFrame(trail_records, columns=trail_columns)

# Print the first rows of the DataFrame
print(df_wanderwege.head(5))

     timestamp_apicall      id                                          name  \
0  2024-09-23 13:57:01   22614  Nationalpark Wanderroute 15 (Munt la Schera)   
1  2024-09-23 13:57:01  103607                                 Wanderwege SG   
2  2024-09-23 13:57:01  112830                Uetliberg - Uetliberg Uto Kulm   
3  2024-09-23 13:57:01  112831                           Folenweid - Baldern   
4  2024-09-23 13:57:01  112833                          Felsenegg - Balderen   

          lat         lon  
0  46.6501430  10.2301992  
1  47.4309774   9.6201700  
2  47.3511680   8.4897796  
3  47.3291235   8.5007261  
4  47.3152439   8.5050559  


In [14]:
# Get current working directory; the configuration path below is resolved against it
current_dir = os.getcwd()

# Load database access configuration
config_path = os.path.join(current_dir, '..', 'config', 'db_config.json')
with open(config_path, 'r') as f:
    db_config = json.load(f)

# Define the server, database, user and password
server = db_config['server']
database = db_config['database']
db_user = db_config['db_user']
db_password = db_config['db_password']

# Percent-encode the credentials before embedding them in the connection URL.
# Characters such as '@', ':' or '/' in a user name or password would otherwise
# be parsed as URL delimiters and corrupt the connection string.
db_user_quoted = urllib.parse.quote(db_user, safe="")
db_password_quoted = urllib.parse.quote(db_password, safe="")

# Create the SQLAlchemy engine. The write below goes exclusively through this engine,
# so no separate raw pymssql connection is opened.
engine = create_engine(
    f"mssql+pymssql://{db_user_quoted}:{db_password_quoted}@{server}/{database}"
)

try:
    # Write the DataFrame to the MSSQL database (an existing 'overpass' table is replaced)
    df_wanderwege.to_sql(name='overpass', con=engine, if_exists='replace', index=False)
finally:
    # Release the engine's pooled connections even if the write fails
    engine.dispose()

print("DataFrame erfolgreich in die MSSQL-Datenbank geladen!")

DataFrame erfolgreich in die MSSQL-Datenbank geladen!
