## Import Packages

In [1]:
import pandas as pd
import json
import geopandas as gpd
from pprint import pprint
from shapely.geometry import Point, LineString, Polygon


## Process Data

In [2]:
def geometry_from_overpass(element):
    coords = [(coord["lon"], coord["lat"]) for coord in element["geometry"]]

    if coords[0] == coords[-1]:
        return Polygon(coords)
    else:
        return LineString(coords)


def get_gdf(data_dir):
    try:
        with open(data_dir, encoding="utf-8") as f:
            data = json.load(f)
    except:
        exception = "Error: Could not load the file"
        return exception
            
    features = []

    for element in data["elements"]:
        el_type = element["type"]
        tags = element.get("tags", {})
        tags["id"] = element["id"]
        element["tags"]["name"] = element["tags"].get("name", "Unnamed")
        element["tags"]["latitude"] = element.get("lat", None)
        element["tags"]["longitude"] = element.get("lon", None)

        if el_type in ["way", "relation"] and "geometry" in element:
            geom = geometry_from_overpass(element)
            features.append({**tags, "geometry": geom})

        elif el_type == "node":
            lon = element["lon"]
            lat = element["lat"]
            geom = Point(lon, lat)
            features.append({**tags, "geometry": geom})

    return gpd.GeoDataFrame(features, crs="EPSG:4326")


### Caves

In [None]:
gdf = get_gdf("./raw_data/caves.geojson")
caves_columns = ["id", "name", "longitude","latitude","geometry"]
caves_df = gdf[caves_columns]
caves_df[:]['type'] = 'cave'

In [306]:
caves_df.to_csv("./processed_data/caves.csv", index=False, sep=";")

### Cultural Attractions

These include
* artworks
* memorials
* Galleries and Museums
* Castles

In [12]:
gdf = get_gdf("./raw_data/cultural_attractions.geojson")


In [13]:
artworks_df = gdf[gdf.tourism=='artwork'][['id', 'name', 'latitude', 'longitude','geometry', 'artist_name', 'description', 'source:website','artwork_type']]
memorials_df = gdf[gdf.historic.isin(['memorial','monument','castle']) ][['id', 'name', 'latitude', 'longitude','geometry','memorial', 'historic', 'inscription']]
gallery_and_museum_df = gdf[gdf.tourism.isin(['gallery','museum'])][['id', 'name', 'latitude', 'longitude','geometry','contact:website','tourism','addr:street','addr:city','addr:postcode','addr:housenumber']]

In [14]:
artworks_df.rename(columns={'artwork_type':'type', 'source:website':'website'}, inplace=True)
memorials_df['type'] = memorials_df['historic'] + memorials_df['memorial']
memorials_df.rename(columns={'memorial':'memorial_type'}, inplace=True)
memorials_df.drop(columns=['historic'], inplace=True)
gallery_and_museum_df.rename(columns={'addr:street':'street', 'addr:city':'city', 'addr:postcode':'postcode', 'addr:housenumber':'housenumber', 'contact:website':'website'}, inplace=True)

In [15]:
artworks_df.to_csv("./processed_data/artworks.csv", index=False, sep=";")
memorials_df.to_csv("./processed_data/memorials.csv", index=False, sep=";")
gallery_and_museum_df.to_csv("./processed_data/gallery_and_museums.csv", index=False, sep=";")

### Food and Drink Establishments

In [23]:
gdf = get_gdf("./raw_data/food_and_drink_establishments.geojson")


In [24]:
food_and_drink_establishments_df = gdf[['id','latitude','longitude','geometry','name','cuisine','operator',
                                        'addr:street','addr:city','addr:postcode','addr:housenumber',
                                        'contact:website','phone','contact:phone','contact:email','website',
                                        'amenity'
                                        ]].copy(deep=True)

In [25]:
food_and_drink_establishments_df.website.isna().sum(), food_and_drink_establishments_df['contact:website'].isna().sum(), 
food_and_drink_establishments_df[:]['website'] = food_and_drink_establishments_df['website'].combine_first(food_and_drink_establishments_df['contact:website'])
food_and_drink_establishments_df[:]['phone'] = food_and_drink_establishments_df['phone'].combine_first(food_and_drink_establishments_df['contact:phone'])
food_and_drink_establishments_df.rename(columns={'addr:street':'street', 'addr:city':'city', 'addr:postcode':'postcode', 'addr:housenumber':'housenumber','amenity':'type','contact:email':'email'}, inplace=True)
food_and_drink_establishments_df.drop(columns=['contact:website', 'contact:phone'], inplace=True)

In [218]:
food_and_drink_establishments_df.to_csv("./processed_data/food_and_drink_establishments.csv", index=False,sep=";")

### Holiday Apartments and Houses

In [56]:
gdf = get_gdf("./raw_data/holiday_apartments_and_houses.geojson")

holiday_apartments_and_houses_df = gdf[['id','latitude','longitude','geometry','name','addr:street', 'addr:city','addr:postcode','addr:housenumber','contact:website','contact:phone','contact:email','website','phone','email','tourism']].copy(deep=True)


In [57]:
holiday_apartments_and_houses_df[:]['website'] = holiday_apartments_and_houses_df['website'].combine_first(holiday_apartments_and_houses_df['contact:website'])
holiday_apartments_and_houses_df[:]['phone'] = holiday_apartments_and_houses_df['phone'].combine_first(holiday_apartments_and_houses_df['contact:phone'])
holiday_apartments_and_houses_df[:]['email'] = holiday_apartments_and_houses_df['email'].combine_first(holiday_apartments_and_houses_df['contact:email'])
holiday_apartments_and_houses_df.rename(columns={'addr:street':'street', 'addr:city':'city', 'addr:postcode':'postcode', 'addr:housenumber':'housenumber','tourism':'type'}, inplace=True)

holiday_apartments_and_houses_df.drop(columns=['contact:website', 'contact:phone', 'contact:email'], inplace=True)

In [58]:
holiday_apartments_and_houses_df.to_csv("./processed_data/holiday_apartments_and_houses.csv", index=False, sep=";")


### Hotels and Accomodation

In [60]:
gdf = get_gdf("./raw_data/hotels_and_accommodation.geojson")

hotels_and_accommodation_df = gdf[['id','latitude','longitude','geometry','name','tourism',
                                   'addr:street', 'addr:city','addr:postcode','addr:housenumber',
                                   'cuisine','description','operator',
                                   'contact:website','contact:phone','contact:email','website','phone','email']].copy(deep=True)



In [61]:
hotels_and_accommodation_df[:]['website'] = hotels_and_accommodation_df['website'].combine_first(hotels_and_accommodation_df['contact:website'])
hotels_and_accommodation_df[:]['phone'] = hotels_and_accommodation_df['phone'].combine_first(hotels_and_accommodation_df['contact:phone'])
hotels_and_accommodation_df[:]['email'] = hotels_and_accommodation_df['email'].combine_first(hotels_and_accommodation_df['contact:email'])
hotels_and_accommodation_df.rename(columns={'addr:street':'street', 'addr:city':'city', 'addr:postcode':'postcode', 'addr:housenumber':'housenumber','tourism':'type'}, inplace=True)

hotels_and_accommodation_df.drop(columns=['contact:website', 'contact:phone', 'contact:email'], inplace=True)

In [244]:
hotels_and_accommodation_df.to_csv("./processed_data/hotels_and_accommodation.csv", index=False,sep=";")


### Lakes and Rivers

In [64]:
with open('./raw_data/lakes_and_rivers.geojson', "r") as file:
    data = json.load(file)


In [None]:
import json
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon, LineString
from shapely import wkt

# Load your Overpass JSON data
with open('./raw_data/lakes_and_rivers.geojson', "r") as file:
    data = json.load(file)


# Initialize lists to hold lake and river data
lakes = []
rivers = []

# Process each element in the JSON data
for element in data["elements"]:
    tags = element.get("tags", {})

    # Extract lakes based on 'natural=water' and 'water=lake'
    if tags.get("natural") == "water" and tags.get("water") == "lake":
        # For relations, use the center and the geometry from members; otherwise use the element's geometry
        if element["type"] == "relation":
            lat = element.get("center", {}).get("lat")
            lon = element.get("center", {}).get("lon")
            # We assume the geometry comes from the first member (adjust if needed)
            geom_data = element.get("members", [{}])[0].get("geometry", [])
        else:
            lat = element.get("lat")
            lon = element.get("lon")
            geom_data = element.get("geometry", [])
        
        wkt_geom = None
        if isinstance(geom_data, list) and len(geom_data) >= 3:
            # Convert coordinate dictionaries to (lon, lat) tuples
            coords = [(pt["lon"], pt["lat"]) for pt in geom_data]
            # Ensure the polygon is closed by checking if the first and last coordinate are the same.
            if coords[0] != coords[-1]:
                coords.append(coords[0])
            try:
                poly = Polygon(coords)
                wkt_geom = poly.wkt  # WKT literal of the polygon
            except Exception as e:
                print(f"Error creating polygon for lake element {element['id']}: {e}")
        
        lakes.append({
            "id": element["id"],
            "name": tags.get("name", "Unnamed"),
            "latitude": lat,
            "longitude": lon,
            "bounds": element.get("bounds", ''),
            "geometry": wkt_geom,
            "type": "lake",
        })
        
    # Extract rivers based on 'waterway=river'
    elif tags.get("waterway") == "river":
        geom_data = element.get("geometry", [])
        wkt_geom = None
        # For a river, even if there are only 2 coordinates, we can create a LineString
        if isinstance(geom_data, list) and len(geom_data) >= 2:
            coords = [(pt["lon"], pt["lat"]) for pt in geom_data]
            try:
                line = LineString(coords)
                wkt_geom = line.wkt  # WKT literal of the linestring
            except Exception as e:
                print(f"Error creating LineString for river element {element['id']}: {e}")
        
        rivers.append({
            "id": element["id"],
            "name": tags.get("name", "Unnamed"),
            "latitude": element.get("center", {}).get("lat"),
            "longitude": element.get("center", {}).get("lon"),
            "bounds": element.get("bounds", ''),
            "geometry": wkt_geom,
            "type": "river",
        })
# Create DataFrames for lakes and rivers
lakes_df = pd.DataFrame(lakes)
rivers_df = pd.DataFrame(rivers)


In [87]:
lakes_df.dropna(axis=0, subset=['geometry'], inplace=True)
rivers_df.dropna(axis=0, subset=['geometry'], inplace=True)


In [89]:

# Save the DataFrames to CSV (the "geometry" column contains the WKT literal)
lakes_df.to_csv("./processed_data/lakes.csv", index=False, sep=";")
rivers_df.to_csv("./processed_data/rivers.csv", index=False, sep=";")

### Beaches

In [261]:
with open('./raw_data/beaches.geojson', "r") as file:
    data = json.load(file)


In [262]:
tag_items = {tag for element in data["elements"] for tag in element.get("tags", {})}
tag_items

{'access',
 'created_by',
 'description',
 'dog',
 'leisure',
 'lifeguard',
 'name',
 'natural',
 'sport',
 'supervised',
 'surface'}

In [94]:
# Initialize lists to hold lake and river data
beach = []

# Process each element in the JSON data
for element in data["elements"]:
    tags = element.get("tags", {})
    # Extract beaches based on 'natural=beach' 
    if tags.get("natural") == "beach":
        if element["type"] == "node":
            beach.append({
                "id": "beach_id_"+str(element["id"]),
                "name": tags.get("name", "Unnamed"),
                "latitude": element.get("lat"),
                "longitude": element.get("lon"),
                "bounds": element.get("bounds", ''),
                "geometry": element.get("geometry", 'POINT (' + str( element['center']["lon"]) + ' ' + str( element['center']["lat"]) + ')' ),
                "type": tags.get("natural", "beach")+(':'+tags.get("surface", "")) if tags.get("surface") else 'beach',
            })
        else:
            beach.append({
                "id": "beach_id_"+str(element["id"]),
                "name": tags.get("name", "Unnamed"),
                "latitude": element.get("lat"),
                "longitude": element.get("lon"),
                "bounds": element.get("bounds", ''),
                "geometry": element.get("geometry", 'POINT (' + str( element['center']["lon"]) + ' ' + str( element['center']["lat"]) + ')' ),
                "type": tags.get("natural", "beach")+(':'+tags.get("surface", "")) if tags.get("surface") else 'beach',
                
            })
    

# Create DataFrames for beach
beach_df = pd.DataFrame(beach)




In [96]:
# Load JSON data
with open('./raw_data/beaches.geojson', "r") as file:
    data = json.load(file)

# Initialize list for beaches
beach = []

# Process each element in the JSON data
for element in data["elements"]:
    tags = element.get("tags", {})

    # Extract beaches based on 'natural=beach'
    if tags.get("natural") == "beach":
        geometry_wkt = None  # Default empty geometry

        if element["type"] == "node":
            # Nodes: Use a Point
            lat = element.get("lat")
            lon = element.get("lon")
            if lat and lon:
                geometry_wkt = Point(lon, lat).wkt

        elif element["type"] in ["way", "relation"]:
            # Ways/Relations: Convert geometry list to Polygon (or LineString)
            geom_data = element.get("geometry", [])
            if isinstance(geom_data, list) and len(geom_data) >= 3:
                coords = [(pt["lon"], pt["lat"]) for pt in geom_data]
                # Ensure it forms a closed polygon
                if coords[0] != coords[-1]:
                    coords.append(coords[0])
                try:
                    geometry_wkt = Polygon(coords).wkt  # Polygon WKT
                except Exception as e:
                    print(f"Error creating polygon for beach {element['id']}: {e}")

            elif isinstance(geom_data, list) and len(geom_data) >= 2:
                # If not enough points for a polygon, store as LineString
                coords = [(pt["lon"], pt["lat"]) for pt in geom_data]
                try:
                    geometry_wkt = LineString(coords).wkt  # LineString WKT
                except Exception as e:
                    print(f"Error creating linestring for beach {element['id']}: {e}")

        # Append data to beach list
        beach.append({
            "id": f"beach_id_{element['id']}",
            "name": tags.get("name", "Unnamed"),
            "latitude": element.get("lat") if element["type"] == "node" else None,
            "longitude": element.get("lon") if element["type"] == "node" else None,
            "bounds": element.get("bounds", ''),
            "geometry": geometry_wkt,  # Store WKT format
            "type": tags.get("natural", "beach") + (":" + tags.get("surface", "")) if tags.get("surface") else "beach",
        })

# Create a DataFrame
beach_df = pd.DataFrame(beach)


In [103]:

# Save to CSV
beach_df.to_csv("./processed_data/beaches.csv", index=False, sep=";")



### Peaks and Viewpoints

In [104]:
gdf = get_gdf("./raw_data/peaks_and_viewpoints.geojson")


peaks_and_viewpoints_df = gdf[["id", "name", "longitude","latitude","geometry", "description","historic", "amenity","height","website","natural" ]].copy(deep=True)
peaks_and_viewpoints_df.rename(columns={'natural':'type'}, inplace=True)

In [277]:
peaks_and_viewpoints_df.to_csv("./processed_data/peaks_and_viewpoints.csv", index=False, sep=";")

### Protected Areas

In [114]:
with open('./raw_data/protected_areas.geojson', "r") as file:
    data = json.load(file)

In [117]:
len(data)

4

In [128]:


protected_areas = []


for element in data["elements"]:
    tags = element.get("tags", {})
    protected_areas.append({
        "id": "PA_id_"+ str(element["id"]),
        "latitude": element['center']["lat"],
        "longitude": element['center']["lon"],
        "geometry": element.get("geometry", 'POINT (' + str( element['center']["lon"]) + ' ' + str( element['center']["lat"]) + ')' ),
        "name": tags.get("name"),
        "source": tags.get("source"),
        "website": tags.get("website"),
        "protection_title": tags.get("protection_title"),
        "leisure": tags.get("leisure"),
        "type":'protected area'
        
    })
    
    
protected_areas_df = pd.DataFrame(protected_areas)




In [129]:
protected_areas_df

Unnamed: 0,id,latitude,longitude,geometry,name,source,website,protection_title,leisure,type
0,PA_id_4710461,45.884332,10.692263,POINT (10.6922633 45.8843322),Rete di riserve Alpi Ledrensi,Provincia autonoma di Trento,https://www.reteriservealpiledrensi.tn.it/,,,protected area
1,PA_id_15595792,46.490497,11.586921,POINT (11.586921 46.4904972),Naturpark Schlern-Rosengarten – Parco naturale...,EEA CDDA (v20),https://parchi-naturali.provincia.bz.it/parco-...,Parco naturale,nature_reserve,protected area
2,PA_id_15638582,46.53694,11.668578,POINT (11.668578 46.5369404),Landschaftsschutzgebiet Seiser Alm – Zona di T...,"""Piani paesaggistici: Delimitazioni dei Piani ...",https://seiseralm-schlerngebiet.com/seiseralm/...,Zona di Tutela Paesaggistica,,protected area


In [130]:
protected_areas_df.to_csv("./processed_data/protected_areas.csv", index=False, sep=";")

### Skiing and Winter Sports facility

In [139]:
with open('./raw_data/skiing_and_winter_sports.geojson', "r") as file:
    data = json.load(file)

In [147]:
import json
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon, LineString
from shapely import wkt

# Load JSON data
with open('./raw_data/skiing_and_winter_sports.geojson', "r") as file:
    data = json.load(file)

# Initialize list for skiing & piste areas
sna = []

# Process each element in the JSON data
for element in data["elements"]:
    tags = element.get("tags", {}).copy()  # Copy tags to avoid modifying original

    geometry_wkt = None  # Default empty geometry

    if element["type"] == "node":
        # Nodes: Represented as Points
        lat = element.get("lat")
        lon = element.get("lon")
        if lat and lon:
            geometry_wkt = Point(lon, lat).wkt

    elif element["type"] in ["way", "relation"]:
        # Ways/Relations: Convert geometry list to Polygon (or LineString)
        geom_data = element.get("geometry", [])
        if isinstance(geom_data, list) and len(geom_data) >= 3:
            coords = [(pt["lon"], pt["lat"]) for pt in geom_data]
            # Ensure it forms a closed polygon
            if coords[0] != coords[-1]:
                coords.append(coords[0])
            try:
                geometry_wkt = Polygon(coords).wkt  # Polygon WKT
            except Exception as e:
                print(f"Error creating polygon for SNA {element['id']}: {e}")

        elif isinstance(geom_data, list) and len(geom_data) >= 2:
            # If not enough points for a polygon, store as LineString
            coords = [(pt["lon"], pt["lat"]) for pt in geom_data]
            try:
                geometry_wkt = LineString(coords).wkt  # LineString WKT
            except Exception as e:
                print(f"Error creating linestring for SNA {element['id']}: {e}")

    # Build type field with sport and piste details
    sport_type = tags.pop("sport", "skiing")
    piste_type = tags.pop("piste:type", "")
    full_type = f"{sport_type}:{piste_type}" if piste_type else sport_type

    # Append to list
    sna.append({
        "id": f"PA_id_{element['id']}",
        "name": tags.pop("name", ""),
        "latitude": element.get("lat") if element["type"] == "node" else None,
        "longitude": element.get("lon") if element["type"] == "node" else None,
        "bounds": element.get("bounds", ""),
        "geometry": geometry_wkt,  # Store WKT format
        "type": full_type,
        "details": tags,  # Remaining tags
    })

sna_df = pd.DataFrame(sna)



In [159]:
sna_df.to_csv("./processed_data/skiing_and_winter_sports.csv", index=False, sep=";")

### Waterfall and Spring

In [153]:
with open('./raw_data/waterfall_and_spring.geojson', "r") as file:
    data = json.load(file)

In [156]:

# Initialize list for skiing & piste areas
was = []

# Process each element in the JSON data
for element in data["elements"]:
    tags = element.get("tags", {}).copy()  # Copy tags to avoid modifying original

    geometry_wkt = None  # Default empty geometry

    if element["type"] == "node":
        # Nodes: Represented as Points
        lat = element.get("lat")
        lon = element.get("lon")
        if lat and lon:
            geometry_wkt = Point(lon, lat).wkt

    elif element["type"] in ["way", "relation"]:
        # Ways/Relations: Convert geometry list to Polygon (or LineString)
        geom_data = element.get("geometry", [])
        if isinstance(geom_data, list) and len(geom_data) >= 3:
            coords = [(pt["lon"], pt["lat"]) for pt in geom_data]
            # Ensure it forms a closed polygon
            if coords[0] != coords[-1]:
                coords.append(coords[0])
            try:
                geometry_wkt = Polygon(coords).wkt  # Polygon WKT
            except Exception as e:
                print(f"Error creating polygon for WAS {element['id']}: {e}")

        elif isinstance(geom_data, list) and len(geom_data) >= 2:
            # If not enough points for a polygon, store as LineString
            coords = [(pt["lon"], pt["lat"]) for pt in geom_data]
            try:
                geometry_wkt = LineString(coords).wkt  # LineString WKT
            except Exception as e:
                print(f"Error creating linestring for WAS {element['id']}: {e}")

    # Build type field with sport and piste details
    sport_type = tags.pop("sport", "skiing")
    piste_type = tags.pop("piste:type", "")
    full_type = f"{sport_type}:{piste_type}" if piste_type else sport_type

    # Append to list
    was.append({
        "id": f"PA_id_{element['id']}",
        "name": tags.pop("name", ""),
        "latitude": element.get("lat") if element["type"] == "node" else None,
        "longitude": element.get("lon") if element["type"] == "node" else None,
        "bounds": element.get("bounds", ""),
        "geometry": geometry_wkt,  # Store WKT format
        "type": full_type,
        "details": tags,  # Remaining tags
    })

was_df = pd.DataFrame(was)



In [157]:
was_df.head()

Unnamed: 0,id,name,latitude,longitude,bounds,geometry,type,details
0,PA_id_182981020,Sorgente del Brenta,46.007568,11.265688,,POINT (11.2656884 46.0075682),skiing,"{'drinking_water': 'no', 'natural': 'spring'}"
1,PA_id_270176737,,45.880423,10.82429,,POINT (10.8242897 45.8804228),skiing,"{'drinking_water': 'yes', 'ele': '1116', 'natu..."
2,PA_id_287670802,,46.2553,11.666197,,POINT (11.6661973 46.2552996),skiing,"{'amenity': 'drinking_water', 'natural': 'spri..."
3,PA_id_316143276,Sorgente del Pra,45.855293,11.048271,,POINT (11.0482711 45.8552932),skiing,{'natural': 'spring'}
4,PA_id_395572833,,45.838584,10.80842,,POINT (10.8084198 45.8385841),skiing,"{'amenity': 'drinking_water', 'natural': 'spri..."


In [158]:
# Save to CSV
sna_df.to_csv("./processed_data/waterfall_and_spring.csv", index=False, sep=";")
