## Import Packages

In [1]:
import pandas as pd
import json
import geopandas as gpd
from pprint import pprint
from shapely.geometry import Point, LineString, Polygon


## Process Data

In [125]:
def geometry_from_overpass(element):
    """Build a shapely geometry from an element's inline "geometry" list.

    Args:
        element: Mapping with a "geometry" entry holding a sequence of
            mappings that each provide "lon" and "lat".

    Returns:
        An empty LineString when there are no coordinates, a Point for a
        single coordinate, a Polygon when the coordinates form a closed ring
        (first == last and at least four coordinates), otherwise a
        LineString.

    Raises:
        KeyError: If "geometry", or a coordinate's "lon"/"lat", is missing.
    """
    coords = [(coord["lon"], coord["lat"]) for coord in element["geometry"]]

    if not coords:
        return LineString()
    if len(coords) == 1:
        # A lone coordinate trivially satisfies first == last but is not a ring.
        return Point(coords[0])
    # A valid closed ring needs at least four coordinates (three distinct
    # vertices plus the repeated first one); shorter "closed" sequences are
    # kept as lines instead of failing inside Polygon().
    if len(coords) >= 4 and coords[0] == coords[-1]:
        return Polygon(coords)
    return LineString(coords)


def get_gdf(data_dir):
    """Load a JSON export with an "elements" list into a GeoDataFrame.

    Each node becomes a Point built from its "lon"/"lat". Each way or
    relation that carries an inline "geometry" list is converted with
    geometry_from_overpass. Ways/relations without inline geometry and any
    other element type are skipped. Every row holds the element's tags plus
    "id", "name" (defaulting to "Unnamed"), "latitude", "longitude" (None
    when the element has no "lat"/"lon") and "geometry".

    Args:
        data_dir: Path of the JSON file to read (a file path, despite the
            parameter name).

    Returns:
        A GeoDataFrame with CRS EPSG:4326, one row per converted element.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the content is not valid UTF-8 encoded JSON
            (json.JSONDecodeError and UnicodeDecodeError are subclasses).
        KeyError: If "elements", or an element's "type"/"id", is missing.
    """
    # Load errors are allowed to propagate: returning an error string here
    # would only fail later, and less clearly, when the caller indexes it.
    with open(data_dir, encoding="utf-8") as f:
        data = json.load(f)

    features = []

    for element in data["elements"]:
        el_type = element["type"]

        # Copy the tags so the loaded data is not modified, and so elements
        # without a "tags" entry are handled instead of raising KeyError.
        row = dict(element.get("tags", {}))
        row["id"] = element["id"]
        row["name"] = row.get("name", "Unnamed")
        row["latitude"] = element.get("lat")
        row["longitude"] = element.get("lon")

        if el_type in ("way", "relation") and "geometry" in element:
            row["geometry"] = geometry_from_overpass(element)
        elif el_type == "node":
            row["geometry"] = Point(element["lon"], element["lat"])
        else:
            # No usable geometry for this element.
            continue

        features.append(row)

    return gpd.GeoDataFrame(features, crs="EPSG:4326")


### Caves

In [305]:
# Load the cave features and keep only the columns needed for the export.
gdf = get_gdf("./raw_data/caves.geojson")
caves_columns = ["id", "name", "longitude", "latitude", "geometry"]
# Take an independent copy and assign the column on the frame itself;
# assigning through a slice (`df[:][col] = ...`) would write to a temporary
# object and leave caves_df without the "type" column.
caves_df = gdf[caves_columns].copy()
caves_df["type"] = "cave"

In [306]:
# Write the caves table as a semicolon-separated CSV, without the index column.
caves_df.to_csv("./processed_data/caves.csv", index=False, sep=";")

### Cultural Attractions

These include:
* Artworks
* Memorials
* Galleries and museums
* Castles

In [307]:
# Load the cultural attractions export; the cells below split it by category.
gdf = get_gdf("./raw_data/cultural_attractions.geojson")


In [308]:
# Split the cultural attractions into one table per category, keeping only
# the columns relevant to each. Each table is an explicit copy because it is
# modified afterwards (new/renamed columns), which must not be done on a
# filtered selection of gdf.
artworks_df = gdf.loc[
    gdf.tourism == 'artwork',
    ['id', 'name', 'latitude', 'longitude', 'geometry',
     'artist_name', 'description', 'source:website', 'artwork_type'],
].copy()
# Memorials, monuments and castles share one table.
memorials_df = gdf.loc[
    gdf.historic.isin(['memorial', 'monument', 'castle']),
    ['id', 'name', 'latitude', 'longitude', 'geometry',
     'memorial', 'historic', 'inscription'],
].copy()
gallery_and_museum_df = gdf.loc[
    gdf.tourism.isin(['gallery', 'museum']),
    ['id', 'name', 'latitude', 'longitude', 'geometry',
     'contact:website', 'tourism',
     'addr:street', 'addr:city', 'addr:postcode', 'addr:housenumber'],
].copy()

In [309]:
# Normalise the column names of the three cultural tables.
artworks_df = artworks_df.rename(columns={'artwork_type': 'type', 'source:website': 'website'})

# "type" is the historic value, refined as "historic:memorial" when a memorial
# sub-type is tagged. Plain string addition would give NaN for every row
# without a memorial value (monuments and castles), so the historic value is
# kept on its own for those rows.
memorials_df = memorials_df.assign(
    type=memorials_df['historic'].where(
        memorials_df['memorial'].isna(),
        memorials_df['historic'] + ':' + memorials_df['memorial'],
    )
)
memorials_df = memorials_df.rename(columns={'memorial': 'memorial_type'}).drop(columns=['historic'])

gallery_and_museum_df = gallery_and_museum_df.rename(columns={
    'addr:street': 'street',
    'addr:city': 'city',
    'addr:postcode': 'postcode',
    'addr:housenumber': 'housenumber',
    'contact:website': 'website',
})

In [176]:
# Write each cultural table as a semicolon-separated CSV, without the index column.
artworks_df.to_csv("./processed_data/artworks.csv", index=False, sep=";")
memorials_df.to_csv("./processed_data/memorials.csv", index=False, sep=";")
gallery_and_museum_df.to_csv("./processed_data/gallery_and_museums.csv", index=False, sep=";")

### Food and Drink Establishments

In [196]:
# Load the food and drink establishments export.
gdf = get_gdf("./raw_data/food_and_drink_establishments.geojson")


In [216]:
# Keep only the columns used for the food-and-drink export. The result is an
# independent copy so the following cells can modify it freely.
food_and_drink_establishments_df = gdf.loc[:, [
    'id', 'latitude', 'longitude', 'geometry', 'name', 'cuisine', 'operator',
    'addr:street', 'addr:city', 'addr:postcode', 'addr:housenumber',
    'contact:website', 'phone', 'contact:phone', 'contact:email', 'website',
    'amenity',
]].copy()

In [217]:
# Prefer the plain "website"/"phone" values and fall back to the "contact:*"
# variants where the plain value is missing. The columns are assigned on the
# frame itself: writing through a slice (`df[:][col] = ...`) targets a
# temporary object and is not guaranteed to update the frame.
food_and_drink_establishments_df['website'] = food_and_drink_establishments_df['website'].combine_first(
    food_and_drink_establishments_df['contact:website']
)
food_and_drink_establishments_df['phone'] = food_and_drink_establishments_df['phone'].combine_first(
    food_and_drink_establishments_df['contact:phone']
)
# Shorten the address/contact column names; "amenity" becomes the record type.
food_and_drink_establishments_df.rename(columns={'addr:street':'street', 'addr:city':'city', 'addr:postcode':'postcode', 'addr:housenumber':'housenumber','amenity':'type','contact:email':'email'}, inplace=True)
# The "contact:*" columns are now merged into "website"/"phone".
food_and_drink_establishments_df.drop(columns=['contact:website', 'contact:phone'], inplace=True)

In [218]:
# Write the food and drink table as a semicolon-separated CSV, without the index column.
food_and_drink_establishments_df.to_csv("./processed_data/food_and_drink_establishments.csv", index=False,sep=";")

### Holiday Apartments and Houses

In [219]:
# Load the holiday apartments/houses export and keep the export columns as an
# independent copy.
gdf = get_gdf("./raw_data/holiday_apartments_and_houses.geojson")

holiday_apartments_and_houses_df = gdf.loc[:, [
    'id', 'latitude', 'longitude', 'geometry', 'name',
    'addr:street', 'addr:city', 'addr:postcode', 'addr:housenumber',
    'contact:website', 'contact:phone', 'contact:email',
    'website', 'phone', 'email',
    'tourism',
]].copy()


In [236]:
# Prefer the plain "website"/"phone"/"email" values and fall back to the
# "contact:*" variants where the plain value is missing. The columns are
# assigned on the frame itself: writing through a slice (`df[:][col] = ...`)
# targets a temporary object and is not guaranteed to update the frame.
holiday_apartments_and_houses_df['website'] = holiday_apartments_and_houses_df['website'].combine_first(
    holiday_apartments_and_houses_df['contact:website']
)
holiday_apartments_and_houses_df['phone'] = holiday_apartments_and_houses_df['phone'].combine_first(
    holiday_apartments_and_houses_df['contact:phone']
)
holiday_apartments_and_houses_df['email'] = holiday_apartments_and_houses_df['email'].combine_first(
    holiday_apartments_and_houses_df['contact:email']
)
# Shorten the address column names; "tourism" becomes the record type.
holiday_apartments_and_houses_df.rename(columns={'addr:street':'street', 'addr:city':'city', 'addr:postcode':'postcode', 'addr:housenumber':'housenumber','tourism':'type'}, inplace=True)

# The "contact:*" columns are now merged into their plain counterparts.
holiday_apartments_and_houses_df.drop(columns=['contact:website', 'contact:phone', 'contact:email'], inplace=True)

In [238]:
# Write the holiday apartments/houses table as a semicolon-separated CSV, without the index column.
holiday_apartments_and_houses_df.to_csv("./processed_data/holiday_apartments_and_houses.csv", index=False, sep=";")


### Hotels and Accommodation

In [239]:
# Load the hotels/accommodation export and keep the export columns as an
# independent copy.
gdf = get_gdf("./raw_data/hotels_and_accommodation.geojson")

hotels_and_accommodation_df = gdf.loc[:, [
    'id', 'latitude', 'longitude', 'geometry', 'name', 'tourism',
    'addr:street', 'addr:city', 'addr:postcode', 'addr:housenumber',
    'cuisine', 'description', 'operator',
    'contact:website', 'contact:phone', 'contact:email',
    'website', 'phone', 'email',
]].copy()



In [240]:
# Prefer the plain "website"/"phone"/"email" values and fall back to the
# "contact:*" variants where the plain value is missing. The columns are
# assigned on the frame itself: writing through a slice (`df[:][col] = ...`)
# targets a temporary object and is not guaranteed to update the frame.
hotels_and_accommodation_df['website'] = hotels_and_accommodation_df['website'].combine_first(
    hotels_and_accommodation_df['contact:website']
)
hotels_and_accommodation_df['phone'] = hotels_and_accommodation_df['phone'].combine_first(
    hotels_and_accommodation_df['contact:phone']
)
hotels_and_accommodation_df['email'] = hotels_and_accommodation_df['email'].combine_first(
    hotels_and_accommodation_df['contact:email']
)
# Shorten the address column names; "tourism" becomes the record type.
hotels_and_accommodation_df.rename(columns={'addr:street':'street', 'addr:city':'city', 'addr:postcode':'postcode', 'addr:housenumber':'housenumber','tourism':'type'}, inplace=True)

# The "contact:*" columns are now merged into their plain counterparts.
hotels_and_accommodation_df.drop(columns=['contact:website', 'contact:phone', 'contact:email'], inplace=True)

In [244]:
# Write the hotels/accommodation table as a semicolon-separated CSV, without the index column.
hotels_and_accommodation_df.to_csv("./processed_data/hotels_and_accommodation.csv", index=False,sep=";")


### Lakes and Rivers

In [247]:
# Read the raw lakes/rivers JSON. The encoding is given explicitly (as in
# get_gdf) so non-ASCII names do not depend on the platform default encoding.
with open('./raw_data/lakes_and_rivers.geojson', "r", encoding="utf-8") as file:
    data = json.load(file)


In [258]:
# Initialize lists to hold lake and river data
lakes = []
rivers = []

# Process each element in the JSON data
for element in data["elements"]:
    tags = element.get("tags", {})

    # Extract lakes based on 'natural=water' and 'water=lake'.
    # Only relations are exported; lake elements of any other type are skipped.
    # NOTE(review): confirm that dropping non-relation lakes is intended.
    if tags.get("natural") == "water" and tags.get("water") == "lake":
        if element["type"] == "relation":
            # Only the first member's geometry is kept. A relation with no
            # (or an empty) "members" list gets an empty geometry instead of
            # raising KeyError/IndexError.
            members = element.get("members") or []
            lakes.append({
                "id": element["id"],
                "name": tags.get("name", "Unnamed"),
                # Relations have no own lat/lon; use "center" when present.
                "latitude": element.get("center", {}).get("lat"),
                "longitude": element.get("center", {}).get("lon"),
                "bounds": element.get("bounds", ''),
                "geometry": members[0].get("geometry", '') if members else '',
                "type": "lake",
            })

    # Extract rivers based on 'waterway=river'
    elif tags.get("waterway") == "river":
        rivers.append({
            "id": element["id"],
            "name": tags.get("name", "Unnamed"),
            "latitude": element.get("center", {}).get("lat"),
            "longitude": element.get("center", {}).get("lon"),
            "bounds": element.get("bounds", ''),
            "geometry": element.get("geometry", ''),
            "type": "river",
        })

# Create DataFrames for lakes and rivers
lakes_df = pd.DataFrame(lakes)
rivers_df = pd.DataFrame(rivers)




In [259]:
# Inspect the (rows, columns) shape of the rivers table.
rivers_df.shape

(163, 7)

In [268]:
# Write the lakes and rivers tables as semicolon-separated CSVs, without the index column.
lakes_df.to_csv("./processed_data/lakes.csv", index=False, sep=";")
rivers_df.to_csv("./processed_data/rivers.csv", index=False, sep=";")

### Beaches

In [261]:
# Read the raw beaches JSON. The encoding is given explicitly (as in get_gdf)
# so non-ASCII names do not depend on the platform default encoding.
with open('./raw_data/beaches.geojson', "r", encoding="utf-8") as file:
    data = json.load(file)


In [262]:
# Collect the distinct tag keys used by any element, to see which attributes
# are available. Elements without tags contribute nothing.
tag_items = set().union(*(el.get("tags", {}) for el in data["elements"]))
tag_items

{'access',
 'created_by',
 'description',
 'dog',
 'leisure',
 'lifeguard',
 'name',
 'natural',
 'sport',
 'supervised',
 'surface'}

In [266]:
# Initialize the list to hold beach data
beach = []

# Process each element in the JSON data
for element in data["elements"]:
    tags = element.get("tags", {})
    # Extract beaches based on 'natural=beach'
    if tags.get("natural") != "beach":
        continue

    # Elements with their own "lat"/"lon" use those; otherwise fall back to
    # "center" when the element provides one (None if neither is present).
    center = element.get("center", {})
    surface = tags.get("surface")
    beach.append({
        "id": "beach_id_" + str(element["id"]),
        "name": tags.get("name", "Unnamed"),
        "latitude": element.get("lat", center.get("lat")),
        "longitude": element.get("lon", center.get("lon")),
        "bounds": element.get("bounds", ''),
        "geometry": element.get("geometry", ''),
        # "beach:<surface>" when a surface is tagged, plain "beach" otherwise.
        "type": "beach:" + surface if surface else 'beach',
    })


# Create DataFrames for beach
beach_df = pd.DataFrame(beach)




In [269]:
# Write the beaches table as a semicolon-separated CSV, without the index column.
beach_df.to_csv("./processed_data/beaches.csv", index=False, sep=";")

### Peaks and Viewpoints

In [275]:
# Load the peaks/viewpoints export, keep the export columns as an independent
# copy and expose the "natural" value as the record type.
# NOTE(review): rows without a "natural" value end up with an empty type —
# confirm this is acceptable for features identified by another tag.
gdf = get_gdf("./raw_data/peaks_and_viewpoints.geojson")


peaks_and_viewpoints_df = (
    gdf.loc[:, [
        "id", "name", "longitude", "latitude", "geometry",
        "description", "historic", "amenity", "height", "website",
        "natural",
    ]]
    .copy()
    .rename(columns={'natural': 'type'})
)

In [277]:
# Write the peaks/viewpoints table as a semicolon-separated CSV, without the index column.
peaks_and_viewpoints_df.to_csv("./processed_data/peaks_and_viewpoints.csv", index=False, sep=";")

### Protected Areas

In [286]:
# Read the raw protected areas JSON. The encoding is given explicitly (as in
# get_gdf) so non-ASCII names do not depend on the platform default encoding.
with open('./raw_data/protected_areas.geojson', "r", encoding="utf-8") as file:
    data = json.load(file)

In [287]:


# Collect one record per element of the protected areas data.
protected_areas = []


for element in data["elements"]:
    tags = element.get("tags", {})
    # Use "center" for the coordinates when present; otherwise fall back to
    # the element's own "lat"/"lon" (None if neither exists) instead of
    # raising KeyError.
    center = element.get("center", {})
    protected_areas.append({
        "id": "PA_id_" + str(element["id"]),
        "latitude": center.get("lat", element.get("lat")),
        "longitude": center.get("lon", element.get("lon")),
        "geometry": element.get("geometry", ''),
        "name": tags.get("name"),
        "source": tags.get("source"),
        "website": tags.get("website"),
        "protection_title": tags.get("protection_title"),
        "leisure": tags.get("leisure"),
        "type": 'protected area',
    })


protected_areas_df = pd.DataFrame(protected_areas)




In [288]:
# Display the protected areas table for a visual check.
protected_areas_df

Unnamed: 0,id,latitude,longitude,geometry,name,source,website,protection_title,leisure,type
0,PA_id_4710461,45.884332,10.692263,,Rete di riserve Alpi Ledrensi,Provincia autonoma di Trento,https://www.reteriservealpiledrensi.tn.it/,,,protected area
1,PA_id_15595792,46.490497,11.586921,,Naturpark Schlern-Rosengarten – Parco naturale...,EEA CDDA (v20),https://parchi-naturali.provincia.bz.it/parco-...,Parco naturale,nature_reserve,protected area
2,PA_id_15638582,46.53694,11.668578,,Landschaftsschutzgebiet Seiser Alm – Zona di T...,"""Piani paesaggistici: Delimitazioni dei Piani ...",https://seiseralm-schlerngebiet.com/seiseralm/...,Zona di Tutela Paesaggistica,,protected area


In [297]:
# Write the protected areas table as a semicolon-separated CSV, without the index column.
protected_areas_df.to_csv("./processed_data/protected_areas.csv", index=False, sep=";")

### Skiing and Winter Sports Facilities

In [320]:
# Read the raw skiing/winter sports JSON. The encoding is given explicitly (as
# in get_gdf) so non-ASCII names do not depend on the platform default encoding.
with open('./raw_data/skiing_and_winter_sports.geojson', "r", encoding="utf-8") as file:
    data = json.load(file)

In [323]:
# Collect one record per skiing/winter sports element.
sna = []

for element in data["elements"]:
    # Work on a copy of the tags and only read from the element, so the
    # loaded data stays intact and this cell gives the same result when re-run.
    tags = dict(element.get("tags", {}))
    name = tags.pop("name", '')

    # "<sport>:<piste type>" when a piste type is tagged ("sport" defaults to
    # 'skiing' and is then moved out of the details); empty otherwise.
    # NOTE(review): elements without "piste:type" get an empty type even if
    # "sport" is set — confirm this is intended.
    piste_type = tags.get("piste:type")
    if piste_type:
        facility_type = tags.pop("sport", 'skiing') + ':' + piste_type
    else:
        facility_type = ''

    if element.get("type") == "way":
        # Ways are exported without coordinates.
        latitude = longitude = ''
    else:
        # Use the element's own coordinates; elements without them fall back
        # to "center" (or '' when neither exists) instead of raising KeyError.
        center = element.get("center", {})
        latitude = element.get("lat", center.get("lat", ''))
        longitude = element.get("lon", center.get("lon", ''))

    sna.append({
        # NOTE(review): the "PA_id_" prefix is the same one used for the
        # protected areas export — confirm it is intended here.
        "id": "PA_id_" + str(element["id"]),
        "name": name,
        "latitude": latitude,
        "longitude": longitude,
        "bounds": element.get("bounds", ''),
        "geometry": element.get("geometry", ''),
        "type": facility_type,
        # Remaining tags (everything not moved into a column above).
        "details": tags,
    })


sna_df = pd.DataFrame(sna)




In [296]:
# Write the skiing/winter sports table as a semicolon-separated CSV, without the index column.
sna_df.to_csv("./processed_data/skiing_and_winter_sports.csv", index=False, sep=";")

### Waterfall and Spring

In [298]:
# Read the raw waterfall/spring JSON. The encoding is given explicitly (as in
# get_gdf) so non-ASCII names do not depend on the platform default encoding.
with open('./raw_data/waterfall_and_spring.geojson', "r", encoding="utf-8") as file:
    data = json.load(file)

In [301]:
# Collect one record per waterfall/spring element.
was = []


for element in data["elements"]:
    tags = element.get("tags", {})
    record = {"id": "WaS_id_" + str(element["id"])}
    # Nodes carry their own coordinates; every other element type is read
    # from its "center" entry.
    position = element if element["type"] == "node" else element['center']
    record["latitude"] = position["lat"]
    record["longitude"] = position["lon"]
    record.update(
        name=tags.get("name", ''),
        amenity=tags.get("amenity", ''),
        description=tags.get("description", ''),
        # Falls back to 'waterfall' when no "natural" tag is present.
        type=tags.get("natural", 'waterfall'),
    )
    was.append(record)


was_df = pd.DataFrame(was)




In [302]:
# Inspect the (rows, columns) shape of the waterfall/spring table.
was_df.shape

(297, 7)

In [303]:
# Write the waterfall/spring table as a semicolon-separated CSV, without the index column.
was_df.to_csv("./processed_data/waterfall_and_spring.csv", index=False, sep=";")