## Import Packages

In [3]:
import pandas as pd
import numpy as np 
import json
import geopandas as gpd

## Process Data

### Caves

In [4]:
# Read the raw caves export (JSON with a top-level "elements" list, despite the
# .geojson extension). The encoding is pinned to UTF-8 because open() otherwise
# uses the platform locale, which can garble accented names on some systems.
with open('./raw_data/caves.geojson', "r", encoding="utf-8") as file:
    data = json.load(file)

# Keep only node elements tagged natural=cave_entrance; one flat record per cave.
# Caves without a "name" tag are labelled "Unnamed".
caves = [
    {
        "id": "cave_id_" + str(element["id"]),
        "name": element["tags"].get("name", "Unnamed"),
        "latitude": element["lat"],
        "longitude": element["lon"],
    }
    for element in data["elements"]
    if element["type"] == "node"
    and element.get("tags", {}).get("natural") == "cave_entrance"
]

caves_df = pd.DataFrame(caves)




In [3]:
# Preview the extracted caves table.
caves_df

Unnamed: 0,id,name,latitude,longitude
0,cave_id_498740308,Bus del nono,45.782194,10.992617
1,cave_id_508788420,Prigione del Sasso,45.921416,10.888087
2,cave_id_600025838,Grotta Damiano Chiesa,45.853438,11.036204
3,cave_id_610991691,Unnamed,45.881934,10.884507
4,cave_id_641260808,Miniera dell'Aubis,46.111971,11.313204
...,...,...,...,...
484,cave_id_12093061624,Unnamed,46.252910,10.600871
485,cave_id_12123977435,Piccola grotta,45.829544,10.939140
486,cave_id_12124039308,Piccola grotta,45.833304,10.933865
487,cave_id_12176582581,Unnamed,45.816664,10.974215


In [4]:
# Count distinct values per column (an `id` count equal to the row count means IDs are unique).
caves_df.nunique()

id           489
name         242
latitude     489
longitude    489
dtype: int64

In [5]:
# Count missing values per column.
caves_df.isna().sum()

id           0
name         0
latitude     0
longitude    0
dtype: int64

In [None]:
# Write the caves table to CSV, leaving out the DataFrame index.
caves_df.to_csv("./processed_data/caves.csv", index=False)

### Cultural Attractions

These include:
* Artworks
* Memorials and monuments
* Galleries and museums
* Castles

In [10]:
# Read the raw cultural-attractions export. UTF-8 is set explicitly because the
# default text encoding of open() depends on the platform locale.
with open('./raw_data/cultural_attractions.geojson', "r", encoding="utf-8") as file:
    data = json.load(file)

In [11]:

artworks = []
memorials = []
gallery_and_museum = []

# Each element lands in at most one bucket; the branch order below is the
# priority (artwork, then memorial/monument/castle, then gallery/museum).
for element in data["elements"]:
    tags = element.get("tags", {})
    tourism = tags.get("tourism")
    historic = tags.get("historic")

    if tourism == "artwork":
        # Join description and inscription with a space so the two texts do not
        # run into each other when both tags are present.
        description = " ".join(
            text
            for text in (tags.get("description"), tags.get("inscription"))
            if text
        )
        artworks.append({
            "id": "artwork_id_" + str(element["id"]),
            "name": tags.get("name", ""),
            "latitude": element["lat"],
            "longitude": element["lon"],
            "artist_name": tags.get("artist_name", ""),
            "artwork_type": tags.get("artwork_type", ""),
            "description": description,
            "website": tags.get("source:website", ""),
        })
    elif historic in ("memorial", "monument", "castle"):
        # NOTE(review): castles receive the "monument_id_" prefix as well; the
        # "historic_type" column is what distinguishes them. Confirm this is intended.
        id_prefix = "memorial_id_" if historic == "memorial" else "monument_id_"
        memorials.append({
            "id": id_prefix + str(element["id"]),
            "name": tags.get("name", "Unnamed"),
            "latitude": element["lat"],
            "longitude": element["lon"],
            "memorial_type": tags.get("memorial", ""),
            "historic_type": historic,
            "inscription": tags.get("inscription", ""),
        })
    elif tourism in ("gallery", "museum"):
        id_prefix = "gallery_id_" if tourism == "gallery" else "museum_id_"
        gallery_and_museum.append({
            "id": id_prefix + str(element["id"]),
            "name": tags.get("name", "Unnamed"),
            "latitude": element["lat"],
            "longitude": element["lon"],
            # Prefer "contact:website" and fall back to the plain "website" tag,
            # matching how the other sections of this notebook pick a website.
            "website": tags.get("contact:website") or tags.get("website", ""),
            "type": tourism,
            "street": tags.get("addr:street", ""),
            "city": tags.get("addr:city", ""),
            "postcode": tags.get("addr:postcode", ""),
            "housenumber": tags.get("addr:housenumber", ""),
        })

# Convert lists to DataFrames
artworks_df = pd.DataFrame(artworks)
memorials_df = pd.DataFrame(memorials)
gallery_and_museum_df = pd.DataFrame(gallery_and_museum)




In [12]:
# Total number of cultural attractions extracted across the three tables.
len(artworks_df)+len(memorials_df)+len(gallery_and_museum_df)

994

In [13]:
# Count missing values per column (absent tags were filled with '' above, so zeros are expected).
gallery_and_museum_df.isna().sum()

id             0
name           0
latitude       0
longitude      0
website        0
type           0
street         0
city           0
postcode       0
housenumber    0
dtype: int64

In [14]:
# Write each cultural-attraction table to its own CSV, leaving out the index.
for frame, target in (
    (artworks_df, "./processed_data/artworks.csv"),
    (memorials_df, "./processed_data/memorials.csv"),
    (gallery_and_museum_df, "./processed_data/gallery_and_museums.csv"),
):
    frame.to_csv(target, index=False)

### Food and Drink Establishments

In [6]:
# Read the raw food-and-drink export. UTF-8 is set explicitly because the
# default text encoding of open() depends on the platform locale.
with open('./raw_data/food_and_drink_establishments.geojson', "r", encoding="utf-8") as file:
    data = json.load(file)


def _food_and_drink_record(element):
    """Flatten one raw element into a row of the food-and-drink table.

    Absent tags become empty strings. For website, phone and email the
    "contact:*" tag wins, with the plain tag as a fallback.
    """
    tags = element.get("tags", {})
    return {
        "id": "FnD_id_" + str(element["id"]),
        "latitude": element["lat"],
        "longitude": element["lon"],
        "name": tags.get("name", ""),
        "cuisine": tags.get("cuisine", ""),
        "operator": tags.get("operator", ""),
        "street": tags.get("addr:street", ""),
        "city": tags.get("addr:city", ""),
        "postcode": tags.get("addr:postcode", ""),
        "housenumber": tags.get("addr:housenumber", ""),
        "website": tags.get("contact:website") or tags.get("website", ""),
        "phone": tags.get("contact:phone") or tags.get("phone", ""),
        "email": tags.get("contact:email") or tags.get("email", ""),
    }


food_and_drink_establishments = [
    _food_and_drink_record(element) for element in data["elements"]
]

food_and_drink_establishments_df = pd.DataFrame(food_and_drink_establishments)




In [35]:
# (rows, columns) of the food-and-drink table.
food_and_drink_establishments_df.shape

(2293, 13)

In [184]:
# Preview the extracted food-and-drink table.
food_and_drink_establishments_df

Unnamed: 0,id,latitude,longitude,name,cuisine,operator,street,city,postcode,housenumber,website,phone,email
0,FnD_id_247887464,46.071826,11.140469,Dal Boccia,pasta;pizza;regional;seafood,,Via degli Alberti Poja,,38121-38123,13,,+39 0461 437903,
1,FnD_id_258441546,45.917165,11.053101,"Pizzeria ""Le palme""",pizza,,Via Panizza,,,56,,+39 0464 411201,
2,FnD_id_259973562,46.109610,11.140262,Il bosco incantato,,,Via del Lavaron,Trento,,2,,+39 0461 960826,
3,FnD_id_260573164,46.273027,11.416570,Bar Sport,,,,,,,,,
4,FnD_id_261081650,46.280372,11.434969,Bar La Torre,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2288,FnD_id_12193137181,45.886553,10.840359,Fuori Stile,,,,,,,,,
2289,FnD_id_12193149487,45.886585,10.840136,Basil Pizza alla Pala,pizza,,Viale Dante Alighieri,,,71,,,
2290,FnD_id_12265635488,46.443751,11.141733,Hotel Lago Smeraldo,,,Via Lago Smeraldo,Fondo,38013,12,https://www.hotellagosmeraldo.it/,+39 0463 831104,info@hotellagosmeraldo.it
2291,FnD_id_12279573100,46.296321,10.688133,Osteria Tipica Trentina,,,,,,,,,


In [None]:
# Write the food-and-drink table to CSV, leaving out the DataFrame index.
food_and_drink_establishments_df.to_csv("./processed_data/food_and_drink_establishments.csv", index=False)

### Holiday Apartments and Houses

In [None]:
# Read the raw holiday apartments/houses export. UTF-8 is set explicitly because
# the default text encoding of open() depends on the platform locale.
with open('./raw_data/holiday_apartments_and_houses.geojson', "r", encoding="utf-8") as file:
    data = json.load(file)

# One flat record per element. Absent tags become empty strings; for website,
# phone and email the "contact:*" tag wins, with the plain tag as a fallback.
holiday_apartments_and_houses = []
for element in data["elements"]:
    tags = element.get("tags", {})
    holiday_apartments_and_houses.append({
        "id": f"HAmH_id_{element['id']}",
        "latitude": element["lat"],
        "longitude": element["lon"],
        "name": tags.get("name", ""),
        "tourism_type": tags.get("tourism", ""),
        "street": tags.get("addr:street", ""),
        "city": tags.get("addr:city", ""),
        "postcode": tags.get("addr:postcode", ""),
        "housenumber": tags.get("addr:housenumber", ""),
        "website": tags.get("contact:website") or tags.get("website", ""),
        "phone": tags.get("contact:phone") or tags.get("phone", ""),
        "email": tags.get("contact:email") or tags.get("email", ""),
    })

holiday_apartments_and_houses_df = pd.DataFrame(holiday_apartments_and_houses)




In [42]:
# (rows, columns) of the holiday apartments/houses table.
holiday_apartments_and_houses_df.shape

(244, 12)

In [None]:
# Write the holiday apartments/houses table to CSV, leaving out the DataFrame index.
holiday_apartments_and_houses_df.to_csv("./processed_data/holiday_apartments_and_houses.csv", index=False)


### Hotels and Accommodation

In [None]:
# Read the raw hotels/accommodation export. UTF-8 is set explicitly because the
# default text encoding of open() depends on the platform locale.
with open('./raw_data/hotels_and_accommodation.geojson', "r", encoding="utf-8") as file:
    data = json.load(file)


In [39]:
# Gather every distinct tag key that appears on any element, to see which
# fields are available before choosing the columns to extract.
tag_items = set().union(
    *(element.get("tags", {}) for element in data["elements"])
)

tag_items

{'access',
 'addr:city',
 'addr:country',
 'addr:district',
 'addr:full',
 'addr:hamlet',
 'addr:housename',
 'addr:housenumber',
 'addr:place',
 'addr:postcode',
 'addr:province',
 'addr:street',
 'addr:suburb',
 'addr:unit',
 'agritourism',
 'air_conditioning',
 'alt_name',
 'amenity',
 'backcountry',
 'bar',
 'beds',
 'brand',
 'brand:wikidata',
 'breakfast',
 'building',
 'building:levels',
 'cafe',
 'capacity',
 'capacity:caravans',
 'caravans',
 'charge',
 'check_date',
 'check_date:currency:XBT',
 'check_date:internet_access',
 'contact:email',
 'contact:facebook',
 'contact:fax',
 'contact:instagram',
 'contact:mobile',
 'contact:name',
 'contact:phone',
 'contact:tripadvisor',
 'contact:website',
 'craft',
 'created_by',
 'cuisine',
 'currency:XBT',
 'delivery',
 'delivery:description',
 'description',
 'description:it',
 'designation',
 'diet:gluten_free',
 'diet:vegan',
 'diet:vegetarian',
 'disabled',
 'drinking_water',
 'ele',
 'ele:ellipsoid',
 'email',
 'entrance',
 'ent

In [None]:


# One flat record per element. Absent tags become empty strings; for website,
# phone and email the "contact:*" tag wins, with the plain tag as a fallback.
# The single-item inner loop only binds `tags` once per element.
hotels_and_accommodation = [
    {
        "id": f"HnA_id_{element['id']}",
        "latitude": element["lat"],
        "longitude": element["lon"],
        "name": tags.get("name", ""),
        "tourism_type": tags.get("tourism", ""),
        "street": tags.get("addr:street", ""),
        "city": tags.get("addr:city", ""),
        "postcode": tags.get("addr:postcode", ""),
        "housenumber": tags.get("addr:housenumber", ""),
        "cuisine": tags.get("cuisine", ""),
        "description": tags.get("description", ""),
        "operator": tags.get("operator", ""),
        "website": tags.get("contact:website") or tags.get("website", ""),
        "phone": tags.get("contact:phone") or tags.get("phone", ""),
        "email": tags.get("contact:email") or tags.get("email", ""),
    }
    for element in data["elements"]
    for tags in [element.get("tags", {})]
]

hotels_and_accommodation_df = pd.DataFrame(hotels_and_accommodation)




In [54]:
# (rows, columns) of the hotels/accommodation table.
hotels_and_accommodation_df.shape

(759, 15)

In [None]:
# Write the hotels/accommodation table to CSV, leaving out the DataFrame index.
hotels_and_accommodation_df.to_csv("./processed_data/hotels_and_accommodation.csv", index=False)


### Lakes and Rivers

In [None]:
# Read the raw lakes/rivers export. UTF-8 is set explicitly because the default
# text encoding of open() depends on the platform locale.
with open('./raw_data/lakes_and_rivers.geojson', "r", encoding="utf-8") as file:
    data = json.load(file)


In [49]:
# Gather every distinct tag key that appears on any lake/river element.
tag_items = set().union(
    *(element.get("tags", {}) for element in data["elements"])
)


In [51]:
# Lakes and rivers are collected separately from the same element list.
# NOTE(review): unlike the other sections, the IDs here are the raw element IDs
# with no dataset prefix — confirm whether downstream code relies on that.
lakes = []
rivers = []

for element in data["elements"]:
    tags = element.get("tags", {})
    # Coordinates are taken from the element's "center" entry; when it is
    # absent, latitude/longitude end up as None.
    center = element.get("center", {})

    # Lakes: natural=water together with water=lake.
    if tags.get("natural") == "water" and tags.get("water") == "lake":
        # Only relation-type lakes are kept; lake elements of any other type are
        # skipped and never reach the river branch below.
        # NOTE(review): confirm that dropping non-relation lakes is intended.
        if element["type"] == "relation":
            lakes.append({
                "id": element["id"],
                "name": tags.get("name", "Unnamed"),
                "latitude": center.get("lat"),
                "longitude": center.get("lon"),
            })

    # Rivers: waterway=river.
    elif tags.get("waterway") == "river":
        rivers.append({
            "id": element["id"],
            "name": tags.get("name", "Unnamed"),
            "latitude": center.get("lat"),
            "longitude": center.get("lon"),
        })

# Create DataFrames for lakes and rivers
lakes_df = pd.DataFrame(lakes)
rivers_df = pd.DataFrame(rivers)




In [64]:
# (rows, columns) of the rivers table.
rivers_df.shape

(138, 4)

In [None]:
# TODO: river names repeat across rows, each with different coordinates —
# decide whether to deduplicate or aggregate them per river.
# Write both tables to CSV, leaving out the DataFrame index.
lakes_df.to_csv("./processed_data/lakes.csv", index=False)
rivers_df.to_csv("./processed_data/rivers.csv", index=False)

### Peaks and Viewpoints

In [None]:
# Read the raw peaks/viewpoints export. UTF-8 is set explicitly because the
# default text encoding of open() depends on the platform locale.
with open('./raw_data/peaks_and_viewpoints.geojson', "r", encoding="utf-8") as file:
    data = json.load(file)


In [58]:


def _peak_or_viewpoint_row(element):
    """Flatten one raw element into a row of the peaks/viewpoints table."""
    tags = element.get("tags", {})
    row = {
        "id": f"PnV_id_{element['id']}",
        "latitude": element["lat"],
        "longitude": element["lon"],
    }
    # Optional tags are copied under the same column name; absent ones stay None.
    for key in ("name", "description", "historic", "amenity", "height"):
        row[key] = tags.get(key)
    # "contact:website" wins; the plain "website" tag (or '') is the fallback.
    row["website"] = tags.get("contact:website") or tags.get("website", "")
    return row


peaks_and_viewpoints = [
    _peak_or_viewpoint_row(element) for element in data["elements"]
]

peaks_and_viewpoints_df = pd.DataFrame(peaks_and_viewpoints)




In [66]:
# (rows, columns) of the peaks/viewpoints table.
peaks_and_viewpoints_df.shape

(2542, 9)

In [None]:
# Write the peaks/viewpoints table to CSV, leaving out the DataFrame index.
peaks_and_viewpoints_df.to_csv("./processed_data/peaks_and_viewpoints.csv", index=False)

### Protected Areas

In [None]:
# Read the raw protected-areas export. UTF-8 is set explicitly because the
# default text encoding of open() depends on the platform locale.
with open('./raw_data/protected_areas.geojson', "r", encoding="utf-8") as file:
    data = json.load(file)

In [68]:


# Tag keys copied one-to-one into columns of the same name (None when absent).
_PROTECTED_AREA_TAG_COLUMNS = ("name", "source", "website", "protection_title", "leisure")

protected_areas = []
for element in data["elements"]:
    tags = element.get("tags", {})
    # Coordinates are read from the element's "center" entry, which every
    # element in this dataset is required to have.
    located = {
        "id": f"PA_id_{element['id']}",
        "latitude": element["center"]["lat"],
        "longitude": element["center"]["lon"],
    }
    tagged = {column: tags.get(column) for column in _PROTECTED_AREA_TAG_COLUMNS}
    protected_areas.append({**located, **tagged})

protected_areas_df = pd.DataFrame(protected_areas)




In [73]:
# (rows, columns) of the protected-areas table.
protected_areas_df.shape

(3, 8)

In [None]:
# Write the protected-areas table to CSV, leaving out the DataFrame index.
protected_areas_df.to_csv("./processed_data/protected_areas.csv", index=False)

### Skiing and Winter Sports Facilities

In [None]:
# Read the raw skiing/winter-sports export. UTF-8 is set explicitly because the
# default text encoding of open() depends on the platform locale.
with open('./raw_data/skiing_and_winter_sports.geojson', "r", encoding="utf-8") as file:
    data = json.load(file)

In [75]:
# Gather every distinct tag key that appears on any skiing/winter-sports element.
tag_items = set().union(
    *(element.get("tags", {}) for element in data["elements"])
)
tag_items

{'abandoned',
 'abandoned:railway',
 'access',
 'aerialway',
 'agricultural',
 'alt_name',
 'area',
 'bicycle',
 'bridge',
 'covered',
 'description',
 'description:de',
 'description:en',
 'description:it',
 'designation',
 'disabled',
 'emergency',
 'fee',
 'fixme',
 'foot',
 'forestry',
 'hgv',
 'highway',
 'horse',
 'incline',
 'lane_markings',
 'lanes',
 'layer',
 'leisure',
 'lit',
 'loc_ref',
 'man_made',
 'maxlength',
 'maxspeed',
 'motor_vehicle',
 'motorcar',
 'mtb:scale',
 'mtb:scale:uphill',
 'name',
 'name:de',
 'name:it',
 'note',
 'note:en',
 'note:it',
 'oneway',
 'oneway:bicycle',
 'piste:abandoned',
 'piste:difficulty',
 'piste:grooming',
 'piste:lit',
 'piste:name',
 'piste:oneway',
 'piste:ref',
 'piste:snowboard',
 'piste:type',
 'railway',
 'ref',
 'ref:hiking',
 'ref:pat',
 'route',
 'sac_scale',
 'segregated',
 'ski',
 'smoothness',
 'source',
 'source_ref',
 'sport',
 'surface',
 'tracktype',
 'trail_visibility',
 'tunnel',
 'website',
 'width',
 'wikipedia'}

In [None]:



# One flat record per skiing/winter-sports element; absent tags become ''.
sna = []

for element in data["elements"]:
    tags = element.get("tags", {})
    # Nodes carry lat/lon directly; other element types expose them under "center".
    position = element if element["type"] == "node" else element["center"]
    sna.append({
        # Dataset-specific prefix. This used to be "PA_id_", copied from the
        # protected-areas cell, which put both datasets in one ID namespace.
        "id": "SnWS_id_" + str(element["id"]),
        "latitude": position["lat"],
        "longitude": position["lon"],
        "name": tags.get("name", ""),
        "sport": tags.get("sport", ""),
        "description": tags.get("description", ""),
    })

sna_df = pd.DataFrame(sna)




In [79]:
# (rows, columns) of the skiing/winter-sports table. The shape is read straight
# from the frame; going through .isna() first gave the same tuple.
sna_df.shape

(1292, 6)

In [None]:
# Write the skiing/winter-sports table to CSV, leaving out the DataFrame index.
sna_df.to_csv("./processed_data/skiing_and_winter_sports.csv", index=False)

### Waterfall and Spring

In [None]:
# Read the raw waterfall/spring export. UTF-8 is set explicitly because the
# default text encoding of open() depends on the platform locale.
with open('./raw_data/waterfall_and_spring.geojson', "r", encoding="utf-8") as file:
    data = json.load(file)

In [81]:
# Gather every distinct tag key that appears on any waterfall/spring element.
tag_items = set().union(
    *(element.get("tags", {}) for element in data["elements"])
)
tag_items

{'access',
 'amenity',
 'amenity_1',
 'check_date',
 'covered',
 'description',
 'drinking_water',
 'drinking_water:legal',
 'ele',
 'fee',
 'fixme',
 'intermittent',
 'loc_name',
 'mapillary',
 'material',
 'name',
 'name:cs',
 'name:de',
 'name:en',
 'name:it',
 'natural',
 'noref',
 'note',
 'operator',
 'place',
 'refitted',
 'source',
 'wheelchair',
 'wikimedia_commons'}

In [None]:
# One flat record per waterfall/spring element; absent tags become ''.
# Nodes carry lat/lon directly, other element types expose them under "center".
was = [
    {
        "id": f"WaS_id_{element['id']}",
        "latitude": element["lat"] if element["type"] == "node" else element["center"]["lat"],
        "longitude": element["lon"] if element["type"] == "node" else element["center"]["lon"],
        "name": tags.get("name", ""),
        "amenity": tags.get("amenity", ""),
        "description": tags.get("description", ""),
    }
    for element in data["elements"]
    for tags in [element.get("tags", {})]
]

was_df = pd.DataFrame(was)




In [85]:
# (rows, columns) of the waterfall/spring table.
was_df.shape

(295, 6)

In [None]:
# Write the waterfall/spring table to CSV, leaving out the DataFrame index.
was_df.to_csv("./processed_data/waterfall_and_spring.csv", index=False)