# Import "tourism" geo-data for the canton of Geneva using the Overpass API

# Libraries and settings

In [2]:
# Libraries
import os
import requests
import json
import folium
from pandas import json_normalize
import pandas as pd
from IPython.display import display

# Silence warnings for the rest of the session. No category is given, so
# the filter applies to every warning, not just a specific kind.
import warnings
warnings.filterwarnings(action="ignore")

# Print the directory the kernel is currently running in
print(os.getcwd())


/workspaces/project_arm_airbnb/data_maps


## API request

### Tourist attractions

In [3]:
# Overpass API URL
url = 'http://overpass-api.de/api/interpreter'

# Overpass query: nodes tagged tourism=attraction inside the area tagged
# ISO3166-2=CH-GE with admin_level=4.
# Plain string instead of an f-string because nothing is interpolated.
query = """
        [out:json];
        area["ISO3166-2"="CH-GE"][admin_level=4];
        node ["tourism"="attraction"](area);
        out;"""

# Web API request.
# - timeout: without it requests waits indefinitely on an unresponsive server
# - raise_for_status(): stop on an HTTP error response instead of failing
#   later while decoding a body that is not the expected JSON
r = requests.get(url, params={'data': query}, timeout=180)
r.raise_for_status()
data = r.json()['elements']

# Save the raw elements to file
with open('attraction.json', 'w') as json_file:
    json.dump(data, json_file)

# Store data in data frame (nested "tags" keys become "tags.<key>" columns)
df_attraction = json_normalize(data)

# Number of rows and columns
print(df_attraction.shape)

# First rows
df_attraction.head(5)

(41, 61)


Unnamed: 0,type,id,lat,lon,tags.archaeological_site,tags.denotation,tags.ele,tags.geological,tags.historic,tags.man_made,...,tags.email,tags.internet_access,tags.leisure,tags.level,tags.opening_hours,tags.length,tags.url,tags.width,tags.alt,tags.access
0,node,25648489,46.205673,6.154324,petroglyph,natural_monument,373.6,glacial_erratic,archaeological_site,survey_point,...,,,,,,,,,,
1,node,60018172,46.207381,6.155889,,,,,,,...,,,,,,,,,,
2,node,139446272,46.12855,5.957381,,,,,boundary_stone,,...,,,,,,,,,,
3,node,151258480,46.132357,5.955911,,,,,,,...,,,,,,,,,,
4,node,613159167,46.142711,6.126112,,,,,yes,tower,...,,,,,,,,,,


In [4]:
# Keep only the coordinates and the name of every attraction
locations_attraction = df_attraction[['lat', 'lon', 'tags.name']]

# Base map with a fixed centre and zoom level
m = folium.Map(location=[46.2044, 6.1432], zoom_start=12)

# One marker per row; the name (formatted as text) is the popup
for lat, lon, name in zip(locations_attraction['lat'],
                          locations_attraction['lon'],
                          locations_attraction['tags.name']):
    folium.Marker([lat, lon], popup=f"{name}").add_to(m)

# Show the map as cell output
m

In [5]:
# Drop duplicates based on tags.name. Missing names (NaN) count as equal to
# each other here, so only the first unnamed attraction is kept.
locations_attraction = locations_attraction.drop_duplicates(subset="tags.name")
print(locations_attraction.head(5))

# Number of rows and columns
print(locations_attraction.shape)

# Create map centred on the mean coordinate of the remaining attractions.
# Named map_attraction rather than `map` so the built-in map() stays usable.
map_attraction = folium.Map(location=[locations_attraction.lat.mean(),
                                      locations_attraction.lon.mean()],
                            zoom_start=8,
                            control_scale=True)

# Add marker symbols. The loop variable must not reuse the name
# locations_attraction: that would replace the data frame with its last row
# and make this cell fail when it is run a second time.
for index, row in locations_attraction.iterrows():
    folium.Marker([row["lat"],
                   row["lon"]],
                  popup=row["tags.name"]).add_to(map_attraction)


# Plot map
map_attraction

         lat       lon                              tags.name
0  46.205673  6.154324                        Pierre du Niton
1  46.207381  6.155889                              Jet d'eau
2  46.128550  5.957381                              Borne n°1
3  46.132357  5.955911  Le point le plus occidental de Suisse
4  46.142711  6.126112                                    NaN
(37, 3)


### Museums

In [6]:
# Overpass API URL
url = 'http://overpass-api.de/api/interpreter'

# Overpass query: nodes tagged tourism=museum inside the area tagged
# ISO3166-2=CH-GE with admin_level=4.
# Plain string instead of an f-string because nothing is interpolated.
query = """
        [out:json];
        area["ISO3166-2"="CH-GE"][admin_level=4];
        node ["tourism"="museum"](area);
        out;"""

# Web API request.
# - timeout: without it requests waits indefinitely on an unresponsive server
# - raise_for_status(): stop on an HTTP error response instead of failing
#   later while decoding a body that is not the expected JSON
r = requests.get(url, params={'data': query}, timeout=180)
r.raise_for_status()
data = r.json()['elements']

# Save the raw elements to file
with open('museum.json', 'w') as json_file:
    json.dump(data, json_file)

# Store data in data frame (nested "tags" keys become "tags.<key>" columns)
df_museum = json_normalize(data)

# Number of rows and columns
print(df_museum.shape)

# First rows
df_museum.head(5)

(15, 41)


Unnamed: 0,type,id,lat,lon,tags.fee,tags.museum,tags.name,tags.name:en,tags.name:es,tags.name:fr,...,tags.contact:instagram,tags.contact:twitter,tags.email,tags.official_name,tags.short_name,tags.addr:housename,tags.operator:type,tags.description,tags.internet_access,tags.contact:website
0,node,701851300,46.233972,6.055721,no,science,Globe de la Science et de l'Innovation,The Globe of Science and Innovation,Globo de la Ciencia y de la Innovación,Globe de la Science et de L'innovation,...,,,,,,,,,,
1,node,981690477,46.184291,6.139151,no,,Musée de Carouge,,,,...,,,,,,,,,,
2,node,983366077,46.198282,6.108982,,,Musée des EAN,,,Musée des EAN,...,,,,,,,,,,
3,node,1768216261,46.220829,6.152126,no,,Musée d'histoire des sciences,,,,...,,,,,,,,,,
4,node,2021513198,46.198828,6.137523,yes,art,MAMCO,,,,...,https://www.instagram.com/mamco_geneve/,https://twitter.com/mamco_artmuseum,info@mamco.ch,Musée d’Art Moderne et Contemporain,MAMCO,,,,,


In [7]:
# Keep only the coordinates and the name of every museum
locations_museum = df_museum[['lat', 'lon', 'tags.name']]

# Base map with a fixed centre and zoom level
m = folium.Map(location=[46.2044, 6.1432], zoom_start=12)

# One marker per row; the name is handed to the popup as it is
for lat, lon, name in zip(locations_museum['lat'],
                          locations_museum['lon'],
                          locations_museum['tags.name']):
    folium.Marker([lat, lon], popup=name).add_to(m)

# Show the map as cell output
m

In [8]:
# Drop duplicates based on tags.name. Missing names (NaN) count as equal to
# each other here, so at most one unnamed museum would be kept.
locations_museum = locations_museum.drop_duplicates(subset="tags.name")
print(locations_museum.head(5))

# Number of rows and columns
print(locations_museum.shape)

# Create map centred on the mean coordinate of the remaining museums.
# Named map_museum rather than `map` so the built-in map() stays usable.
map_museum = folium.Map(location=[locations_museum.lat.mean(),
                                  locations_museum.lon.mean()],
                        zoom_start=8,
                        control_scale=True)

# Add marker symbols. The loop variable must not reuse the name
# locations_museum: that would replace the data frame with its last row
# and make this cell fail when it is run a second time.
for index, row in locations_museum.iterrows():
    folium.Marker([row["lat"],
                   row["lon"]],
                  popup=row["tags.name"]).add_to(map_museum)


# Plot map
map_museum

         lat       lon                               tags.name
0  46.233972  6.055721  Globe de la Science et de l'Innovation
1  46.184291  6.139151                        Musée de Carouge
2  46.198282  6.108982                           Musée des EAN
3  46.220829  6.152126           Musée d'histoire des sciences
4  46.198828  6.137523                                   MAMCO
(15, 3)


## Combine museum and attraction dataframes

In [9]:
# Create the museum data frame used in the final data frame: select the
# needed columns and rename them to the shared names 'name' and 'category'.
# rename() is chained so that it returns a new frame; renaming in place on a
# column selection of df_museum can raise pandas' SettingWithCopyWarning.
dfm = (
    df_museum[['id', 'lat', 'lon', 'tags.name', 'tags.tourism']]
    .rename(columns={'tags.name': 'name', 'tags.tourism': 'category'})
)
dfm.head()

Unnamed: 0,id,lat,lon,name,category
0,701851300,46.233972,6.055721,Globe de la Science et de l'Innovation,museum
1,981690477,46.184291,6.139151,Musée de Carouge,museum
2,983366077,46.198282,6.108982,Musée des EAN,museum
3,1768216261,46.220829,6.152126,Musée d'histoire des sciences,museum
4,2021513198,46.198828,6.137523,MAMCO,museum


In [14]:
# Create the attraction data frame used in the final data frame: select the
# needed columns, rename them to the shared names 'name' and 'category', and
# drop every row with a missing value in any of these columns.
# rename() is chained so that it returns a new frame; renaming in place on a
# column selection of df_attraction can raise pandas' SettingWithCopyWarning.
dfa = (
    df_attraction[['id', 'lat', 'lon', 'tags.name', 'tags.tourism']]
    .rename(columns={'tags.name': 'name', 'tags.tourism': 'category'})
    .dropna()
)

# Stack museums and attractions into one frame with a fresh 0..n-1 index
df_tourist = pd.concat([dfm, dfa], ignore_index=True)

# print shape of the data
print(f"Dataframe shape: {df_tourist.shape}")
df_tourist.head()


Dataframe shape: (52, 5)


Unnamed: 0,id,lat,lon,name,category
0,701851300,46.233972,6.055721,Globe de la Science et de l'Innovation,museum
1,981690477,46.184291,6.139151,Musée de Carouge,museum
2,983366077,46.198282,6.108982,Musée des EAN,museum
3,1768216261,46.220829,6.152126,Musée d'histoire des sciences,museum
4,2021513198,46.198828,6.137523,MAMCO,museum


In [15]:
# Flag every row whose name already occurred further up in the frame
name_repeated = df_tourist.duplicated(subset=['name'])
duplicates = name_repeated.sum()
print(f"Number of duplicates: {duplicates}")

# Keep only the rows that were not flagged (first occurrence of each name)
df_tourist = df_tourist.loc[~name_repeated]
print(f"Dataframe shape: {df_tourist.shape}")

# Count again on the reduced frame to confirm nothing is left
name_repeated = df_tourist.duplicated(subset=['name'])
duplicates = name_repeated.sum()
print(f"Number of duplicates new: {duplicates}")

# Shape of the frame after the clean-up
print(f"Dataframe shape final: {df_tourist.shape}")

Number of duplicates: 1
Dataframe shape: (51, 5)
Number of duplicates new: 0
Dataframe shape final: (51, 5)


## Store df_tourist in SQLite database

In [18]:
# store data in sqlite database
import sqlite3
import fnmatch

# Function to check whether a sqlite db-connection is still usable
def check_conn(conn):
    """Report whether ``conn`` can still hand out a cursor.

    The function does not close anything; it only probes the connection.

    Args:
        conn: Object to probe by calling its ``cursor()`` method.

    Returns:
        bool: True if ``conn.cursor()`` succeeds, False if the call raises
        any ``Exception``.
    """
    try:
        conn.cursor()
    except Exception:
        return False
    else:
        return True

# Location of the SQLite database file
db_path = '/workspaces/project_arm_airbnb/data/tourist.db'

# create a connection to the database
conn = sqlite3.connect(db_path)

# Create cursor object to execute SQL commands
cursor = conn.cursor()

# Show dbs in the directory that holds the database. Listing '.' would
# search the current working directory instead, which is a different folder
# than the one tourist.db is written to, so the new file would never show up.
flist = sorted(fnmatch.filter(os.listdir(os.path.dirname(db_path)), '*.db'))
for db_file in flist:
    print(db_file)

# store the data in the database, replacing the 'tourist' table if it
# already exists; the data frame index is not written as a column
df_tourist.to_sql('tourist', conn, if_exists='replace', index=False)

51

In [22]:
# Read the whole 'tourist' table back to check what was stored
query = "SELECT * FROM tourist"
df2 = pd.read_sql(sql=query, con=conn)

# Shape first, then the leading rows as cell output
print(df2.shape)
df2.head()

(51, 5)


Unnamed: 0,id,lat,lon,name,category
0,701851300,46.233972,6.055721,Globe de la Science et de l'Innovation,museum
1,981690477,46.184291,6.139151,Musée de Carouge,museum
2,983366077,46.198282,6.108982,Musée des EAN,museum
3,1768216261,46.220829,6.152126,Musée d'histoire des sciences,museum
4,2021513198,46.198828,6.137523,MAMCO,museum


In [23]:
# Close db connection (if open). Only sqlite3 errors raised by close() are
# handled, and they are reported instead of being silently discarded.
if check_conn(conn):
    try:
        conn.close()
    except sqlite3.Error as err:
        print(f"Could not close db connection: {err}")

# Status (True = open, False = closed)
print(check_conn(conn))

False


### Jupyter notebook --footer info--

In [1]:
import os
import platform
import socket
from platform import python_version
from datetime import datetime

# Collect the footer lines first, then emit them with a single print call
footer_lines = [
    '-----------------------------------',
    os.name.upper(),
    f"{platform.system()} | {platform.release()}",
    f"Datetime: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
    f"Python Version: {python_version()}",
    '-----------------------------------',
]
print('\n'.join(footer_lines))

-----------------------------------
POSIX
Linux | 6.5.0-1025-azure
Datetime: 2025-01-08 07:39:08
Python Version: 3.11.10
-----------------------------------
