# Import geo-data of public transport ("ÖV") in Geneva using the Overpass API (OpenStreetMap)

## Libraries and settings

In [65]:
# Libraries
import os
import requests
import json
import folium
import pandas as pd
from pandas import json_normalize

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# Show current working directory
print(os.getcwd())

/workspaces/project_arm_airbnb/data_oev


## API request

### Train stations

In [50]:
# Overpass API URL (HTTPS so the request is sent encrypted)
url = 'https://overpass-api.de/api/interpreter'

# Overpass query: all nodes tagged railway=station inside the area with
# ISO3166-2 code CH-GE (admin_level 4), returned as JSON.
# Plain string - there are no placeholders, so no f-string is needed.
query = """
        [out:json];
        area["ISO3166-2"="CH-GE"][admin_level=4];
        node ["railway"="station"](area);
        out;"""

# Web API request
# The timeout keeps the cell from hanging indefinitely on an unresponsive
# server; raise_for_status() turns an HTTP error response into an explicit
# exception instead of an opaque JSON/KeyError further down.
r = requests.get(url, params={'data': query}, timeout=180)
r.raise_for_status()
data = r.json()['elements']

# Save data to file
with open('station.json', 'w') as json_file:
    json.dump(data, json_file)

# Store data in data frame (nested "tags" entries become "tags.<key>" columns)
df_train = json_normalize(data)

# Number of rows and columns
print(df_train.shape)

# First rows
df_train.head()

(13, 38)


Unnamed: 0,type,id,lat,lon,tags.ele,tags.internet_access,tags.internet_access:fee,tags.internet_access:operator,tags.name,tags.name:de,...,tags.name:azb,tags.name:en,tags.name:fa,tags.name:kk-Arab,tags.name:pnb,tags.name:sd,tags.name:ur,tags.network,tags.network:short,tags.network:wikidata
0,node,2388447150,46.231334,6.110271,424,wlan,no,SBB,Genève-Aéroport,Genf Flughafen,...,,,,,,,,,,
1,node,2754246079,46.220426,6.094808,428,,,,Vernier,,...,,,,,,,,,,
2,node,3053167578,46.263684,6.161146,386,,,,Creux-de-Genthod,,...,,,,,,,,,,
3,node,3053167580,46.27974,6.165805,388,no,,,Versoix,,...,,,,,,,,,,
4,node,3057742963,46.222273,6.076288,434,,,,Meyrin,,...,,,,,,,,,,


In [52]:
# Keep only the coordinates and the station name from df_train
locations_train = df_train[['lat', 'lon', "tags.name"]]

# Create the base map
m = folium.Map(location=[46.2044, 6.1432], zoom_start=12)

# Add one marker per station, using the station name as popup text
for lat, lon, name in zip(locations_train['lat'],
                          locations_train['lon'],
                          locations_train['tags.name']):
    folium.Marker([lat, lon], popup=name).add_to(m)

# Plot map
m


### Tram stops

In [39]:
# Overpass query: all nodes tagged railway=tram_stop inside the area with
# ISO3166-2 code CH-GE (admin_level 4), returned as JSON.
# Plain string - there are no placeholders, so no f-string is needed.
query = """
        [out:json];
        area["ISO3166-2"="CH-GE"][admin_level=4];
        node ["railway"="tram_stop"](area);
        out;"""

# Web API request (`url` is the Overpass endpoint defined in the train-station cell)
# The timeout keeps the cell from hanging indefinitely on an unresponsive
# server; raise_for_status() turns an HTTP error response into an explicit
# exception instead of an opaque JSON/KeyError further down.
r = requests.get(url, params={'data': query}, timeout=180)
r.raise_for_status()
data = r.json()['elements']

# Save data to file
with open('tram.json', 'w') as json_file:
    json.dump(data, json_file)

# Store data in data frame (nested "tags" entries become "tags.<key>" columns)
df_tram = json_normalize(data)

# Number of rows and columns
print(df_tram.shape)

# First rows
df_tram.head()


(180, 29)


Unnamed: 0,type,id,lat,lon,tags.ele,tags.name,tags.network,tags.operator,tags.public_transport,tags.railway,...,tags.website,tags.url,tags.contact:website,tags.layer,tags.opening_hours,tags.addr:city,tags.addr:postcode,tags.addr:street,tags.note,tags.start_date
0,node,35416998,46.218462,6.110622,430,Balexert,18,TPG,stop_position,tram_stop,...,,,,,,,,,,
1,node,35420048,46.22102,6.141444,401,Collège Sismondi,,TPG,stop_position,tram_stop,...,,,,,,,,,,
2,node,36663771,46.188926,6.131024,378,Pictet-Thellusson,,TPG,stop_position,tram_stop,...,,,,,,,,,,
3,node,297910640,46.200564,6.140755,375,Cirque,,TPG,stop_position,tram_stop,...,,,,,,,,,,
4,node,297914745,46.200847,6.143642,382,Place de Neuve,Unireso,TPG,stop_position,tram_stop,...,,,,,,,,,,


In [40]:
# Keep coordinates, railway tag and name of every node tagged as a tram stop
tram_columns = ["lat", "lon", "tags.railway", "tags.name"]
is_tram_stop = df_tram["tags.railway"].isin(['tram_stop'])
locations_tram = df_tram.loc[is_tram_stop, tram_columns]
print(locations_tram.head(5))

# Number of rows and columns
print(locations_tram.shape)

# Create map centred on the mean coordinate of the selected stops.
# Named `tram_map` instead of `map` so the built-in map() is not shadowed
# for every cell that runs afterwards.
tram_map = folium.Map(location=[locations_tram.lat.mean(),
                                locations_tram.lon.mean()],
                      zoom_start=8,
                      control_scale=True)

# Add marker symbols (one per row, stop name as popup)
for index, location_info in locations_tram.iterrows():
    folium.Marker([location_info["lat"],
                   location_info["lon"]],
                  popup=location_info["tags.name"]).add_to(tram_map)

# Plot map
tram_map

         lat       lon tags.railway          tags.name
0  46.218462  6.110622    tram_stop           Balexert
1  46.221020  6.141444    tram_stop   Collège Sismondi
2  46.188926  6.131024    tram_stop  Pictet-Thellusson
3  46.200564  6.140755    tram_stop             Cirque
4  46.200847  6.143642    tram_stop     Place de Neuve
(180, 4)


In [41]:
# Drop duplicates based on tags.name: only the first row of each stop name is kept
locations_tram = locations_tram.drop_duplicates(subset="tags.name")
print(locations_tram.head(5))

# Number of rows and columns
print(locations_tram.shape)

# Create map centred on the mean coordinate of the remaining stops.
# Named `tram_map` instead of `map` so the built-in map() is not shadowed
# for every cell that runs afterwards.
tram_map = folium.Map(location=[locations_tram.lat.mean(),
                                locations_tram.lon.mean()],
                      zoom_start=8,
                      control_scale=True)

# Add marker symbols (one per row, stop name as popup)
for index, location_info in locations_tram.iterrows():
    folium.Marker([location_info["lat"],
                   location_info["lon"]],
                  popup=location_info["tags.name"]).add_to(tram_map)

# Plot map
tram_map

         lat       lon tags.railway          tags.name
0  46.218462  6.110622    tram_stop           Balexert
1  46.221020  6.141444    tram_stop   Collège Sismondi
2  46.188926  6.131024    tram_stop  Pictet-Thellusson
3  46.200564  6.140755    tram_stop             Cirque
4  46.200847  6.143642    tram_stop     Place de Neuve
(82, 4)


### Bus stops

In [42]:
# Overpass query: all nodes tagged highway=bus_stop inside the area with
# ISO3166-2 code CH-GE (admin_level 4), returned as JSON.
# Plain string - there are no placeholders, so no f-string is needed.
query = """
        [out:json];
        area["ISO3166-2"="CH-GE"][admin_level=4];
        node ["highway"="bus_stop"](area);
        out;"""

# Web API request (`url` is the Overpass endpoint defined in the train-station cell)
# The timeout keeps the cell from hanging indefinitely on an unresponsive
# server; raise_for_status() turns an HTTP error response into an explicit
# exception instead of an opaque JSON/KeyError further down.
r = requests.get(url, params={'data': query}, timeout=180)
r.raise_for_status()
data = r.json()['elements']

# Save data to file
with open('bus.json', 'w') as json_file:
    json.dump(data, json_file)

# Store data in data frame (nested "tags" entries become "tags.<key>" columns)
df_bus = json_normalize(data)

# Number of rows and columns
print(df_bus.shape)

# First rows
df_bus.head()

(1449, 54)


Unnamed: 0,type,id,lat,lon,tags.bus,tags.ele,tags.highway,tags.name,tags.operator,tags.public_transport,...,tags.contact:website,tags.name:zh,tags.nat_name,tags.description,tags.was:highway,tags.note,tags.website,tags.not:network:wikidata,tags.start_date,tags.tourist_bus
0,node,497307,46.222755,6.139638,yes,372.0,bus_stop,Nations,TPG,stop_position,...,,,,,,,,,,
1,node,35419951,46.220048,6.14563,yes,390.0,bus_stop,Maison de la Paix,TPG,stop_position,...,,,,,,,,,,
2,node,36473383,46.156441,6.003172,yes,422.0,bus_stop,Athenaz-Passeiry,TPG,stop_position,...,,,,,,,,,,
3,node,36473725,46.144418,5.965679,yes,341.0,bus_stop,Chancy-Douane,TPG,stop_position,...,,,,,,,,,,
4,node,36647210,46.220019,6.085083,yes,,bus_stop,Signal,TPG,stop_position,...,,,,,,,,,,


In [43]:
# Restrict the bus data to nodes tagged as bus stops and to the columns
# needed for mapping
bus_stop_mask = df_bus["tags.highway"].isin(['bus_stop'])
locations = df_bus.loc[bus_stop_mask, ["lat", "lon", "tags.highway", "tags.name"]]
print(locations.head(5))

# Rows and columns of the selection
print(locations.shape)

         lat       lon tags.highway          tags.name
0  46.222755  6.139638     bus_stop            Nations
1  46.220048  6.145630     bus_stop  Maison de la Paix
2  46.156441  6.003172     bus_stop   Athenaz-Passeiry
3  46.144418  5.965679     bus_stop      Chancy-Douane
4  46.220019  6.085083     bus_stop             Signal
(1449, 4)


In [None]:
# Drop duplicates based on tags.name: only the first row of each stop name is kept
locations = locations.drop_duplicates(subset="tags.name")
print(locations.head(5))

# Number of rows and columns
print(locations.shape)

# Create map centred on the mean coordinate of the remaining stops.
# Named `bus_map` instead of `map` so the built-in map() is not shadowed
# for every cell that runs afterwards.
bus_map = folium.Map(location=[locations.lat.mean(),
                               locations.lon.mean()],
                     zoom_start=8,
                     control_scale=True)

# Add marker symbols (one per row, stop name as popup)
for index, location_info in locations.iterrows():
    folium.Marker([location_info["lat"],
                   location_info["lon"]],
                  popup=location_info["tags.name"]).add_to(bus_map)

# Plot map
bus_map

         lat       lon tags.highway          tags.name
0  46.222755  6.139638     bus_stop            Nations
1  46.220048  6.145630     bus_stop  Maison de la Paix
2  46.156441  6.003172     bus_stop   Athenaz-Passeiry
3  46.144418  5.965679     bus_stop      Chancy-Douane
4  46.220019  6.085083     bus_stop             Signal
(738, 4)


### Bus stops: still too many after removing duplicate names (738), hence not used for further analysis

### Combine train stations and tram stops into one data frame (input for the SQLite db)

In [70]:
# Columns kept for both kinds of stops
stop_columns = ["lat", "lon", "tags.name"]

# Build NEW frames instead of overwriting df_train / df_tram:
#  - assigning a column on a column slice triggers pandas' chained-assignment
#    warning, and
#  - overwriting df_tram's "tags.railway" with "tram" would make the earlier
#    tram-map cell (which filters on 'tram_stop') select nothing when re-run.
# .assign() returns a copy with the extra "tags.railway" label column.
train_stops = df_train[stop_columns].assign(**{"tags.railway": "train"})
print(train_stops.head(5))
# print shape of the dataframe
print(train_stops.shape)

tram_stops = df_tram[stop_columns].assign(**{"tags.railway": "tram"})
print(tram_stops.head(5))
# print shape of the dataframe
print(tram_stops.shape)

# Stack train stations and tram stops into one frame with a fresh 0..n-1 index
# NOTE(review): this uses every row of df_tram, not the name-deduplicated
# locations_tram built for the map - confirm that this is intended.
df_oev = pd.concat([train_stops, tram_stops], ignore_index=True)
print(df_oev.head())

# print shape of the dataframe
print(df_oev.shape)


         lat       lon         tags.name tags.railway
0  46.231334  6.110271   Genève-Aéroport        train
1  46.220426  6.094808           Vernier        train
2  46.263684  6.161146  Creux-de-Genthod        train
3  46.279740  6.165805           Versoix        train
4  46.222273  6.076288            Meyrin        train
(13, 4)
         lat       lon          tags.name tags.railway
0  46.218462  6.110622           Balexert         tram
1  46.221020  6.141444   Collège Sismondi         tram
2  46.188926  6.131024  Pictet-Thellusson         tram
3  46.200564  6.140755             Cirque         tram
4  46.200847  6.143642     Place de Neuve         tram
(180, 4)
         lat       lon         tags.name tags.railway
0  46.231334  6.110271   Genève-Aéroport        train
1  46.220426  6.094808           Vernier        train
2  46.263684  6.161146  Creux-de-Genthod        train
3  46.279740  6.165805           Versoix        train
4  46.222273  6.076288            Meyrin        train
(193,

### Data Storage in SQLite

In [74]:
# store data in sqlite database
import sqlite3
import fnmatch

# Function to check whether a sqlite db-connection is still usable
def check_conn(conn):
    """Report whether ``conn`` can still hand out a cursor.

    Args:
        conn: Object expected to be a DB connection exposing ``cursor()``.

    Returns:
        bool: True if ``conn.cursor()`` succeeds, False if it raises any
        ``Exception`` (for example because the connection was closed).
    """
    try:
        conn.cursor()
    except Exception:
        is_open = False
    else:
        is_open = True
    return is_open

# Location of the SQLite database file
# NOTE(review): absolute path tied to this workspace - consider deriving it
# from a configurable project directory.
db_path = '/workspaces/project_arm_airbnb/data/oev.db'
db_dir = os.path.dirname(db_path)

# Make sure the target directory exists before connecting
os.makedirs(db_dir, exist_ok=True)

# create a connection to the database (left open for the cells that follow)
conn = sqlite3.connect(db_path)

# Create cursor object to execute SQL commands
cursor = conn.cursor()

# Show dbs in the directory that holds the database.
# The previous listing scanned '.', which is a different directory than the
# one the database is written to, so the file never showed up.
for db_file in fnmatch.filter(os.listdir(db_dir), '*.db'):
    print(db_file)

# store the data in the database (an existing 'oev' table is replaced);
# the value returned by to_sql is displayed as the cell output
df_oev.to_sql('oev', conn, if_exists='replace', index=False)

193

In [77]:
# Read the complete 'oev' table back through the open connection to verify
# that the data was stored correctly
query = "SELECT * FROM oev"
df3 = pd.read_sql(sql=query, con=conn)

# First rows, followed by the (rows, columns) shape of the result
print(df3.head(), df3.shape, sep="\n")

         lat       lon         tags.name tags.railway
0  46.231334  6.110271   Genève-Aéroport        train
1  46.220426  6.094808           Vernier        train
2  46.263684  6.161146  Creux-de-Genthod        train
3  46.279740  6.165805           Versoix        train
4  46.222273  6.076288            Meyrin        train
(193, 4)


In [78]:
# Close db connection (if open)
# No blanket try/except here: an unexpected failure while closing should be
# visible instead of being silently swallowed.
if check_conn(conn):
    conn.close()

# Status (True = open, False = closed)
print(check_conn(conn))

False
