In [None]:
!pip install geopy  # streets to longitude/latitude
!pip install folium  # locations on map
!pip install bs4  # BeautifulSoup
!pip install opencage  # geocoder
!pip install haversine  # distance between two location in km

import requests
import pandas as pd
import folium
from geopy.geocoders import Nominatim
from opencage.geocoder import OpenCageGeocode
import haversine as hs

In [14]:
# credentials and global variables
CLIENT_ID = '3SLZWEBJPQFCQ45T0ZYZ12Q3YTPFE1STWEU2JJEABKXP0BRV'  # Foursquare ID
CLIENT_SECRET = 'DHKCO5GL4OMW2TUHIPF4KGSPD3AIJ4K0BMYATRAOVWARXZGM'
ACCESS_TOKEN = 'YMKJ1ZAEJUXFHF1H52VPX130RP0YSQIRNYPXAIMQRVNRMWWM'
VERSION = '20210505'
KEY = '1c9637a6e5db42e1847e51a9e6459c27'  # GeoCode
LOCATION = Nominatim(user_agent="foursquare_agent").geocode('Košice')
LAT = LOCATION.latitude
LNG = LOCATION.longitude

### Streets (borough and location)

In [None]:
# download list from website, keep only relevant columns
streets = pd.concat(pd.read_html('https://www.kosice.sk/mesto/katalog-ulic', 
                                 encoding='utf-8'))
streets.rename(columns={'Názov ulice': 'Street', 'Mestská časť': 'Borough'},
               inplace=True)
streets.drop(columns={'VZN', 'PSČ', 'Poznámka'}, errors='ignore', inplace=True)
streets.head()

In [None]:
# pair location to the streets using GeoCage (~ 930 streets, time consuming)
geocoder = OpenCageGeocode(KEY)
lat = []
lng = []
for i, s in streets.iterrows():
    query = s['Street'] + ', ' + s['Borough'] + ', Slovakia'
    print(query)
    location = geocoder.geocode(query)
    lat.append(location[0]['geometry']['lat'])
    lng.append(location[0]['geometry']['lng'])

streets.insert(2, 'lat', lat, True)
streets.insert(2, 'lng', lng, True)

In [None]:
# remove outliers
streets_outliers = ['Čermeľské údolie', 'Prešovská cesta', 'Ulica Zelený dvor',
                    'Bankov', 'Ulica Sajkov', 'Cesta do Hanisky',
                    'K zoologickej záhrade', 'Bielych albatrosov', 'Bahýľova',
                    'Svetlá pusta', 'Ludvíkov dvor', 'K letisku']
for s in streets_outliers:
    streets.drop(streets[streets.Street == s].index, inplace=True)

# visually verify street's locations and save it
venues_map = folium.Map(location=[LAT, LNG], zoom_start=12)
for lat, lng, label in zip(streets.lat, streets.lng, streets.Street):
    folium.CircleMarker([lat, lng], radius=5, popup=label).add_to(venues_map)

streets.to_csv("streets.csv")
venues_map

### Boroughs (location and population)

In [90]:
# location of boroughs
boroughs = pd.DataFrame(set(streets['Borough'].tolist()), columns={'Borough'})
lat = []
lng = []
for i, s in boroughs.iterrows():
    location = geocoder.geocode(s['Borough'])
    lat.append(location[0]['geometry']['lat'])
    lng.append(location[0]['geometry']['lng'])
boroughs.insert(1, 'lat', lat, True)
boroughs.insert(1, 'lng', lng, True)

# join population
population = pd.concat(pd.read_html(
    io='https://sk.wikipedia.org/wiki/Mestsk%C3%A9_%C4%8Dasti_Ko%C5%A1%C3%ADc',
    match='Počet obyvateľov',
    encoding='utf-8'))
population.rename(columns={'Poradie': 'Order', 'Mestská časť': 'Borough',
                           'Okres': 'District', 'Plocha v km²': 'Size',
                           'Počet obyvateľov': 'Population'}, inplace=True)
population['Borough'] = 'Košice - ' + population['Borough']
boroughs = boroughs.join(population.set_index('Borough'), on='Borough')
boroughs.to_csv('boroughs.csv')
boroughs

Unnamed: 0,Borough,lng,lat,Order,District,Size,Population
0,Košice - Barca,21.265998,48.676597,12.0,Košice IV,1813,3 677
1,Košice - Kavečany,21.206767,48.774899,18.0,Košice I,1005,1 337
2,Košice - Myslava,21.203971,48.7088,16.0,Košice II,701,2 399
3,Košice - Dargovských hrdinov,21.282077,48.738852,2.0,Košice III,1109,26 004
4,Košice - Džungľa,21.270358,48.736222,22.0,Košice I,47,703
5,Košice - Krásna,21.315172,48.670395,11.0,Košice IV,2005,5 627
6,Košice - Sídlisko KVP,21.213507,48.713881,4.0,Košice II,178,23 603
7,Košice - Lorinčík,21.192117,48.688607,21.0,Košice II,297,770
8,Košice - Nad jazerom,21.28853,48.686518,3.0,Košice IV,366,24 613
9,Košice - Staré Mesto,21.257829,48.720615,7.0,Košice I,434,20 698


In [155]:
# boroughs on map, size reflects population
venues_map = folium.Map(location=[LAT, LNG], zoom_start=12)
for lat, lng, label, size in zip(boroughs.lat, boroughs.lng, boroughs.Borough,
                                 boroughs.Population):
    folium.CircleMarker([lat, lng], radius=int(size.replace(' ', ''))/1000,
                        popup=label).add_to(venues_map)
venues_map

### Sporting facilities using Foursquare (boroughs)

In [227]:
search_query = ['Gym', 'gym', 'arena', 'stadium', 'tennis', 'hockey', 'squash',
                'bowling', 'golf', 'swimming', 'bedminton']
columns = ['id', 'name', 'location.lat', 'location.lng']
facilities = pd.DataFrame([], columns=columns)
for q in search_query:
    print('Searching ' + q)
    for lat, lng in zip(boroughs.lat, boroughs.lng):
        url = 'https://api.foursquare.com/v2/venues/search?client_id={}&\
                client_secret={}&ll={},{}&oauth_token={}&v={}&query={}&radius={}'\
                .format(CLIENT_ID, CLIENT_SECRET, lat, lng,
                        ACCESS_TOKEN, VERSION, q, 2000)
        results = requests.get(url).json()
        if len(results['response']['venues']) > 0:
            venues = results['response']['venues']
            facilities = facilities.append(pd.json_normalize(venues)[columns])

facilities.rename(columns={'location.lat': 'lat', 'location.lng': 'lng'},
                  inplace=True)
facilities = facilities.drop_duplicates()
facilities

Unnamed: 0,id,name,lat,lng
0,59e756f2772fbc1b46a4771a,Horvi’s Gym,48.679925,21.296042
1,5fc12266439ae441ad383414,Gymbeam,48.690421,21.259739
2,5045b18fe4b05fe83cfe29f6,Súkromné gymnázium Dneperská 1,48.688062,21.287220
0,5112b014e4b01f7081b61fb8,Olymp GYM,48.716127,21.213273
1,50a28f93e4b0225d6363306b,DaWell Gym,48.718742,21.231872
...,...,...,...,...
0,50df59e4e4b0b92a2bf42da7,Golf Blaster indoor centrum,48.740876,21.246466
0,52e69ff9498e3ba779c9713b,Golf Point,48.719566,21.255363
0,51827fbe498ed11427d7c593,Red Fox Golf Club,48.665452,21.171494
1,503cfee6e4b00777f078bc90,Golfovy Klub Red Fox,48.665570,21.171479


In [232]:
# remove rows with forbidden substrings and save as csv
facilities = facilities[~facilities['name'].str.contains('Gymnázium')]
facilities = facilities[~facilities['name'].str.contains('gymnázium')]
facilities = facilities[~facilities['name'].str.contains('Shop')]
facilities.reset_index()

Unnamed: 0,index,id,name,lat,lng
0,0,59e756f2772fbc1b46a4771a,Horvi’s Gym,48.679925,21.296042
1,1,5fc12266439ae441ad383414,Gymbeam,48.690421,21.259739
2,0,5112b014e4b01f7081b61fb8,Olymp GYM,48.716127,21.213273
3,1,50a28f93e4b0225d6363306b,DaWell Gym,48.718742,21.231872
4,2,515d8372e4b0ff174896c637,Gymnastická hala - Sokol Gymnastik Košice,48.713933,21.226048
5,3,4ed4a44e6c2528aaaddc4706,Gymnastik Klub,48.71139,21.226876
6,0,510513dfe4b08a321c3631c2,Dawell Gym,48.733891,21.284898
7,1,56c4dcf0cd10c05767a26f13,The Gym,48.71357,21.235228
8,3,5474aa9f498ebe35c16d60a2,300 power gym,48.714643,21.236794
9,4,568803cc498ecdfadc96a83c,Gorilla gym,48.703144,21.234126


In [239]:
# find borough for each facility (as closest street)
bor = []
for i, f in facilities.iterrows():
    #print('Searching ' + f['name'])
    b = ""
    min = 100
    for j, s in streets.iterrows():
        dist = hs.haversine((f.lat, f.lng), (s.lat, s.lng))
        if dist < min:
            min = dist
            b = s['Borough']
    bor.append(b)
facilities['Borough'] = bor
facilities.to_csv("facilities.csv")
facilities

Searching Horvi’s Gym
Searching Gymbeam
Searching Olymp GYM
Searching DaWell Gym
Searching Gymnastik Klub
Searching Gymnastická hala - Sokol Gymnastik Košice
Searching Dawell Gym
Searching The Gym
Searching 300 power gym
Searching Gorilla gym
Searching Rebel's gym - Crossfit
Searching Heim Gym
Searching 7 Days Gym
Searching Gym Beam - dopĺnky výživy pre športovcov
Searching Relaxharmony Gym
Searching Gym
Searching Family Gym Cassovar
Searching Regeneračné stredisko Športového gymnázia
Searching Školský internát športového gymnázia
Searching T2 Boulder Arena Košice
Searching Yess Arena
Searching Aréna Sršňov
Searching Angels Arena
Searching Bozenka Arena
Searching Crow Arena
Searching Bowling Pub
Searching T2 Boulder Arena
Searching Steel Arena
Searching T2 Boulder Aréna
Searching Noise Arena
Searching Tréningová hala STEEL ARENA
Searching Futbalové ihrisko Popradská
Searching Golden Dogs Tennis Club
Searching Tennis Court SRC
Searching Table Tennis
Searching Hockey Center
Searching Bal

Unnamed: 0.1,Unnamed: 0,id,name,lat,lng,Borough
0,0,59e756f2772fbc1b46a4771a,Horvi’s Gym,48.679925,21.296042,Košice - Nad jazerom
1,1,5fc12266439ae441ad383414,Gymbeam,48.690421,21.259739,Košice - Juh
2,0,5112b014e4b01f7081b61fb8,Olymp GYM,48.716127,21.213273,Košice - Sídlisko KVP
3,1,50a28f93e4b0225d6363306b,DaWell Gym,48.718742,21.231872,Košice - Západ
4,2,4ed4a44e6c2528aaaddc4706,Gymnastik Klub,48.71139,21.226876,Košice - Západ
5,3,515d8372e4b0ff174896c637,Gymnastická hala - Sokol Gymnastik Košice,48.713933,21.226048,Košice - Západ
6,0,510513dfe4b08a321c3631c2,Dawell Gym,48.733891,21.284898,Košice - Dargovských hrdinov
7,1,56c4dcf0cd10c05767a26f13,The Gym,48.71357,21.235228,Košice - Západ
8,3,5474aa9f498ebe35c16d60a2,300 power gym,48.714643,21.236794,Košice - Západ
9,4,568803cc498ecdfadc96a83c,Gorilla gym,48.703144,21.234126,Košice - Západ


In [242]:
#facilities = pd.read_csv("facilities.csv")
venues_map = folium.Map(location=[LAT, LNG], zoom_start=12)
for lat, lng, name in zip(facilities.lat, facilities.lng, facilities.name):
    folium.CircleMarker([lat, lng], radius=5).add_to(venues_map)
#for lat, lng in zip(facilities.lat, facilities.lng):
#    folium.CircleMarker([lat, lng], radius=5).add_to(venues_map)
venues_map


### Borough with highest demand (lowers facilities per population)

### Best street for new sporting facility