In [42]:
from time import sleep
import json
from tqdm import tqdm
tqdm.pandas(desc='pandas')

import pandas as pd
import numpy as np
from dask import dataframe as dd

from geopy.geocoders import Nominatim


# Shared geopy Nominatim client used by getCoords for every lookup;
# `user_agent` is the application identifier passed to the geocoder.
geoEncoder = Nominatim(user_agent='spanish-cities')

def getCoords(name: str):
    """Geocode a Spanish place name to ``(latitude, longitude)``.

    The query sent to the module-level ``geoEncoder`` is ``"<name>, Spain"``.

    Parameters
    ----------
    name : str
        Place name to look up.

    Returns
    -------
    tuple of float
        ``(lat, lon)`` parsed from the geocoder's raw response, or
        ``(np.nan, np.nan)`` when the geocoder returns no match or the
        lookup fails for any ordinary reason.
    """
    try:
        location = geoEncoder.geocode(f'{name}, Spain')
        if location is None:
            # The geocoder found nothing for this query.
            return np.nan, np.nan

        geoInfo = location.raw
        return float(geoInfo['lat']), float(geoInfo['lon'])

    except Exception:
        # Best-effort lookup: any ordinary failure (service error, malformed
        # response, ...) maps to NaN coordinates. `Exception` rather than a
        # bare `except` so KeyboardInterrupt / SystemExit still propagate and
        # a long geocoding loop can be interrupted.
        return np.nan, np.nan
        
def backup(dic, path):
    """Serialize ``dic`` as JSON (4-space indent) into the file at ``path``.

    The file is opened in write mode, so any existing content is replaced.
    """
    with open(path, 'w') as sink:
        json.dump(dic, sink, indent=4)

def restore(path):
    """Read the JSON file at ``path`` and return the decoded object."""
    with open(path) as source:
        payload = json.load(source)
    return payload

## Get Glovo city coordinates

In [7]:
# Glovo city table; the first CSV column is used as the row index.
df = pd.read_csv('glovo_cities.csv', index_col=0)

# One placeholder entry (None) per distinct city name, in first-seen order.
glovoCityCoords = {cityName: None for cityName in df['city_name']}

In [8]:
# Geocode every Glovo city in turn, pausing 1.5 s before each request
# (presumably to stay within the geocoding service's rate limit - confirm).
for city in tqdm(glovoCityCoords):
    sleep(1.5)
    glovoCityCoords[city] = getCoords(city)

# Display the resulting {city: (lat, lon)} mapping.
glovoCityCoords

100%|██████████| 312/312 [09:00<00:00,  1.73s/it]


{'Madrid': (40.4167047, -3.7035825),
 'Valencia': (39.4697065, -0.3763353),
 'Barcelona': (41.3828939, 2.1774322),
 'Sevilla': (37.3886303, -5.9953403),
 'Zaragoza': (41.6521342, -0.8809428),
 'Malaga': (36.7213028, -4.4216366),
 'Palma': (39.5695818, 2.6500745),
 'Las Palmas de Gran Canaria': (28.1288694, -15.4349015),
 'Tenerife': (28.2935785, -16.621447121144122),
 'Alcalá de Henares y Torrejón de Ardoz': (nan, nan),
 'Leganés - Getafe': (40.3141049, -3.7416838),
 'Vigo': (42.2376602, -8.7247205),
 'Granada': (37.1734995, -3.5995337),
 'Córdoba': (37.8845813, -4.7760138),
 'Oviedo': (43.3604218, -5.8450534),
 'Sabadell': (41.5421013, 2.1138977),
 'Valladolid': (41.6521328, -4.728562),
 'Bilbao': (43.2630018, -2.9350039),
 'Pamplona': (42.8184538, -1.6442556),
 'Santander': (43.4620412, -3.8099719),
 'Sant Cugat del Vallès y Rubí': (nan, nan),
 'A Coruña': (43.3709703, -8.3959425),
 'Vitoria': (42.8465088, -2.6724025),
 'Murcia': (37.9923795, -1.1305431),
 'Terrassa': (41.5629623, 2.

In [9]:
# Save the geocoded Glovo city coordinates to disk as JSON (via `backup`).
backup(glovoCityCoords, 'glovoCityCoords.json')

## Find the closest Glovo city for every census town

In [23]:
def distance(coords1: tuple, coords2: tuple):
    """Great-circle separation between two points, in degrees of arc.

    Parameters
    ----------
    coords1, coords2 : tuple
        ``(latitude, longitude)`` pairs in decimal degrees; only indices
        0 and 1 are read.

    Returns
    -------
    float
        Central angle between the two points in degrees, computed with the
        haversine formula. For two points on the same meridian this equals
        the plain latitude difference. NaN if any coordinate is NaN.

    Notes
    -----
    A planar ``sqrt(dlat**2 + dlon**2)`` on raw degrees overstates east-west
    gaps, because a degree of longitude shrinks with ``cos(latitude)``; that
    can rank a farther city as the nearer one. The spherical formula avoids
    this.
    """
    lat1, lon1 = np.radians(coords1[0]), np.radians(coords1[1])
    lat2, lon2 = np.radians(coords2[0]), np.radians(coords2[1])

    # Haversine of the central angle.
    hav = (np.sin((lat2 - lat1) / 2) ** 2
           + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2)

    # Rounding can push `hav` marginally above 1 for near-antipodal points,
    # which would make arcsin return NaN; np.minimum caps it while still
    # propagating a genuine NaN input.
    return np.degrees(2 * np.arcsin(np.sqrt(np.minimum(1.0, hav))))

def closestGlovoCity(testCoords: tuple):
    """Name of the entry in ``glovoCityCoords`` nearest to ``testCoords``.

    Every city in the module-level ``glovoCityCoords`` mapping is measured
    against ``testCoords`` with ``distance``; the first city with the
    strictly smallest distance wins. Distances that do not compare as
    smaller than the current best (NaN, for instance) are skipped, so the
    result is ``''`` when no city qualifies.
    """
    bestName = ''
    bestGap = np.inf

    for name in glovoCityCoords:
        gap = distance(testCoords, glovoCityCoords[name])

        if not gap < bestGap:
            continue

        bestName, bestGap = name, gap

    return bestName

In [45]:
# Load the census table of provincial and municipal codes; its NOMBRE column
# supplies the town names that are geocoded further down.
censusCities = pd.read_excel('../aux_data/01_census/data/provincial and municipal codes.xlsx')
censusCities

Unnamed: 0,CODAUTO,CPRO,CMUN,DC,NOMBRE
0,16,1,1,4,Alegría-Dulantzi
1,16,1,2,9,Amurrio
2,16,1,3,5,Aramaio
3,16,1,4,0,Artziniega
4,16,1,6,6,Armiñón
...,...,...,...,...,...
8126,2,50,296,7,"Zaida, La"
8127,2,50,297,3,Zaragoza
8128,2,50,298,9,Zuera
8129,18,51,1,3,Ceuta


In [49]:
# Result containers keyed by town name, pre-filled with NaN.
# NOTE(review): dict keys are unique, so rows sharing a NOMBRE collapse into
# one entry (the recorded run shows 8114 iterations while the table index
# reaches 8129) - confirm that same-named towns may share a result.
censusCityCoords = {townName: np.nan for townName in censusCities['NOMBRE']}
censusCitiesClosestGlovo = {townName: np.nan for townName in censusCities['NOMBRE']}

# Geocode each town, pausing 1.5 s before every request, then record both
# its coordinates and the nearest Glovo city.
for city in tqdm(censusCityCoords):
    sleep(1.5)

    coords = getCoords(city)
    censusCityCoords[city] = coords
    censusCitiesClosestGlovo[city] = closestGlovoCity(coords)



100%|██████████| 8114/8114 [3:51:40<00:00,  1.71s/it]  


In [50]:
# Save both results of the census geocoding run to disk as JSON files.
backup(censusCitiesClosestGlovo, 'censusCitiesClosestGlovo.json')
backup(censusCityCoords, 'censusCityCoords.json')