In [3]:
# Importing required libraries
import geocoder # import geocoder
import numpy as np
import pandas as pd
import requests  # library to handle requests
from bs4 import BeautifulSoup

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
import geopy.geocoders
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library


import certifi # Nominatum requires a certificate
import ssl

print('Libraries loaded')

Libraries loaded


# Getting neighborhoods data
* Bogotá is divided in neighborhoods and localities. It was required to have a data source describing this division.  After looking for in diferent web sites, it was decided to use following pdf file, which come from the "Secretaria de Salud Distrital":

http://www.saludcapital.gov.co/DPYS/Tablas%20de%20Referencia/Codificación%20de%20Barrios%20por%20localidad.pdf

In [11]:
# It was required to perform a copy and paste from pdf file and clean some empty rows.
# The result was the following csv file

bogota_n = pd.read_csv('bogota_barrios.csv')

# Dropping empty cells using "PostalCode"
bogota_n = bogota_n.dropna(subset=['barrio'])

print(bogota_n.head())
print(bogota_n.shape)

   cod_barrio              barrio       Localidad   Co_digo Localidad 
0        1203            CARACAS   ANTONIO NARINO                   15
1        1202       CIUDAD BERNA   ANTONIO NARINO                   15
2        1204  CIUDAD JARDIN SUR   ANTONIO NARINO                   15
3        2301       EDUARDO FREI   ANTONIO NARINO                   15
4        2107          LA FRAGUA   ANTONIO NARINO                   15
(831, 4)


## Adding Geospatial coordinates

This part of the process required a lot of time. I decided to use the Nominatim geolocator from geopy library, the following code was used to generate a csv file containing the geospatial coordinates of each Bogota neighborhood

In [10]:
ctx = ssl.create_default_context(cafile=certifi.where())
geopy.geocoders.options.default_ssl_context = ctx

bogota_n = pd.read_csv('bogota_barrios.csv')
bogota_n = bogota_n.dropna(subset=['barrio'])

bog_loc_n = []
for index, neighborhood in bogota_n.iterrows():
    try:
        geolocator = Nominatim(user_agent='foursquare_agent')
        location = geolocator.geocode('{}, {}, Bogotá, Distrito Capital, Colombia'.format(neighborhood['barrio'], neighborhood['localidad']))
        if location:
            print(neighborhood['barrio'], location)
            bog_loc_n.append((neighborhood['barrio'], neighborhood['localidad'], location.latitude, location.longitude))
    except:
        pass

bog_loc_df = pd.DataFrame(bog_loc_n, columns=['neighborhood','locality','latitude','longitude'])
bog_loc_df.to_csv('bog_geospatial..csv', index=False)

print(bog_loc_df.head())

Empty DataFrame
Columns: [neighborhood, locality, latitude, longitude]
Index: []


The resulting csv file is the following:

In [15]:
bogota_geo = pd.read_csv('bog_geospatial.csv')

print(bogota_geo.head())
print(bogota_geo.shape)

         neighborhood         locality  latitude  longitude
0            CARACAS   ANTONIO NARINO   4.591831 -74.088903
1       CIUDAD BERNA   ANTONIO NARINO   4.582115 -74.090310
2  CIUDAD JARDIN SUR   ANTONIO NARINO   4.580311 -74.096289
3          LA FRAGUA   ANTONIO NARINO   4.602600 -74.137119
4        LA FRAGUITA   ANTONIO NARINO   4.594836 -74.100720
(501, 4)


#### Let's visualizate Bogota and the neighborhoods in it.

In [19]:
# create map of Bogota using latitude and longitude values
address = 'Bogota, Colombia'
geolocator = Nominatim(user_agent="bogota_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Bogota City are {}, {}.'.format(latitude, longitude))

map_bogota = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(bogota_geo['latitude'], bogota_geo['longitude'], bogota_geo['neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_bogota)  
    
map_bogota


The geograpical coordinate of Bogota City are 4.5980772, -74.0761028.
