# Geolocalización

In [1]:
import ast
import csv
import os
import re
from collections import defaultdict
from collections import Counter
from datetime import datetime
from datetime import timedelta
from string import punctuation

import matplotlib
import numpy as np
import networkx as nx
import pandas as pd
import requests
import seaborn as sns
from geolocation.main import GoogleMaps
from matplotlib import pyplot as plt
from SPARQLWrapper import SPARQLWrapper, JSON

from googlemaps_key import KEY

## Autores

In [2]:
%%time

autores_df = pd.read_csv(
    'autores_colombianos_3.csv',
    header=0,
    converters={
        'GEOLOC_CAPITAL_AUTOR': ast.literal_eval,
        'GEOLOC_CAPITAL_EJEMPLAR': ast.literal_eval
    }
)
autores_df = autores_df.fillna('')

CPU times: user 237 ms, sys: 6.62 ms, total: 244 ms
Wall time: 243 ms


In [3]:
autores_df.shape

(5551, 13)

In [4]:
autores_df.head()

Unnamed: 0,TITULO,PAIS_EJEMPLAR,AUTOR,PAIS_AUTOR,FECHA_PUB,CAPITAL_AUTOR,CAPITAL_EJEMPLAR,GEOLOC_CAPITAL_AUTOR,GEOLOC_CAPITAL_EJEMPLAR,CIUDAD_AUTOR,DEPARTAMENTO_AUTOR,ENCONTRADO,NOMBRE_ENCONTRADO
0,En la laguna más profunda,Colombia,Oscar Collazos,Colombia,2011,Bogotá,Bogotá,"(4.710988599999999, -74.072092)","(4.710988599999999, -74.072092)",Bahía Solano,Chocó,True,Oscar Collazos
1,El gato bandido y otros cuentos,Colombia,Rafael Pombo,Colombia,2007,Bogotá,Bogotá,"(4.710988599999999, -74.072092)","(4.710988599999999, -74.072092)",Bogotá,Cundinamarca,True,Rafael Pombo
2,Cuentos pintados,Colombia,Rafael Pombo,Colombia,2011,Bogotá,Bogotá,"(4.710988599999999, -74.072092)","(4.710988599999999, -74.072092)",Bogotá,Cundinamarca,True,Rafael Pombo
3,Aleida : 10 años,Colombia,Vladdo,Colombia,2007,Bogotá,Bogotá,"(4.710988599999999, -74.072092)","(4.710988599999999, -74.072092)",Bogotá,Cundinamarca,True,Vladdo
4,Mitos y leyendas indígenas de Colombia,Colombia,Javier Ocampo López,Colombia,2013,Bogotá,Bogotá,"(4.710988599999999, -74.072092)","(4.710988599999999, -74.072092)",Aguadas,Caldas,True,Javier Ocampo López


In [5]:
autores_df = autores_df[
    (autores_df['CIUDAD_AUTOR'] != 'DESCONOCIDO') &
    (autores_df['DEPARTAMENTO_AUTOR'] != 'DESCONOCIDO')
]
autores_df = autores_df.copy()

In [6]:
autores_df.shape

(5364, 13)

## Geocoordinates

In [7]:
autores_df['CIUDAD_AUTOR'].nunique()

148

In [8]:
%%time

places = set()
for i, x in autores_df.iterrows():
    ciudad = x['CIUDAD_AUTOR']
    departamento = x['DEPARTAMENTO_AUTOR']
    pais = 'Colombia'
    places.add((ciudad, departamento, pais))

CPU times: user 697 ms, sys: 0 ns, total: 697 ms
Wall time: 694 ms


In [9]:
len(places)

148

In [10]:
google_maps = GoogleMaps(api_key=KEY)

In [11]:
%%time

locs = {} # (lat, lng)

for p in places:
    found = False
    try:
        locations = google_maps.search(location=', '.join(p))
        if len(locations.list_data) == 1:
            location = locations.list_data[0]
            lc = location.country.decode('utf-8') 
            if lc == p[2]:
                locs[p[0]] = (location.lat, location.lng)
                found = True
            else:
                print('NOT FOUND:', p)
    except ApiClientException as e:
        pass
    if not found:
        print('NOT FOUND:', p)

NOT FOUND: ('Aracataca', 'Magdalena', 'Colombia')
NOT FOUND: ('Ibagué', 'Tolima', 'Colombia')
NOT FOUND: ('Socorro', 'Santander', 'Colombia')
NOT FOUND: ('La Paz', 'Cesar', 'Colombia')
NOT FOUND: ('Amalfi', 'Antioquia', 'Colombia')
NOT FOUND: ('Medellín', 'Antioquia', 'Colombia')
NOT FOUND: ('Chía', 'Cundinamarca', 'Colombia')
NOT FOUND: ('Santa Fe de Antioquia', 'Antioquia', 'Colombia')
CPU times: user 3.52 s, sys: 141 ms, total: 3.66 s
Wall time: 1min 51s


In [12]:
locs['Aracataca'] = (10.593694, -74.192808)
locs['Ibagué'] = (4.433333, -75.233333)
locs['Socorro'] = (6.467778, -73.259722)
locs['La Paz'] = (6.189809899999999, -73.57585)
locs['Amalfi'] = (6.909167, -75.076667)
locs['Medellín'] = (6.230833, -75.590556)
locs['Chía'] = (4.864758, -74.05091800000001)
locs['Santa Fe de Antioquia'] = (6.55, -75.816667)

In [13]:
%%time

def process_geolocation_ciudad_autor(x):
    c = x['CIUDAD_AUTOR']
    return locs[c]

autores_df['GEOLOC_CIUDAD_AUTOR'] = autores_df.apply(process_geolocation_ciudad_autor, axis=1)

CPU times: user 168 ms, sys: 353 µs, total: 169 ms
Wall time: 167 ms


# Saving results

In [14]:
autores_df.to_csv('autores_colombianos_4.csv', index=False)