# Geolocalización

In [1]:
import ast
import csv
import os
import re
from collections import defaultdict
from collections import Counter
from datetime import datetime
from datetime import timedelta
from string import punctuation

import matplotlib
import numpy as np
import networkx as nx
import pandas as pd
import requests
import seaborn as sns
from geolocation.main import GoogleMaps
from matplotlib import pyplot as plt
from SPARQLWrapper import SPARQLWrapper, JSON

from googlemaps_key import KEY

## Autores

In [2]:
%%time

autores_df = pd.read_csv(
    'autores_colombianos_3.csv',
    header=0,
    converters={
        'GEOLOC_CAPITAL_AUTOR': ast.literal_eval,
        'GEOLOC_CAPITAL_EJEMPLAR': ast.literal_eval
    }
)
autores_df = autores_df.fillna('')

CPU times: user 168 ms, sys: 3.92 ms, total: 172 ms
Wall time: 171 ms


In [3]:
autores_df.shape

(3369, 13)

In [4]:
autores_df.head()

Unnamed: 0,TITULO,PAIS_EJEMPLAR,AUTOR,PAIS_AUTOR,FECHA_PUB,CAPITAL_AUTOR,CAPITAL_EJEMPLAR,GEOLOC_CAPITAL_AUTOR,GEOLOC_CAPITAL_EJEMPLAR,CIUDAD_AUTOR,DEPARTAMENTO_AUTOR,ENCONTRADO,NOMBRE_ENCONTRADO
0,En la laguna más profunda,Colombia,Oscar Collazos,Colombia,2011,Bogotá,Bogotá,"(4.710988599999999, -74.072092)","(4.710988599999999, -74.072092)",Bahía Solano,Chocó,True,Oscar Collazos
1,Proyecto para un arca en medio de un diluvio d...,Colombia,Luis Carlos Restrepo,Colombia,1997,Bogotá,Bogotá,"(4.710988599999999, -74.072092)","(4.710988599999999, -74.072092)",Filandia,Quindío,True,Luis Carlos Restrepo
2,"Octubre, 1942-1952",Colombia,Carmelina Soto Valencia,Colombia,1953,Bogotá,Bogotá,"(4.710988599999999, -74.072092)","(4.710988599999999, -74.072092)",Armenia,Quindío,True,Carmelina Soto Valencia
3,Origen y desarrollo del camino del quindio,Colombia,Jesús Arango Cano,Colombia,2002,Bogotá,Bogotá,"(4.710988599999999, -74.072092)","(4.710988599999999, -74.072092)",La Tebaida,Quindío,True,Jesús Arango Cano
4,Mi gran aventura cósmica,Colombia,Jesús Arango Cano,Colombia,1976,Bogotá,Bogotá,"(4.710988599999999, -74.072092)","(4.710988599999999, -74.072092)",La Tebaida,Quindío,True,Jesús Arango Cano


In [None]:
autores_df = autores_df[
    (autores_df['CIUDAD_AUTOR'] != 'DESCONOCIDO') &
    (autores_df['DEPARTAMENTO_AUTOR'] != 'DESCONOCIDO')
]
autores_df = autores_df.copy()

## Geocoordinates

In [5]:
autores_df['CIUDAD_AUTOR'].nunique()

144

In [6]:
%%time

places = set()
for i, x in autores_df.iterrows():
    ciudad = x['CIUDAD_AUTOR']
    departamento = x['DEPARTAMENTO_AUTOR']
    pais = 'Colombia'
    places.add((ciudad, departamento, pais))

CPU times: user 446 ms, sys: 0 ns, total: 446 ms
Wall time: 445 ms


In [7]:
len(places)

144

In [8]:
google_maps = GoogleMaps(api_key=KEY)

In [9]:
%%time

locs = {} # (lat, lng)

for p in places:
    found = False
    try:
        locations = google_maps.search(location=', '.join(p))
        if len(locations.list_data) == 1:
            location = locations.list_data[0]
            lc = location.country.decode('utf-8') 
            if lc == p[2]:
                locs[p[0]] = (location.lat, location.lng)
                found = True
            else:
                print('NOT FOUND:', p)
    except ApiClientException as e:
        pass
    if not found:
        print('NOT FOUND:', p)

NOT FOUND: ('El Socorro', 'Santander', 'Colombia')
NOT FOUND: ('Ibagué', 'Tolima', 'Colombia')
NOT FOUND: ('Amalfi', 'Antioquia', 'Colombia')
NOT FOUND: ('Chía', 'Cundinamarca', 'Colombia')
NOT FOUND: ('Aracataca', 'Magdalena', 'Colombia')
NOT FOUND: ('Santa Fe de Antioquia', 'Antioquia', 'Colombia')
NOT FOUND: ('Medellín', 'Antioquia', 'Colombia')
NOT FOUND: ('La Paz', 'Cesar', 'Colombia')
CPU times: user 1.96 s, sys: 98.5 ms, total: 2.06 s
Wall time: 1min 19s


In [24]:
locs['El Socorro'] = (6.467778, -73.259722)
locs['Ibagué'] = (4.433333, -75.233333)
locs['Amalfi'] = (6.909167, -75.076667)
locs['Chía'] = (4.864758, -74.05091800000001)
locs['Aracataca'] = (10.593694, -74.192808)
locs['Santa Fe de Antioquia'] = (6.55, -75.816667)
locs['Medellín'] = (6.230833, -75.590556)
locs['La Paz'] = (6.189809899999999, -73.57585)

In [25]:
%%time

def process_geolocation_ciudad_autor(x):
    c = x['CIUDAD_AUTOR']
    return locs[c]

autores_df['GEOLOC_CIUDAD_AUTOR'] = autores_df.apply(process_geolocation_ciudad_autor, axis=1)

CPU times: user 114 ms, sys: 0 ns, total: 114 ms
Wall time: 116 ms


# Saving results

In [26]:
autores_df.to_csv('autores_colombianos_4.csv', index=False)