# Geolocalización

In [1]:
import ast
import csv
import os
import re
from collections import defaultdict
from collections import Counter
from datetime import datetime
from datetime import timedelta
from string import punctuation

import matplotlib
import numpy as np
import networkx as nx
import pandas as pd
import requests
import seaborn as sns
from geolocation.main import GoogleMaps
from matplotlib import pyplot as plt
from SPARQLWrapper import SPARQLWrapper, JSON

from googlemaps_key import KEY

## Autores

In [2]:
%%time

autores_df = pd.read_csv(
    'autores_colombianos_2.csv',
    header=0,
    converters={
        'GEOLOC_CAPITAL_AUTOR': ast.literal_eval,
        'GEOLOC_CAPITAL_EJEMPLAR': ast.literal_eval
    }
)
autores_df = autores_df.fillna('')

CPU times: user 162 ms, sys: 857 µs, total: 163 ms
Wall time: 162 ms


In [3]:
autores_df.shape

(3450, 12)

In [4]:
autores_df.head()

Unnamed: 0,TITULO,FECHA_PUB,PAIS_EJEMPLAR,AUTOR,PAIS_AUTOR,NOMBRE_LOCALIZADO,CAPITAL_AUTOR,CAPITAL_EJEMPLAR,GEOLOC_CAPITAL_AUTOR,GEOLOC_CAPITAL_EJEMPLAR,CIUDAD_AUTOR,DEPARTAMENTO_AUTOR
0,En la laguna más profunda,2011,Colombia,Oscar Collazos,Colombia,Oscar Collazos,Bogotá,Bogotá,"(4.710988599999999, -74.072092)","(4.710988599999999, -74.072092)",Bahía Solano,Chocó
1,Zoro,1997,Colombia,Jairo Aníbal Niño,Colombia,Jairo Aníbal Niño,Bogotá,Bogotá,"(4.710988599999999, -74.072092)","(4.710988599999999, -74.072092)",Moniquirá,Boyacá
2,Mitos y leyendas indígenas de Colombia,2013,Colombia,Javier Ocampo López,Colombia,Javier Ocampo López,Bogotá,Bogotá,"(4.710988599999999, -74.072092)","(4.710988599999999, -74.072092)",Aguadas,Caldas
3,Puro pueblo,2002,Colombia,Jairo Aníbal Niño,Colombia,Jairo Aníbal Niño,Bogotá,Bogotá,"(4.710988599999999, -74.072092)","(4.710988599999999, -74.072092)",Moniquirá,Boyacá
4,"Octubre, 1942-1952",1953,Colombia,Carmelina Soto Valencia,Colombia,Carmelina Soto Valencia,Bogotá,Bogotá,"(4.710988599999999, -74.072092)","(4.710988599999999, -74.072092)",Armenia,Quindío


In [5]:
autores_df = autores_df[
    (autores_df['CIUDAD_AUTOR'] != 'DESCONOCIDO') &
    (autores_df['DEPARTAMENTO_AUTOR'] != 'DESCONOCIDO')
]
autores_df = autores_df.copy()

In [6]:
autores_df.shape

(3217, 12)

## Geocoordinates

In [7]:
autores_df['CIUDAD_AUTOR'].nunique()

69

In [8]:
%%time

places = set()
for i, x in autores_df.iterrows():
    ciudad = x['CIUDAD_AUTOR']
    departamento = x['DEPARTAMENTO_AUTOR']
    pais = 'Colombia'
    places.add((ciudad, departamento, pais))

CPU times: user 407 ms, sys: 38 µs, total: 407 ms
Wall time: 404 ms


In [9]:
len(places)

69

In [10]:
google_maps = GoogleMaps(api_key=KEY)

In [11]:
%%time

locs = {} # (lat, lng)

for p in places:
    found = False
    try:
        locations = google_maps.search(location=', '.join(p))
        if len(locations.list_data) == 1:
            location = locations.list_data[0]
            lc = location.country.decode('utf-8') 
            if lc == p[2]:
                locs[p[0]] = (location.lat, location.lng)
                found = True
            else:
                print('NOT FOUND:', p)
    except ApiClientException as e:
        pass
    if not found:
        print('NOT FOUND:', p)

NOT FOUND: ('Ibagué', 'Tolima', 'Colombia')
NOT FOUND: ('Aracataca', 'Magdalena', 'Colombia')
NOT FOUND: ('San Vicente de Chucurí', 'Santander', 'Colombia')
NOT FOUND: ('Amalfi', 'Antioquia', 'Colombia')
NOT FOUND: ('Medellín', 'Antioquia', 'Colombia')
CPU times: user 1.63 s, sys: 61.4 ms, total: 1.69 s
Wall time: 25.4 s


In [12]:
locs['Amalfi'] = (6.909167, -75.076667)
locs['Aracataca'] = (10.593694, -74.192808)
locs['Ibagué'] = (4.433333, -75.233333)
locs['Medellín'] = (6.230833, -75.590556)
locs['San Vicente de Chucurí'] = (6.881944, -73.411944)

In [13]:
%%time

def process_geolocation_ciudad_autor(x):
    c = x['CIUDAD_AUTOR']
    return locs[c]

autores_df['GEOLOC_CIUDAD_AUTOR'] = autores_df.apply(process_geolocation_ciudad_autor, axis=1)

CPU times: user 98 ms, sys: 360 µs, total: 98.4 ms
Wall time: 97.2 ms


# Saving results

In [14]:
autores_df.to_csv('autores_colombianos_3.csv', index=False)