# Geolocation

In [None]:
import csv
import os
import re
from collections import defaultdict
from collections import Counter
from datetime import datetime
from datetime import timedelta
from string import punctuation

import matplotlib
import numpy as np
import networkx as nx
import pandas as pd
import requests
import seaborn as sns
from geolocation.exceptions import ApiClientException
from geolocation.main import GoogleMaps
from matplotlib import pyplot as plt
from SPARQLWrapper import SPARQLWrapper, JSON

from googlemaps_key import KEY

# Autores

In [None]:
%%time

# Load the authors table (first line of the file is the header) and replace
# every missing value with an empty string.
autores_df = pd.read_csv('autores6.csv', header=0).fillna('')

In [None]:
autores_df.shape

In [None]:
autores_df.head()

## Limpiando datos

In [None]:
# Drop every row whose author capital or ejemplar capital is the
# 'DESCONOCIDO' ("unknown") placeholder; both must be known to keep the row.
known_capital_autor = autores_df['CAPITAL_AUTOR'] != 'DESCONOCIDO'
known_capital_ejemplar = autores_df['CAPITAL_EJEMPLAR'] != 'DESCONOCIDO'
autores_df = autores_df[known_capital_autor & known_capital_ejemplar]

In [None]:
autores_df.shape

In [None]:
autores_df.head()

# Geocoordinates

In [None]:
autores_df['CAPITAL_EJEMPLAR'].nunique()

In [None]:
autores_df['CAPITAL_AUTOR'].nunique()

In [None]:
%%time

# Collect every distinct (capital, country) pair that appears in the table,
# from both the ejemplar columns and the author columns.
# Zipping the columns directly avoids DataFrame.iterrows(), which builds a
# full Series object for every row just to read four values.
places = set(zip(autores_df['CAPITAL_EJEMPLAR'], autores_df['PAIS_EJEMPLAR']))
places.update(zip(autores_df['CAPITAL_AUTOR'], autores_df['PAIS_AUTOR']))

In [None]:
len(places)

In [None]:
# Map each capital name to the set of countries it is paired with in `places`.
capital_countries_d = defaultdict(set)
for capital_name, country_name in places:
    capital_countries_d[capital_name].add(country_name)

In [None]:
# Report every capital name that is paired with more than one country.
for capital_name, country_names in capital_countries_d.items():
    if len(country_names) > 1:
        print(capital_name, '-', country_names)

##### ==> No capital name is shared by two different countries

In [None]:
# Geocoding client used by the lookups below; KEY is imported from the local
# googlemaps_key module.
google_maps = GoogleMaps(api_key=KEY)

In [None]:
%%time

def _pick_location(candidates, allowed_countries, accept_unknown_country):
    """Choose coordinates from a list of geocoding candidates.

    Args:
        candidates: Sequence of result objects exposing ``lat``, ``lng`` and
            ``country`` attributes.
        allowed_countries: Set of country names the capital is known to
            belong to.
        accept_unknown_country: When true, a candidate whose ``country`` is
            ``None`` counts as a match; when false, such a candidate is
            skipped.

    Returns:
        A ``(lat, lng)`` tuple, or ``None`` when no candidate qualifies.
    """
    # A single result is trusted without checking its country.
    if len(candidates) == 1:
        only = candidates[0]
        return (only.lat, only.lng)

    for candidate in candidates:
        country = candidate.country
        if country is None:
            if accept_unknown_country:
                return (candidate.lat, candidate.lng)
            # Nothing to compare against; decoding None would raise TypeError.
            continue
        # The original code decoded this value with str(..., 'utf-8'), so it
        # is presumably UTF-8 bytes; plain str values are used as-is.
        if isinstance(country, bytes):
            country = str(country, 'utf-8')
        if country in allowed_countries:
            return (candidate.lat, candidate.lng)
    return None


locs = {}  # capital name -> (lat, lng)

for capital, country in places:
    if capital in locs:
        continue

    allowed_countries = capital_countries_d[capital]
    coords = None

    # First attempt: search for "capital, country".
    try:
        locations = google_maps.search(location=capital + ', ' + country)
        print(locations.list_data)
        coords = _pick_location(
            locations.list_data, allowed_countries, accept_unknown_country=True
        )
    except ApiClientException as e:
        # Keep going (best effort) but do not hide the failure.
        print('API ERROR:', capital, '-', country, '-', e)

    # Fallback: search for the capital name alone. Here a candidate must
    # carry a country so it can be checked against the known ones.
    if coords is None:
        try:
            locations = google_maps.search(location=capital)
            coords = _pick_location(
                locations.list_data, allowed_countries, accept_unknown_country=False
            )
        except ApiClientException as e:
            print('API ERROR:', capital, '-', e)

    if coords is None:
        print('NOT FOUND:', capital, '-', country)
    else:
        locs[capital] = coords

In [None]:
# Manual (lat, lng) overrides; these replace whatever the automatic lookup
# stored (or failed to store) for the same capital names.
locs.update({
    'Tokyo': (35.6732615, 139.5699618),
    # The previous value (-8.792258, 99.0179762) lies in the open Indian
    # Ocean; Jakarta is at roughly 6.21 S, 106.85 E.
    'Jakarta': (-6.2087634, 106.845599),
    'Gibraltar': (36.1407734, -5.353599399999999),
    'Singapore': (1.3147268, 103.7065911),
    'Vatican City': (41.9021788, 12.4536007),
    # Batavia is the former name of Jakarta, so it shares its coordinates.
    'Batavia, Dutch East Indies': (-6.2087634, 106.845599),
    'Brasília': (-15.7942287, -47.8821658),
    'Monaco': (43.7400718, 7.426643599999999),
    'Persépolis': (29.934444, 52.891389),
    'Kingdom of England': (51.5073509, -0.1277583),
    'Tagsatzung': (47.3774336, 8.466504),
    'Brunswick': (52.266667, 10.516667),
    'Hong Kong': (22.3, 114.2),
})

In [None]:
%%time

def process_geolocation_capital_autor(x):
    """Look up the coordinates stored for a row's author capital.

    Args:
        x: A row of ``autores_df`` (indexable by ``'CAPITAL_AUTOR'``).

    Returns:
        The value stored in ``locs`` for that capital, or ``(0, 0)`` when the
        capital has no entry.
    """
    fallback = (0, 0)
    capital = x['CAPITAL_AUTOR']
    if capital in locs:
        return locs[capital]
    return fallback


# Row-wise lookup; the result is stored as a new column.
autores_df['GEOLOC_CAPITAL_AUTOR'] = autores_df.apply(
    process_geolocation_capital_autor,
    axis=1,
)

In [None]:
%%time

def process_geolocation_capital_ejemplar(x):
    """Look up the coordinates stored for a row's ejemplar capital.

    Args:
        x: A row of ``autores_df`` (indexable by ``'CAPITAL_EJEMPLAR'``).

    Returns:
        The value stored in ``locs`` for that capital, or ``(0, 0)`` when the
        capital has no entry.
    """
    fallback = (0, 0)
    capital = x['CAPITAL_EJEMPLAR']
    if capital in locs:
        return locs[capital]
    return fallback


# Row-wise lookup; the result is stored as a new column.
autores_df['GEOLOC_CAPITAL_EJEMPLAR'] = autores_df.apply(
    process_geolocation_capital_ejemplar,
    axis=1,
)

# Saving results

In [None]:
# Write the table, including the two GEOLOC_* columns added above, to a new
# CSV file; the DataFrame index is not written.
autores_df.to_csv('autores7.csv', index=False)