## Buscar direcciones con API de geocódigos que usan NLP

Instalación de API

In [None]:
!pip install arcgis

Importación de librerías

In [1]:
from arcgis.gis import GIS
from arcgis.geocoding import geocode, reverse_geocode
from arcgis.geometry import Point

In [2]:
gis = GIS()

In [3]:
import pandas as pd

Leyendo archivo del que se consultarán las direcciones

In [4]:
df = pd.read_csv("direcciones.csv", sep=";", encoding="cp1252")
df.head()

Unnamed: 0,Campo1,CLIENTE,SERVICIO,TIPOVIA,NOMBREVIA,PUERTA,PISO,MZ,LOTE,DISTRITO,...,TAP,BORNE,Campo31,Campo32,Campo33,Campo34,Campo35,ID,PAIS,DEPARTAMENTO
0,PUN,6661946,5714017,ND,.,0,,C,10.0,JULIACA,...,9.0,1.0,8/03/2017,09:32:58,7507925.0,S,G003,1,PERÚ,PUNO
1,PUN,6708518,5714051,JR,PORRAS BARRENECHEA RAUL,219,,,,JULIACA,...,3.0,1.0,21/03/2017,18:31:39,7507926.0,S,G003,2,PERÚ,PUNO
2,PUN,6711806,5799021,,TUPAC YUPANQUI,0,,H12,8.0,JULIACA,...,10.0,1.0,22/03/2017,18:25:57,7608640.0,S,G003,3,PERÚ,PUNO
3,PUN,6740184,5821315,JR,VILQUE CHICO,415,,,,JULIACA,...,14.0,1.0,30/03/2017,15:51:03,7635893.0,S,G003,4,PERÚ,PUNO
4,PUN,6740202,5821277,JR,24 DE JUNIO,124,,,,JULIACA,...,7.0,1.0,30/03/2017,15:56:03,7634846.0,S,G003,5,PERÚ,PUNO


Evaluando la dirección y creando campos

In [5]:
# df["ADDRESS"] = df["DIRECCION"].apply(lambda x: x + ", Puno, PER")
df["ADDRESS"] = df["DIRECCION"].apply(lambda x: x.replace("PERÚ", "PER"))
# df["ADDRESS"] = df["DIRECCION"]
df["x"] = 0
df["y"] = 0
df["Match_addr"] = ""
df["Score"] = ""

In [6]:
df.head()

Unnamed: 0,Campo1,CLIENTE,SERVICIO,TIPOVIA,NOMBREVIA,PUERTA,PISO,MZ,LOTE,DISTRITO,...,Campo34,Campo35,ID,PAIS,DEPARTAMENTO,ADDRESS,x,y,Match_addr,Score
0,PUN,6661946,5714017,ND,.,0,,C,10.0,JULIACA,...,S,G003,1,PERÚ,PUNO,"MZ C LOTE 10, UR RINCONADA, JULIACA, PER",0,0,,
1,PUN,6708518,5714051,JR,PORRAS BARRENECHEA RAUL,219,,,,JULIACA,...,S,G003,2,PERÚ,PUNO,"JR PORRAS BARRENECHEA RAUL 219, JULIACA, PER",0,0,,
2,PUN,6711806,5799021,,TUPAC YUPANQUI,0,,H12,8.0,JULIACA,...,S,G003,3,PERÚ,PUNO,"MZ H12 LOTE 8, MUN. LA CAPILLA, JULIACA, PER",0,0,,
3,PUN,6740184,5821315,JR,VILQUE CHICO,415,,,,JULIACA,...,S,G003,4,PERÚ,PUNO,"JR VILQUE CHICO 415, JULIACA, PER",0,0,,
4,PUN,6740202,5821277,JR,24 DE JUNIO,124,,,,JULIACA,...,S,G003,5,PERÚ,PUNO,"JR 24 DE JUNIO 124, JULIACA, PER",0,0,,


Definiendo una función de consulta e inserción de elementos a partir de la dirección buscada

In [7]:
def extractAddress(df, indice, idDf, address):
    gis = GIS()
    geocode_result = geocode(address = df.iloc[indice][address], as_featureset=True, max_locations=3) # Cambiar la dirección
    for x in geocode_result.features:
        gdpoint = x.as_dict["geometry"]
        attr = x.as_dict["attributes"]
        df = df.append(pd.Series([df.iloc[indice][idDf], gdpoint["x"], gdpoint["y"], attr["Match_addr"], attr["Score"]], index=["CLIENTE", "x", "y", "Match_addr", "Score"] ), ignore_index=True)
    return df


Ejecutando la función para los primeros 50 elementos

In [8]:
# for n in range(len(df)):
for n in range(50):
    if n%10==0: print(n)
    df = extractAddress(df, n, "CLIENTE", "ADDRESS")

0
10
20
30
40


Visualizando resultados por cada CLIENTE

In [10]:
df=df.sort_values(by='CLIENTE')
df.head()

Unnamed: 0,Campo1,CLIENTE,SERVICIO,TIPOVIA,NOMBREVIA,PUERTA,PISO,MZ,LOTE,DISTRITO,...,Campo34,Campo35,ID,PAIS,DEPARTAMENTO,ADDRESS,x,y,Match_addr,Score
0,PUN,6661946,5714017.0,ND,.,0.0,,C,10.0,JULIACA,...,S,G003,1.0,PERÚ,PUNO,"MZ C LOTE 10, UR RINCONADA, JULIACA, PER",0.0,0.0,,
18301,,6661946,,,,,,,,,...,,,,,,,-77.73333,-10.41667,"Rinconada, Barranca, Lima",78.67
18300,,6661946,,,,,,,,,...,,,,,,,-77.40083,-8.14722,"Rinconada, Provincia de Pataz, La Libertad",78.67
18299,,6661946,,,,,,,,,...,,,,,,,-70.141062,-15.505383,"Avenida La Rinconada, Juliaca, San Román, Puno...",82.91
1,PUN,6708518,5714051.0,JR,PORRAS BARRENECHEA RAUL,219.0,,,,JULIACA,...,S,G003,2.0,PERÚ,PUNO,"JR PORRAS BARRENECHEA RAUL 219, JULIACA, PER",0.0,0.0,,


Exportando resultados a formato excel

In [0]:
df.to_excel("resultado.xls")