In [1]:
import csv
import json
import os
import sys
from math import sin, cos, sqrt, atan2, radians

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
%matplotlib inline

######################## Constants #############################
R_EARTH = 6373.0  # approximate radius of earth in km

# Coordinate tuples in this notebook are ordered (lon, lat).
LON_IDX = 0
LAT_IDX = 1
# Short aliases of the indexes above; both spellings are used by later cells.
LON = LON_IDX
LAT = LAT_IDX

# Google API keys are read from the GOOGLE_API_KEYS environment variable
# (comma-separated) instead of being hardcoded, so that no credential is
# stored in the notebook or its version history.
# NOTE(review): the key that used to be committed here should be revoked.
# If the variable is unset the list is empty and `google_api_keys[0]` raises
# IndexError, which makes the missing configuration fail loudly.
google_api_keys = [
    key.strip()
    for key in os.environ.get('GOOGLE_API_KEYS', '').split(',')
    if key.strip()
]


In [2]:
def get_distance(point_a, point_b):
    """Return the great-circle distance between two points using the haversine formula.

    @param point_a (tuple) coordinates in degrees, indexed by LON_IDX / LAT_IDX
    @param point_b (tuple) coordinates in degrees, indexed by LON_IDX / LAT_IDX
    @return (float) distance in the unit of R_EARTH (kilometres)
    """
    lon1 = radians(point_a[LON_IDX])
    lon2 = radians(point_b[LON_IDX])
    lat1 = radians(point_a[LAT_IDX])
    lat2 = radians(point_b[LAT_IDX])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (nearly) antipodal points, which would make sqrt(1 - a) raise
    # ValueError; clamp it to the mathematically valid range first.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R_EARTH * c

In [3]:
# Load the localities CSV. low_memory=False makes pandas parse the file in a
# single pass rather than in chunks.
data = pd.read_csv(
    "./data/01_output_localities.csv",
    low_memory=False,
)
# Show the first five rows as a quick sanity check.
data.head(n=5)

Unnamed: 0,localidad_id,lon,lat,distance
0,644470,-98.233055,19.312222,0.84449
1,644472,-98.258888,19.295,3.213229
2,290236,-98.238609,19.316668,0.080223
3,644469,-98.216942,19.335833,3.127066
4,210354,-105.206047,21.808336,0.274223


In [4]:
############ for each point search nearest bansefi branch
# One Google Places "nearby search" request is issued per row of `data`; the
# raw Response objects are collected in `banks_by_point`, in row order.
PLACES_NEARBY_URL = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json'

banks_by_point = []
for item in data.itertuples():
    # Columns are read by name rather than by tuple position so the request
    # does not silently break if the column order of `data` changes.
    query = {
        'location': '%s,%s' % (item.lat, item.lon),  # the API expects "lat,lng"
        'radius': 50000,
        'keyword': 'bansefi',
        'type': 'bank',
        'key': google_api_keys[0],
    }
    # `params=` lets requests URL-encode the values; the timeout stops one
    # stalled connection from hanging the whole loop indefinitely.
    r = requests.get(PLACES_NEARBY_URL, params=query, timeout=30)
    banks_by_point.append(r)


In [5]:
# Create the result columns; rows for which no branch is found keep the
# placeholder value 'missing'.
for column in ('google_distance', 'ban_lon', 'ban_lat', 'place_id'):
    data[column] = 'missing'

# For every locality, keep the closest branch among the results returned for it.
for bank_item, point in zip(banks_by_point, data.itertuples()):
    point_a = (point.lon, point.lat)
    # float('inf') replaces sys.maxint, which does not exist on Python 3.
    min_distance = float('inf')
    # A payload without a 'results' key is treated as "no results".
    for item in bank_item.json().get('results', []):
        location = item['geometry']['location']
        point_b = (location['lng'], location['lat'])
        distance_item = get_distance(point_a, point_b)

        # Strict comparison: on ties the first result returned is kept.
        if distance_item < min_distance:
            min_distance = distance_item
            min_point = point_b
            min_place_id = item['place_id']

    if min_distance != float('inf'):
        # Write through the row's own index label; `.at` replaces
        # DataFrame.set_value, which was removed in pandas 1.0.
        row_label = point.Index
        data.at[row_label, 'google_distance'] = min_distance
        data.at[row_label, 'ban_lon'] = min_point[LON_IDX]
        data.at[row_label, 'ban_lat'] = min_point[LAT_IDX]
        data.at[row_label, 'place_id'] = min_place_id

In [6]:
# Preview the first rows, now including the google_distance, ban_lon,
# ban_lat and place_id columns.
data.head()

Unnamed: 0,localidad_id,lon,lat,distance,google_distance,ban_lon,ban_lat,place_id
0,644470,-98.233055,19.312222,0.84449,0.850545,-98.2393,19.3171,ChIJaz5QJzzZz4URCrzgP-JjiC8
1,644472,-98.258888,19.295,3.213229,3.20077,-98.2393,19.3171,ChIJaz5QJzzZz4URCrzgP-JjiC8
2,290236,-98.238609,19.316668,0.080223,0.0870909,-98.2393,19.3171,ChIJaz5QJzzZz4URCrzgP-JjiC8
3,644469,-98.216942,19.335833,3.127066,3.01799,-98.195,19.3183,ChIJ3dzI5evez4UR8RVyqJGK-6c
4,210354,-105.206047,21.808336,0.274223,0.245684,-105.208,21.8074,ChIJqRvFe1KvIIQRbmdkL2lQr0A


In [7]:
# Save data: write the frame to CSV as UTF-8. `index` is left at its default,
# so the row index is written as the first (unnamed) column.
data.to_csv('./data/02_output_localidades_y_bansefi.csv', encoding='utf-8')