In [14]:
import googlemaps
from datetime import datetime

with open('api_key.txt') as f:
    api_key = f.readline()
    f.close
gmaps = googlemaps.Client(api_key)

In [15]:
# Geocode helper functions

# returns the geocode (json containing details of the location)
def google_get_geocode(location):
    try: 
        geocode_result = gmaps.geocode(location)
        return geocode_result[0]
    except: return None

# returns latitude given the geocode json
def google_get_latitude(geocode):
    try: 
        return geocode['geometry']['location']['lat']
    except: 
        return None

# returns longitude given the geocode json
def google_get_longitude(geocode):
    try: 
        return geocode['geometry']['location']['lng']
    except: 
        return None

# returns address given the geocode json
def google_get_address(geocode):
    try: 
        return geocode['formatted_address']
    except: 
        return None

In [16]:
import pandas as pd
import numpy as np

In [17]:
sm_df = pd.read_csv('data/sm.csv')
ayala_df = pd.read_csv('data/ayala.csv')
robinsons_df = pd.read_csv('data/robinsons.csv')

In [18]:
sm_df['temp_mall_address'] = sm_df['mall_name'] + ', ' +sm_df['mall_address']
ayala_df['temp_mall_address'] = ayala_df['mall_address']
robinsons_df['temp_mall_address'] = robinsons_df['mall_name'] + ', ' +robinsons_df['mall_address']

In [19]:
geocode_data = []
data_list = [sm_df, ayala_df, robinsons_df]

# get geocode, latitude, longitude, and formatted address of each mall
for table in data_list:
    table['geocode'] = table.temp_mall_address.apply(google_get_geocode)
    table['latitude'] = table.geocode.apply(google_get_latitude)
    table['longitude'] = table.geocode.apply(google_get_longitude)
    table['formatted_address'] = table.geocode.apply(google_get_address)

In [20]:
del sm_df['temp_mall_address']
del ayala_df['temp_mall_address']
del robinsons_df['temp_mall_address']

In [79]:
def get_province(geocode):
    for x in geocode['address_components']:
        if x['types'][0] == 'administrative_area_level_1':
            if x['long_name'] == 'Metro Manila':
                #print("Manila")
                return x['long_name']
        if x['types'][0] == 'administrative_area_level_2':
            #print(x['long_name'])
            return x['long_name']

sm_df['province'] = sm_df.geocode.apply(get_province)
ayala_df['province'] = ayala_df.geocode.apply(get_province)
robinsons_df['province'] = robinsons_df.geocode.apply(get_province)

In [80]:
import os
directory = 'data'
if not os.path.exists(directory):
    os.makedirs(directory)

In [88]:
ayala_df[ayala_df['province'].isna()]

Unnamed: 0,mall_name,mall_address,mall_contact_no,image_link,fb_link,logo_link,geocode,latitude,longitude,formatted_address,province
3,Greenbelt,"Greenbelt Mall, Legazpi Street, Makati City",+63 2 7795-9595\n+63 2 7729 2137,https://s3-ap-southeast-1.amazonaws.com/interi...,facebook.com/Greenbelt.Ayala Malls,https://s3-ap-southeast-1.amazonaws.com/interi...,{'address_components': [{'long_name': 'Legazpi...,14.55293,121.021003,"Legazpi Street, Makati, Kalakhang Maynila, Phi...",


In [96]:
ayala_df.loc[3, 'province'] = "Metro Manila"

# replace Greenbelt null province to Metro Manila

In [97]:
dict_df = {'sm.csv': sm_df,
           'ayala.csv': ayala_df,
           'robinsons.csv': robinsons_df}

for key,val in dict_df.items():
    val.to_csv(os.path.join(directory, key), index=False)