In [None]:
import pandas as pd
import numpy as np
from googleplaces import GooglePlaces, types, lang, GooglePlacesError
from math import sin, cos, sqrt, atan2, radians
import geopandas as gpd
import geojsonio
from geopy.distance import geodesic
from shapely.geometry import Point, Polygon
from shapely.ops import transform
from time import sleep
import re

In [None]:
# Global configuration.
# The API key is read from the GOOGLE_PLACES_API_KEY environment variable so a
# real credential never has to be stored in the notebook. The original
# placeholder value is kept as the fallback, so the cell behaves as before
# when the variable is not set.
API_KEY = os.environ.get('GOOGLE_PLACES_API_KEY', 'abcdefghijk')
google_places = GooglePlaces(API_KEY)

# GeoJSON that defines the boundaries of the searched polygon:
polygon = gpd.read_file('saudi.geojson')

In [None]:
# check if point in the polygon
def inPolygon(coord):
    """Tell whether a (lat, lng) pair lies inside the searched area.

    Parameters
    ----------
    coord : sequence
        ``coord[0]`` is the latitude, ``coord[1]`` the longitude.

    Returns
    -------
    bool
        True when at least one geometry of the module-level ``polygon``
        GeoDataFrame contains the point.

    Notes
    -----
    The previous implementation looked only at the geometry labelled ``0``,
    which ignored every other feature of a multi-feature GeoJSON and raised
    ``KeyError`` when the frame's index did not contain ``0``.
    """
    # Point takes (x, y); the point is built as (lng, lat) to match the
    # longitude/latitude axis order used by GeoJSON geometries.
    point = Point(coord[1], coord[0])
    return bool(polygon['geometry'].contains(point).any())

# generate a 5km grid of coordinates within geojson polygon:
def generate_grid(nw, se, lat_coef_5km = 0.048, lgn_coef_5km = 0.049):
    """Build a regular grid of points inside the searched polygon.

    The bounding box spanned by ``nw`` and ``se`` is walked north -> south
    (outer loop) and west -> east (inner loop); only points for which
    ``inPolygon`` is true are kept.

    Parameters
    ----------
    nw : sequence
        (lat, lng) of the north-west corner of the bounding box.
    se : sequence
        (lat, lng) of the south-east corner of the bounding box.
    lat_coef_5km : float
        Step, in degrees, between two consecutive latitudes.
    lgn_coef_5km : float
        Step, in degrees, between two consecutive longitudes.

    Returns
    -------
    list of dict
        ``{'lat': ..., 'lng': ...}`` entries, values rounded to 4 decimals.

    Notes
    -----
    The two step parameters used to be applied to the wrong axis (longitudes
    were stepped by ``lat_coef_5km`` and latitudes by ``lgn_coef_5km``).
    Each step is now applied to the axis it is named after. With the default
    values this shifts the grid slightly, so positional slices of the result
    are not comparable with slices produced by the old code.
    """
    nw_lat, nw_lng = nw[0], nw[1]
    se_lat, se_lng = se[0], se[1]

    # Longitudes, west -> east.
    lng_values = [round(value, 4) for value in np.arange(nw_lng, se_lng, lgn_coef_5km)]
    # Latitudes, north -> south (negative step because latitude decreases).
    lat_values = [round(value, 4) for value in np.arange(nw_lat, se_lat, -lat_coef_5km)]

    return [
        {'lat': lat, 'lng': lng}
        for lat in lat_values
        for lng in lng_values
        if inPolygon((lat, lng))
    ]

In [None]:
# Google Places:
# extract details of the place
def extract_places_info(res_places):
    """Collect the fields of interest for every place in ``res_places``.

    For each place the name, geo location and place id are read first; then
    ``get_details()`` is called on the place and the details, both phone
    numbers, the website and the url are read.

    Returns a list holding one dict per place, in input order.
    """
    # Attributes that are read only after get_details() has been called.
    after_details = (
        'details',
        'local_phone_number',
        'international_phone_number',
        'website',
        'url',
    )
    records = []
    for place in res_places:
        record = {
            'name': place.name,
            'geo_location': place.geo_location,
            'place_id': place.place_id,
        }
        place.get_details()
        for attribute in after_details:
            record[attribute] = getattr(place, attribute)
        records.append(record)
    return records

# perform API call
def query_google(coordinates, place_types=None, radius=5000):
    """Run Nearby Search around ``coordinates`` and collect the place details.

    Parameters
    ----------
    coordinates : dict
        ``{'lat': ..., 'lng': ...}`` of the point to search around.
    place_types : iterable, optional
        Place types to search for. Defaults to the thirteen types the
        notebook is meant to cover (restaurant, bakery, bar, cafe, ...).
    radius : int, optional
        Search radius passed to ``nearby_search`` (default 5000).

    Returns
    -------
    dict
        ``{'lat/lng': coordinates, 'results': [...]}`` where ``results`` is
        the output of ``extract_places_info`` for every distinct place found.

    Notes
    -----
    The previous code passed ``types=[A] or [B] or ...``. A non-empty list is
    truthy, so that expression is just ``[A]`` and only restaurants were ever
    requested. Every type is now queried on its own, because Nearby Search is
    documented as honouring a single type per request, and places matching
    several types are kept once, keyed by ``place_id``. This issues one
    request chain per type, so API usage per coordinate grows accordingly;
    pass a shorter ``place_types`` to limit it.
    """
    if place_types is None:
        place_types = [
            types.TYPE_RESTAURANT, types.TYPE_BAKERY, types.TYPE_BAR,
            types.TYPE_CAFE, types.TYPE_CLOTHING_STORE, types.TYPE_DEPARTMENT_STORE,
            types.TYPE_GROCERY_OR_SUPERMARKET, types.TYPE_FOOD, types.TYPE_ELECTRONICS_STORE,
            types.TYPE_MEAL_DELIVERY, types.TYPE_MEAL_TAKEAWAY, types.TYPE_SHOE_STORE,
            types.TYPE_SHOPPING_MALL,
        ]

    print(coordinates)
    res_arr = []
    seen_place_ids = set()

    for place_type in place_types:
        # first page for this type
        query_result = google_places.nearby_search(
            lat_lng=coordinates, radius=radius, types=[place_type])
        while True:
            print(query_result)
            # Skip places already collected (for another type or page) so
            # that get_details() is not called twice for the same place.
            new_places = []
            for place in query_result.places:
                if place.place_id not in seen_place_ids:
                    seen_place_ids.add(place.place_id)
                    new_places.append(place)
            res_arr.extend(extract_places_info(new_places))

            if not query_result.has_next_page_token:
                break
            # Pause before asking for the next page, as the original code did.
            sleep(2)
            query_result = google_places.nearby_search(
                lat_lng=coordinates, pagetoken=query_result.next_page_token,
                radius=radius, types=[place_type])

    return {'lat/lng': coordinates, 'results': res_arr}

In [None]:
# Parsing Google response
def _flatten_place_details(details):
    """Flatten the ``details`` dict of one place into flat column/value pairs."""
    flat = {}
    for field, value in details.items():
        if field == 'address_components':
            # enumerate gives the true position; list.index() returned the
            # first match and therefore collapsed repeated components.
            for comp_pos, component in enumerate(value):
                flat['long_name_' + str(comp_pos)] = component['long_name']
                flat['short_name_' + str(comp_pos)] = component['short_name']
                for type_pos, comp_type in enumerate(component['types']):
                    flat['types_' + str(comp_pos) + '_' + str(type_pos)] = comp_type

        elif field == 'geometry':
            if 'viewport' in value:
                viewport = value['viewport']
                flat['northeast_lat'] = str(viewport['northeast']['lat'])
                flat['northeast_lng'] = str(viewport['northeast']['lng'])
                flat['southwest_lat'] = str(viewport['southwest']['lat'])
                flat['southwest_lng'] = str(viewport['southwest']['lng'])

        elif field == 'opening_hours':
            if 'weekday_text' in value:
                for day_pos, day in enumerate(value['weekday_text']):
                    flat['opening_hours_day_' + str(day_pos)] = day

        elif field == 'plus_code':
            if 'compound_code' in value:
                flat['plus_code'] = value['compound_code']

        elif field == 'reviews':
            flat['num_reviews'] = len(value)
            # Real min/max over the timestamps; the list order is not
            # relied upon to be chronological.
            review_times = [review['time'] for review in value if 'time' in review]
            if review_times:
                flat['review_min_tmpst'] = min(review_times)
                flat['review_max_tmpst'] = max(review_times)

        elif field == 'types':
            for type_pos, place_type in enumerate(value):
                flat['type_' + str(type_pos)] = place_type

        elif field in ('formatted_address', 'id', 'rating',
                       'user_ratings_total', 'vicinity'):
            # Scalar fields are copied under their own name.
            flat[field] = value
    return flat


def parse_result_elements(result_element):
    """Turn the places found for one searched coordinate into a flat table.

    Parameters
    ----------
    result_element : mapping
        Must provide ``'output'`` (a list of place dicts as produced by
        ``extract_places_info``) and ``'searched_coordinates'`` (a mapping
        with ``'lat'`` and ``'lng'``).

    Returns
    -------
    pandas.DataFrame
        One row per place. Every row starts with
        ``_searched_coordinates_lat`` / ``_searched_coordinates_lng``; the
        ``geo_location`` entry becomes string columns ``lat`` / ``lng``; the
        ``details`` entry is flattened into numbered columns (address
        components, opening hours, types), viewport corners, review count
        and min/max review timestamp, plus a few scalar fields; every other
        key of the place is copied unchanged. An empty ``output`` yields an
        empty frame.
    """
    rows = []
    for place in result_element['output']:
        row = {
            '_searched_coordinates_lat': result_element['searched_coordinates']['lat'],
            '_searched_coordinates_lng': result_element['searched_coordinates']['lng'],
        }
        for key, value in place.items():
            if key == 'details':
                # A place without details contributes no detail columns.
                row.update(_flatten_place_details(value or {}))
            elif key == 'geo_location':
                row['lat'] = str(value['lat'])
                row['lng'] = str(value['lng'])
            else:
                row[key] = value
        rows.append(row)

    return pd.DataFrame(rows)

# get api results
def get_results(x):
    """Wrap the results of one searched coordinate into a one-row DataFrame.

    ``x`` must provide ``'lat/lng'`` and ``'results'``. When ``results`` is
    empty an empty DataFrame is returned; otherwise the frame has a single
    row with the columns ``searched_coordinates`` and ``output``.
    """
    if len(x['results']) == 0:
        return pd.DataFrame()

    record = {'searched_coordinates': x['lat/lng'], 'output': x['results']}
    single_row = pd.DataFrame([record])
    return pd.concat([pd.DataFrame(), single_row], ignore_index=True, sort=False)

In [None]:
# Build the coordinate grid.
# (lat, lng) of the north-west and south-east corners of the searched geo-region:
nw = (32.354735, 33.335969)
se = (13.809974, 56.569381)
all_coordinates = generate_grid(nw=nw, se=se)

In [None]:
# calling Google Places API for every coordinate of the current batch
BATCH_START = 25000
BATCH_END = 26000
CHECKPOINT_EVERY = 101  # write an intermediate CSV each time this many points are done

final_res = []
for position, coordinate in enumerate(all_coordinates[BATCH_START:BATCH_END], start=BATCH_START):
    # enumerate yields the position directly instead of scanning the whole
    # list with all_coordinates.index() on every iteration.
    print(position)
    final_res.append(query_google(coordinate))
    if len(final_res) % CHECKPOINT_EVERY == 0:
        # Keep the real columns ('lat/lng', 'results'). Restricting the frame
        # to a non-existent 'res' column produced checkpoints with only NaN.
        df = pd.DataFrame(final_res)
        df.to_csv('points'+str(len(final_res))+'.csv')

In [None]:
# counting results:
# number of searched coordinates that returned at least one place
count = sum(1 for entry in final_res if len(entry['results']) > 0)
count

# consolidation and parsing

In [None]:
# One row per searched coordinate, with the columns 'lat/lng' and 'results'.
# Built in a single call: appending row by row with pd.concat copies the
# whole frame on every iteration.
df_res_final = pd.DataFrame(final_res)
df_res_final.to_csv('df_res_final.csv')

In [None]:
def _results_frame_for_row(row):
    """Pass one row of df_res_final ('lat/lng', 'results') to get_results."""
    return get_results({'lat/lng': row['lat/lng'], 'results': row['results']})

# Column 'res' holds, per searched coordinate, the frame returned by get_results.
df_api_res = pd.DataFrame()
df_api_res['res'] = df_res_final.apply(_results_frame_for_row, axis=1)

In [None]:
# Stack the non-empty per-coordinate frames into one table.
# The frames are collected first and concatenated once; concatenating inside
# the loop copies the accumulated frame on every iteration.
non_empty_frames = []
for frame in df_api_res['res']:
    if frame.empty:
        print('empty')
    else:
        non_empty_frames.append(frame)

if non_empty_frames:
    df_final_res = pd.concat(non_empty_frames, ignore_index=True, sort=False)
else:
    # pd.concat raises on an empty list; keep the empty-frame result instead.
    df_final_res = pd.DataFrame()

In [None]:
def _parse_row(row):
    """Pass one row of df_final_res to parse_result_elements."""
    element = {
        'output': row['output'],
        'searched_coordinates': row['searched_coordinates'],
    }
    return parse_result_elements(element)

# One parsed frame per searched coordinate.
df_poi_ser = df_final_res.apply(_parse_row, axis=1)

In [None]:
# Stack all parsed frames into the final POI table with a single concat;
# concatenating inside a loop copies the accumulated frame on every iteration.
poi_frames = list(df_poi_ser)
if poi_frames:
    df_poi = pd.concat(poi_frames, ignore_index=True, sort=False)
else:
    # pd.concat raises on an empty list; keep the empty-frame result instead.
    df_poi = pd.DataFrame()

In [None]:
# number of rows in the final POI table
df_poi.shape[0]

In [None]:
# Write the final POI table to disk, once as CSV and once as an Excel workbook.
df_poi.to_csv(path_or_buf='df_poi_final.csv')
df_poi.to_excel(excel_writer='df_poi_final.xlsx')