In [None]:
import json
import os
import time

import pandas as pd
import requests
from haversine import haversine_vector, Unit

## Fetch place data from the Google Places API

In [None]:
# Google Places API key. It is read from the environment so the secret never
# lives in the notebook or its version history. Any key that was previously
# committed in this cell should be treated as compromised and rotated.
MYKEY = os.environ.get('GOOGLE_MAPS_API_KEY')
if not MYKEY:
    # Fail here with a clear message instead of later with an opaque API error.
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable before running this notebook."
    )

# Place types that are searched for in every parish.
PLACES = [
    'restaurant', 'bar',
    'school',
    'park',
    'bus_station', 'train_station'
]

# Parishes used to narrow each text search (the query also appends "aveiro").
PARISHES = [
    'glória',
    'vera cruz',
    'aradas'
]

In [None]:
def _fetchPlacesPage(url, params, timeout=30):
    """Request one Text Search page and return its decoded JSON payload.

    Raises:
        requests.HTTPError: if the HTTP response status is 4xx/5xx.
        RuntimeError: if the API-level ``status`` field is anything other
            than ``OK`` or ``ZERO_RESULTS`` (e.g. a denied key or exhausted
            quota), which would otherwise look like an empty result set.
    """
    response = requests.get(
        url,
        params=params,  # let requests URL-encode spaces and accented characters
        headers={'Accept': 'application/json'},
        timeout=timeout,  # never hang the notebook on a stalled connection
    )
    response.raise_for_status()
    payload = response.json()

    status = payload.get('status')
    if status not in ('OK', 'ZERO_RESULTS'):
        # The message deliberately leaves out the request parameters (API key).
        raise RuntimeError(
            "Places text search failed with status {!r}: {}".format(
                status, payload.get('error_message', 'no error message returned')
            )
        )
    return payload


def searchPlaces(what, place, max_pages=2):
    """Search Google Places for `what` in the parish `place` (Aveiro).

    Args:
        what: place type to look for (e.g. ``'restaurant'``).
        place: parish name used to narrow the text query.
        max_pages: maximum number of result pages to download. The first page
            is always fetched; the default of 2 keeps the original behavior
            of following ``next_page_token`` once.

    Returns:
        A list with the raw ``results`` entries of every downloaded page.

    Raises:
        requests.HTTPError, RuntimeError: see `_fetchPlacesPage`.
    """
    url = "https://maps.googleapis.com/maps/api/place/textsearch/json"
    params = {'query': what + "," + place + ",aveiro", 'key': MYKEY}

    page = _fetchPlacesPage(url, params)
    results = list(page.get('results', []))
    pages_fetched = 1

    # Follow the pagination token while there is one and the page budget allows.
    while 'next_page_token' in page and pages_fetched < max_pages:
        # A freshly issued token is not usable immediately, so wait before using it.
        time.sleep(5)
        page = _fetchPlacesPage(url, dict(params, pagetoken=page['next_page_token']))
        results.extend(page.get('results', []))
        pages_fetched += 1

    return results

In [None]:
# Query every (parish, place type) pair and flatten the hits into one record
# each. Optional fields missing from a hit are stored as the sentinel -100.
results = []
for place in PARISHES:
    for what in PLACES:
        print(what + " - " + place + " :  ")

        for hit in searchPlaces(what, place):
            location = hit['geometry']['location']
            results.append({
                'place': what,
                'name': hit['name'],
                'lat': location['lat'],
                'lon': location['lng'],
                'rating': hit.get('rating', -100),
                'user_ratings_total': hit.get('user_ratings_total', -100),
                'price_level': hit.get('price_level', -100),
            })

df_places = pd.DataFrame(results)

In [None]:
# 1. Drop rows that are exact duplicates of each other.
# 2. Keep only (name, lat, lon) combinations that occur exactly once; a place
#    that still has several rows (differing in some other column, such as
#    `place`) is removed entirely rather than reduced to one row.
# 3. Add one 0-initialised flag column per pole.
df_places = (
    df_places
    .drop_duplicates()
    .groupby(['name', 'lat', 'lon'])
    .filter(lambda group: len(group) == 1)
    .reset_index(drop=True)
    .assign(p1=0, p3=0, p35=0)
)

# Count how many places lie within a 250-meter radius of each pole

In [None]:
# Pole identifier -> (latitude, longitude) of the pole.
POLES = dict(
    p1=(40.63476, -8.66038),
    p3=(40.64074, -8.65705),
    p35=(40.63028, -8.65423),
)

In [None]:
def calculate_dist(post, lat, lon):
    """Distance in meters between the pole `post` and the point (lat, lon).

    Args:
        post: key of the pole in `POLES`.
        lat: latitude of the point to compare against the pole.
        lon: longitude of the point to compare against the pole.

    Returns:
        The result of `haversine_vector` for that single pair of points;
        callers read the distance from its first element.
    """
    pole_coords = POLES[post]
    return haversine_vector([pole_coords], [(lat, lon)], Unit.METERS)

In [None]:
# Set a pole's flag column to 1 for every place located at most 250 meters from it.
for index, lat, lon in zip(df_places.index, df_places['lat'], df_places['lon']):
    for pole in POLES:
        within_radius = calculate_dist(pole, lat, lon)[0] <= 250
        if within_radius:
            df_places.at[index, pole] = 1

In [None]:
# Count, per pole, how many places of each type were flagged as nearby.
# The result has one row per pole (`n_p`) and one column per place type.
df_places_agg = (
    df_places
    .groupby('place')
    # Use the string alias: passing the builtin `sum` is deprecated in recent pandas.
    .agg({'p1': 'sum', 'p3': 'sum', 'p35': 'sum'})
    # Guarantee a row for every searched place type, even one with no surviving
    # places, so the renamed count columns always exist downstream. Sorting
    # keeps the same order groupby produces when every type is present.
    .reindex(sorted(PLACES), fill_value=0)
    .T
    .reset_index()
    .rename_axis(None, axis=1)
    .rename(
        columns={
            'index': 'n_p',
            'restaurant': 'n_rest',
            'bar': 'n_bar',
            'school': 'n_school',
            'park': 'n_park'
        }
    )
)

In [None]:
# Places flagged as close to at least one of the poles.
near_any_pole = df_places[['p1', 'p3', 'p35']].eq(1).any(axis=1)
df_placesNearPoles = df_places[near_any_pole].reset_index(drop=True)

In [None]:
# Enrich the existing indicator dataset with the per-pole place counts.
# The merge uses pandas' default inner join on `n_p`, so only rows whose pole
# appears in `df_places_agg` are kept.
df_indicator = (
    pd.read_parquet('s3://datalake-eu-central-1/ugiO-atchackathon/preprocessed/indicator_regression.parquet')
    .reset_index()
    .merge(df_places_agg, on='n_p')
    .set_index('time_index')
)

# Total of the bar, park, restaurant and school counts (the station columns are not included).
poi_columns = ["n_bar", "n_park", "n_rest", "n_school"]
df_indicator["n_points_of_interest"] = df_indicator[poi_columns].sum(axis=1)

In [None]:
# Persist every result: the enriched indicator table goes to both S3 and the
# local databases folder, the places tables only to the local folder.
outputs = [
    (df_indicator, 's3://datalake-eu-central-1/ugiO-atchackathon/preprocessed/indicator_regression_plus_places.parquet'),
    (df_places, '../databases/allPlaces.parquet'),
    (df_placesNearPoles, '../databases/placesNearPoles.parquet'),
    (df_places_agg, '../databases/placesAgg.parquet'),
    (df_indicator, '../databases/indicatorPlusPlaces.parquet'),
]
for frame, destination in outputs:
    frame.to_parquet(destination)

In [None]:
# Show the indicator table after the per-pole place counts were merged in.
df_indicator


In [None]:
# Show the de-duplicated places table together with its per-pole proximity flags.
df_places