In [215]:
import os
import re
from pprint import pprint
import yaml
import requests
import json
import pandas as pd
from google.cloud import bigquery

In [214]:
# Notebook-wide pandas display limits: wider cells and more columns than the
# defaults, with the number of printed rows capped.
for option_name, option_value in (
    ('display.max_colwidth', 100),
    ('display.max_columns', 100),
    ('display.max_rows', 50),
):
    pd.set_option(option_name, option_value)

In [206]:
# Load the notebook settings from the local YAML file.
# The context manager closes the file handle as soon as parsing finishes;
# a bare open() would leave it open until garbage collection.
with open('config.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file)

In [207]:
# Google Maps API key, read from the loaded config under 'google_maps_api_key'.
maps_key = config['google_maps_api_key']

In [210]:
# BigQuery client created with no explicit arguments.
# NOTE(review): no visible cell references `query_client` (uploads go through
# DataFrame.to_gbq) — confirm whether it is still needed.
query_client = bigquery.Client()

In [3]:
# Read the post-code table; `post_code` is kept as a string and becomes the index.
post_codes_osl = (
    pd.read_csv('post_codes.csv', dtype={'post_code': str})
    .set_index('post_code', drop=True)
)

In [146]:
# Preview the first rows to sanity-check the loaded post-code table.
post_codes_osl.head()

Unnamed: 0_level_0,lat,lng,municipality,neighborhood_name,population
post_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,59.9116,10.7545,oslo,sentrum,
9,59.91,10.75,oslo,,
10,59.9171,10.7274,oslo,sentrum,
14,59.9116,10.7545,oslo,,
15,59.9061,10.7392,oslo,sentrum,


In [197]:
def get_maps_places_response(coords, radius, key, timeout=10):
    """Send one Google Places "nearby search" request and return the raw response.

    Args:
        coords: Search location sent as the ``location`` query parameter;
            the caller in this notebook formats it as ``"<lat>,<lng>"``.
        radius: Value sent as the ``radius`` query parameter
            (metres per the Places API docs — confirm).
        key: API key sent as the ``key`` query parameter.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it a stalled connection
            would block the calling loop indefinitely.

    Returns:
        The ``requests.Response`` object, unmodified. The HTTP status and the
        API-level status inside the JSON body are NOT checked here.

    Raises:
        requests.exceptions.Timeout: if the server does not answer within
            ``timeout`` seconds.
    """
    google_maps_url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json'
    params = {'location': coords, 'radius': radius, 'key': key}
    return requests.get(google_maps_url, params=params, timeout=timeout)

In [201]:
# Column names for the raw-response table; the order matches the row lists
# appended by the fetch loop.
# NOTE(review): `columns` is reassigned further down for the places table, so
# running cells out of order can pick up the wrong list.
columns = ['post_code', 'lat', 'lng', 'radius', 'response']

In [199]:
# Call the Places API once per post code for each search radius and keep the
# request parameters together with the JSON body serialised back to a string.
data = []
for radius in [500]:
    located_post_codes = zip(post_codes_osl.index, post_codes_osl.lat, post_codes_osl.lng)
    for post_code, lat, lng in located_post_codes:
        location = ','.join((str(lat), str(lng)))
        payload = get_maps_places_response(location, radius, maps_key).json()
        data.append([post_code, lat, lng, radius, json.dumps(payload)])

In [203]:
# One row per (post code, radius) request; `response` holds the JSON body as a string.
post_code_data = pd.DataFrame(data, columns=columns)

In [209]:
# Preview the first rows of the raw-response table.
post_code_data.head()

Unnamed: 0,post_code,lat,lng,radius,response
0,1,59.9116,10.7545,500,"{""html_attributions"": [], ""next_page_token"": ""CqQCFQEAAIQlU6HTjwLweE46xaTVsK4OZOUKFrrTNicG8MPkqB..."
1,9,59.91,10.75,500,"{""html_attributions"": [], ""next_page_token"": ""CqQCFQEAAOkvhtZYQNSjIx6VewpBh21Ip0AV2-btTlMG2uMsDv..."
2,10,59.9171,10.7274,500,"{""html_attributions"": [], ""next_page_token"": ""CqQCFQEAANN-yAM8BzkuOMyuzoimwn6-gHGdcAbFopgSadBVi0..."
3,14,59.9116,10.7545,500,"{""html_attributions"": [], ""next_page_token"": ""CqQCFQEAAAClaAAndRbf8GbvkM-WZ0a9p0o4oe27wy-rnd-qfA..."
4,15,59.9061,10.7392,500,"{""html_attributions"": [], ""next_page_token"": ""CqQCFQEAAGxYKzHWr9NxMG89movvZz6ytLvpotdeepsvowywNA..."


In [223]:
# Check row count, dtypes and null counts before uploading.
post_code_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 995 entries, 0 to 994
Data columns (total 5 columns):
post_code    995 non-null object
lat          995 non-null float64
lng          995 non-null float64
radius       995 non-null int64
response     995 non-null object
dtypes: float64(2), int64(1), object(2)
memory usage: 38.9+ KB


In [220]:
# Upload the raw responses to BigQuery. With if_exists='append' the rows are
# added to any existing table, so running this cell again inserts them again.
responses_table = 'neighborhood_data.post_code_responses'
post_code_data.to_gbq(destination_table=responses_table, project_id='hde-test-clean', if_exists='append')

In [221]:
# Per-place fields to extract; they are read with place.get(), so a place
# that lacks one yields None for it.
attributes = ['price_level', 'rating']

In [222]:
# Column names for the long-format places table; the order matches the row
# lists appended by the flattening loop.
# NOTE(review): this rebinds `columns`, which an earlier cell defined for the
# raw-response table.
columns = ['post_code', 'radius', 'place_name', 'place_type', 'place_attribute', 'attribute_value']

In [228]:
# Flatten every stored response into one row per (place, place type, attribute).
data = []

# Read the radius from the same row as the response. Looping over the unique
# radii and then over ALL rows would pair every response with every radius,
# duplicating places and mislabelling them once more than one radius is fetched.
for post_code, radius, response in zip(post_code_data.post_code,
                                       post_code_data.radius,
                                       post_code_data.response):
    response_dict = json.loads(response)
    response_res = response_dict['results']
    for place in response_res:
        place_name = place['name']
        for place_type in place['types']:
            for attr in attributes:
                # .get() yields None when the place has no value for this attribute.
                data.append([post_code, radius, place_name, place_type, attr, place.get(attr)])

In [231]:
# Long-format table: one row per (post code, radius, place, type, attribute).
post_code_places = pd.DataFrame(data, columns=columns)

In [232]:
# Preview the first rows of the flattened places table.
post_code_places.head()

Unnamed: 0,post_code,radius,place_name,place_type,place_attribute,attribute_value
0,1,500,Oslo,locality,price_level,
1,1,500,Oslo,locality,rating,
2,1,500,Oslo,political,price_level,
3,1,500,Oslo,political,rating,
4,1,500,Oslo Opera House,premise,price_level,


In [233]:
# Check row count, dtypes and how many attribute values are non-null.
post_code_places.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 117498 entries, 0 to 117497
Data columns (total 6 columns):
post_code          117498 non-null object
radius             117498 non-null int64
place_name         117498 non-null object
place_type         117498 non-null object
place_attribute    117498 non-null object
attribute_value    42240 non-null float64
dtypes: float64(1), int64(1), object(4)
memory usage: 5.4+ MB


In [236]:
# Upload the flattened places to BigQuery. With if_exists='append' the rows are
# added to any existing table, so running this cell again inserts them again.
places_table = 'neighborhood_data.post_code_places'
post_code_places.to_gbq(destination_table=places_table, project_id='hde-test-clean', if_exists='append')