In [None]:
import os
from time import sleep
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import requests

try:
    # Prefer the standalone joblib package; recent scikit-learn releases no
    # longer ship the vendored copy under sklearn.externals.
    import joblib
except ImportError:
    # Fallback for old environments that only have the vendored copy.
    from sklearn.externals import joblib

In [None]:
def get_place(token, place_name, circle, latitude, longitude, timeout=10):
    """Look up a place by free text with the Google Places "Find Place" endpoint.

    Parameters
    ----------
    token : str
        API key, sent as the ``key`` query parameter.
    place_name : str
        Free-text query, sent as the ``input`` query parameter.
    circle : int
        Radius of the ``locationbias`` circle (formatted as an integer).
    latitude, longitude : float or str
        Centre of the ``locationbias`` circle.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10), so a
        stalled connection cannot hang the notebook forever.

    Returns
    -------
    dict
        The decoded JSON body of the response.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems, timeouts, or a non-2xx HTTP status.
    """
    endpoint = 'https://maps.googleapis.com/maps/api/place/findplacefromtext/json'

    # Let requests build and escape the query string, so every value (not only
    # the search text) is URL-encoded consistently.
    params = {
        'input': place_name,
        'inputtype': 'textquery',
        'language': 'pl',
        'fields': 'formatted_address,geometry,id,name,permanently_closed,place_id,types',
        'locationbias': 'circle:%i@%s,%s' % (circle, latitude, longitude),
        'key': token,
    }

    resp = requests.get(endpoint, params=params, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    resp.raise_for_status()

    return resp.json()

##### Deserializacja danych

In [None]:
# Restore the six previously pickled objects, in the order of the names below.
# NOTE: joblib.load unpickles arbitrary objects - only load files you trust.
pickle_paths = (
    '../pickles/fb.pkl',
    '../pickles/ss.pkl',
    '../pickles/cjg.pkl',
    '../pickles/sk.pkl',
    '../pickles/um.pkl',
    '../pickles/concatenation.pkl',
)
fb, ss, cjg, sk, um, concatenation = map(joblib.load, pickle_paths)

##### Ustawienie tokena i obszaru przeszukiwania

In [None]:
# API key for the Google Places requests. It is read from the environment so
# the secret does not have to be stored in the notebook; the original
# placeholder is kept as the fallback when the variable is not set.
token = os.environ.get('GOOGLE_PLACES_API_KEY', '...')

# Search bias handed to get_place: a circle of radius `circle` centred on
# (latitude, longitude).
circle = 50000
latitude = 50.270908
longitude = 19.039993

##### Wygenerowanie unikalnych miejsc wydarzeń i adresów

In [None]:
# Unique (place_name, street) pairs as a list of
# {'place_name': ..., 'street': ...} dicts, one per place to look up.
# The orient must be spelled 'records': the abbreviated 'record' was
# deprecated and is rejected by current pandas versions.
place_dict = concatenation[['place_name', 'street']].drop_duplicates().to_dict(orient='records')

##### Pobranie danych z API Google

In [None]:
# Query the API once per unique place and keep the raw responses.
# The lookup is slow (one request per second) and calls an external service,
# so it only runs when the cached result is missing; otherwise the cached
# responses are reused. Delete the cache file to force a fresh download.
google_places_cache = '../pickles/google_places.pkl'

if os.path.exists(google_places_cache):
    google_data = joblib.load(google_places_cache)
else:
    google_data = []

    for i, row in enumerate(place_dict, start=1):
        # Join only the parts that are present, so a missing street or name
        # (NaN) does not break the string concatenation.
        place = ' '.join(
            str(part) for part in (row['place_name'], row['street']) if pd.notnull(part)
        )
        g_data = get_place(token, place, circle, latitude, longitude)
        google_data.append({'place_name': row['place_name'], 'street': row['street'], 'google_data' : g_data})

        print(i)  # progress counter
        sleep(1)  # throttle: at most one request per second

    # Persist the fresh download so later runs can skip the loop.
    joblib.dump(google_data, google_places_cache)

##### Serializacja danych z google

In [None]:
# The dump below is left commented out, so this cell only reads a previously
# saved copy of the responses and replaces whatever `google_data` currently
# holds in memory. Uncomment the dump after a fresh download to update the file.
#joblib.dump(google_data, '../pickles/google_places.pkl')
google_data = joblib.load('../pickles/google_places.pkl')

##### Spłaszczenie słownika miejsc

In [None]:
# Flatten the raw responses: for every lookup that returned at least one
# candidate, turn the first candidate into one flat record.
place_columns = ['place_name', 'street', 'formatted_address', 'id', 'place_id',
                 'name', 'types', 'lat', 'lng']
place_records = []

for entry in google_data:
    # .get() also covers responses that carry no 'candidates' key at all.
    candidates = entry['google_data'].get('candidates')
    if not candidates:
        continue

    first = candidates[0]
    location = first['geometry']['location']

    place_records.append({
        'place_name': entry['place_name'],
        'street': entry['street'],
        # Optional fields default to None. Previously a missing value silently
        # reused the one left over from the preceding place.
        'formatted_address': first.get('formatted_address'),
        'id': first.get('id'),
        'place_id': first['place_id'],
        'name': first['name'],
        'types': first.get('types'),
        'lat': location['lat'],
        'lng': location['lng'],
    })

# Explicit columns keep the frame's schema stable even when nothing matched.
google_places = pd.DataFrame(place_records, columns=place_columns)

##### Zrzut słownika miejsc do pliku

In [None]:
# Export the place dictionary to CSV with an explicit column order.
csv_columns = ['id', 'place_id', 'place_name', 'name', 'street',
               'formatted_address', 'lat', 'lng', 'types']
google_places[csv_columns].to_csv('../output/google_places.csv')

##### Standaryzacja miejsc eventów

In [None]:
# Attach the Google result to every event through its (place_name, street)
# pair; a left join keeps events that have no match.
google_cols = ['name', 'place_name', 'street', 'formatted_address', 'lat', 'lng', 'types']
place_keys = ['place_name', 'street']
concatenation_std = concatenation.merge(google_places[google_cols], on=place_keys, how='left')

# The code relies on both frames having a `name` column: after the merge the
# left one is `name_x` and the Google one is `name_y`.
has_google_name = concatenation_std['name_y'].notnull()
has_google_address = concatenation_std['formatted_address'].notnull()

# Prefer Google's name / address where available, otherwise keep the original.
concatenation_std['place_name'] = np.where(
    has_google_name, concatenation_std['name_y'], concatenation_std['place_name']
)
concatenation_std['street'] = np.where(
    has_google_address, concatenation_std['formatted_address'], concatenation_std['street']
)

# Remove the helper columns and restore the left frame's `name` column.
concatenation_std = (
    concatenation_std
    .drop(columns=['name_y', 'formatted_address'])
    .rename(columns={'name_x': 'name'})
)

##### Uwzględnienie zmian lokalizacji

In [None]:
# Load the location-change table and parse its change_date column to datetime
# so it can be compared with event start times.
locations = (
    pd.read_csv('../data/zmiany lokalizacji.csv')
    .assign(change_date=lambda frame: pd.to_datetime(frame['change_date']))
)

In [None]:
# Join every event with the location-change row of its place, if there is one
# (matched on place_name == place); a left join keeps events with no change row.
concatenation_std = concatenation_std.merge(locations, left_on='place_name', right_on='place', how='left')

# Drop events that start before the change date but have no old street.
# .copy() makes the filtered frame independent, so the column assignments
# below do not trigger SettingWithCopyWarning.
before_change = concatenation_std['start_time'] < concatenation_std['change_date']
concatenation_std = concatenation_std[~(before_change & concatenation_std['street_old'].isnull())].copy()

# Events without a change row get change_date = start_time, so they never
# count as "before the change" in the comparisons below.
concatenation_std['change_date'] = concatenation_std['change_date'].fillna(concatenation_std['start_time'])
before_change = concatenation_std['start_time'] < concatenation_std['change_date']

# NOTE(review): old_name replaces place_name whenever it is present,
# regardless of the event date - confirm this is intended.
concatenation_std['place_name'] = np.where(
    concatenation_std['old_name'].notnull(), concatenation_std['old_name'], concatenation_std['place_name']
)

# Events before the change take the old street and coordinates.
for column in ('street', 'lat', 'lng'):
    concatenation_std[column] = np.where(
        before_change, concatenation_std[column + '_old'], concatenation_std[column]
    )

# Remove every column that came from the location-change table.
concatenation_std = concatenation_std.drop(columns=locations.columns)

##### Serializacja danych

In [None]:
# Persist the standardised events.
# NOTE(review): this writes to '../pickles/concatenation.pkl', the same path
# the notebook loads `concatenation` from, so the input is overwritten and a
# second full run would start from already-standardised data - confirm this
# is intended.
joblib.dump(concatenation_std, '../pickles/concatenation.pkl')