In [232]:
import os
import pandas as pd
import gmaps
import googlemaps as g
import yaml
import numpy
import matplotlib.pyplot as plt
from IPython.display import Image
import foursquare
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.model_selection import train_test_split
from sklearn import linear_model


%matplotlib inline
plt.style.use('ggplot')

pd.set_option('display.max_rows', 20)
pd.set_option('display.max_columns', 20)
pd.set_option('display.notebook_repr_html', True)

# Read the inspection-score / Foursquare-id mapping and discard the columns
# that are not used in the rest of this notebook.
df = pd.read_csv(os.path.join('..', 'datasets', 'foursquare_idmapping.csv'))
df = df.drop(['Unnamed: 0', 'business_name', 'location'], axis=1)
df.head()



Unnamed: 0,inspection_score,foursquare_id,foursquare_name
0,87.4,4ae289b9f964a520e78e21e3,Cafe Tiramisu
1,94.0,4c1a7695eac020a1105043c2,Nrgize Lifestyle Cafe
2,96.333333,,
3,87.75,4bb7d9273db7b71353aa209a,Norman's Ice Cream & Freezes
4,83.2,4ab90c8bf964a520d27d20e3,Charlie's Deli Cafe


In [233]:

# One row per (foursquare_id, foursquare_name) pair with the mean inspection score.
grouped_df = (
    df
    .groupby(['foursquare_id', 'foursquare_name'], as_index=False)
    ['inspection_score']
    .mean()
)
grouped_df.head()

Unnamed: 0,foursquare_id,foursquare_name,inspection_score
0,3fd66200f964a52001ed1ee3,Pacific Cafe,86.0
1,3fd66200f964a52005ed1ee3,Taiwan Restaurant 台灣飯店,74.111111
2,3fd66200f964a52006ed1ee3,Tanuki Restaurant 狸壽司,92.0
3,3fd66200f964a52013ed1ee3,Acquerello,98.666667
4,3fd66200f964a52017ed1ee3,Ebisu,80.0


In [234]:
# Read the Foursquare API credentials from a local YAML file so they are not
# hardcoded in the notebook.
with open('foursquare.yaml', 'r') as f:
    # safe_load only builds plain Python objects; yaml.load() without an explicit
    # Loader can construct arbitrary objects and is rejected by PyYAML >= 6.
    fs_credentials = yaml.safe_load(f)

# The file is expected to provide both keys; a missing key raises KeyError here.
fs_client_id = fs_credentials['client-id']
fs_client_secret = fs_credentials['client-secret']



In [235]:
# API client built from the client id and secret read from foursquare.yaml.
client = foursquare.Foursquare(
    client_id=fs_client_id,
    client_secret=fs_client_secret,
)

In [236]:
# Number of grouped venues that will be looked up on Foursquare.
grouped_df.shape[0]

4482

In [237]:
def foursquare_attributes(id):
    """Fetch one venue from Foursquare and flatten the fields used downstream.

    Parameters
    ----------
    id : str
        Foursquare venue id passed to ``client.venues``. The name shadows the
        builtin ``id``; it is kept so existing keyword callers keep working.

    Returns
    -------
    dict or None
        A flat dict with the keys ``foursquare_id``, ``price``, ``hasMenu``,
        ``rating``, ``ratingSignals``, ``photos``, ``isVerified``, ``tipCount``,
        ``createdAt`` and ``checkinsCount``. Fields missing from the response
        are returned as ``None`` (``False`` for the two boolean flags).
        Returns ``None`` when the response carries no venue payload.

    Notes
    -----
    Uses the module-level ``client``. Errors raised by ``client.venues``
    are not caught here and propagate to the caller.
    """
    restaurant = client.venues(id)
    venue = restaurant.get('venue') if restaurant else None
    if not venue:
        # Explicit "nothing found" result; callers must skip None rows.
        return None

    # Nested sections may be absent from the response; fall back to empty
    # dicts so a missing section yields None instead of an AttributeError.
    price = venue.get('price') or {}
    photos = venue.get('photos') or {}
    stats = venue.get('stats') or {}

    # Reservation / outdoor-seating flags (under attributes.groups) are not
    # extracted yet.
    return {
        'foursquare_id': id,
        'price': price.get('tier'),
        # bool() rather than a presence check: an explicit false value in the
        # response must not be recorded as True.
        'hasMenu': bool(venue.get('hasMenu')),
        'rating': venue.get('rating'),
        'ratingSignals': venue.get('ratingSignals'),
        'photos': photos.get('count'),
        'isVerified': bool(venue.get('verified')),
        'tipCount': stats.get('tipCount'),
        'createdAt': venue.get('createdAt'),
        'checkinsCount': stats.get('checkinsCount'),
    }
    
# Smoke-test the lookup on the first row of the raw mapping.
r = foursquare_attributes(df.loc[0, 'foursquare_id'])

In [238]:
# Preview the grouped venues before fetching their Foursquare attributes.
grouped_df.head()

Unnamed: 0,foursquare_id,foursquare_name,inspection_score
0,3fd66200f964a52001ed1ee3,Pacific Cafe,86.0
1,3fd66200f964a52005ed1ee3,Taiwan Restaurant 台灣飯店,74.111111
2,3fd66200f964a52006ed1ee3,Tanuki Restaurant 狸壽司,92.0
3,3fd66200f964a52013ed1ee3,Acquerello,98.666667
4,3fd66200f964a52017ed1ee3,Ebisu,80.0


In [239]:
# Fetch attributes for every venue that has a Foursquare id (one API call per id).
# Lookups that return None (no venue payload) are skipped so that the
# DataFrame built from these rows only ever sees dicts.
foursquare_rows = []
for venue_id in grouped_df['foursquare_id'].dropna():
    venue_attributes = foursquare_attributes(venue_id)
    if venue_attributes is not None:
        foursquare_rows.append(venue_attributes)

In [240]:
# One row per fetched venue; show a preview instead of dumping the whole frame.
foursquare_df = pd.DataFrame(foursquare_rows)
foursquare_df.head(10)

Unnamed: 0,checkinsCount,createdAt,foursquare_id,hasMenu,isVerified,photos,price,rating,ratingSignals,tipCount
0,1389,1071014400,3fd66200f964a52001ed1ee3,True,True,78,2.0,8.1,59.0,24
1,2173,1071014400,3fd66200f964a52005ed1ee3,True,True,185,1.0,7.2,37.0,12
2,1149,1071014400,3fd66200f964a52006ed1ee3,True,True,46,2.0,7.9,34.0,13
3,1185,1071014400,3fd66200f964a52013ed1ee3,True,True,72,4.0,8.5,108.0,41
4,5047,1071014400,3fd66200f964a52017ed1ee3,True,True,254,3.0,9.4,307.0,88
5,2275,1071014400,3fd66200f964a52018f11ee3,False,True,103,2.0,7.5,78.0,20
6,3775,1071014400,3fd66200f964a52019f11ee3,True,True,167,1.0,8.8,190.0,82
7,6945,1071014400,3fd66200f964a5201af11ee3,True,True,193,2.0,8.1,271.0,108
8,2067,1071014400,3fd66200f964a5201bed1ee3,True,True,76,3.0,7.7,93.0,23
9,3646,1071014400,3fd66200f964a52021ed1ee3,True,True,150,2.0,8.1,152.0,46


In [241]:
# Attach the Foursquare attributes to the per-venue inspection scores.
# Inner join: only venues present in both frames are kept.
df2 = grouped_df.merge(foursquare_df, on='foursquare_id', how='inner')
df2.head(10)


Unnamed: 0,foursquare_id,foursquare_name,inspection_score,checkinsCount,createdAt,hasMenu,isVerified,photos,price,rating,ratingSignals,tipCount
0,3fd66200f964a52001ed1ee3,Pacific Cafe,86.000000,1389,1071014400,True,True,78,2.0,8.1,59.0,24
1,3fd66200f964a52005ed1ee3,Taiwan Restaurant 台灣飯店,74.111111,2173,1071014400,True,True,185,1.0,7.2,37.0,12
2,3fd66200f964a52006ed1ee3,Tanuki Restaurant 狸壽司,92.000000,1149,1071014400,True,True,46,2.0,7.9,34.0,13
3,3fd66200f964a52013ed1ee3,Acquerello,98.666667,1185,1071014400,True,True,72,4.0,8.5,108.0,41
4,3fd66200f964a52017ed1ee3,Ebisu,80.000000,5047,1071014400,True,True,254,3.0,9.4,307.0,88
5,3fd66200f964a52018f11ee3,Durty Nelly's,93.111111,2275,1071014400,False,True,103,2.0,7.5,78.0,20
6,3fd66200f964a52019f11ee3,Art's Cafe,92.500000,3775,1071014400,True,True,167,1.0,8.8,190.0,82
7,3fd66200f964a5201af11ee3,Grubstake Diner,98.000000,6945,1071014400,True,True,193,2.0,8.1,271.0,108
8,3fd66200f964a5201bed1ee3,Fringale,88.600000,2067,1071014400,True,True,76,3.0,7.7,93.0,23
9,3fd66200f964a52021ed1ee3,Marnee Thai,81.397436,3646,1071014400,True,True,150,2.0,8.1,152.0,46


In [242]:
# Non-null count per column (shows how many venues lack price / rating data).
# The arguments previously passed were all defaults, and `level` no longer
# exists in pandas >= 2.0, so call count() bare.
df2.count()

foursquare_id       4482
foursquare_name     4482
inspection_score    4482
checkinsCount       4482
createdAt           4482
hasMenu             4482
isVerified          4482
photos              4482
price               3362
rating              3320
ratingSignals       3320
tipCount            4482
dtype: int64

In [243]:
# Row count of the merged frame.
df2.shape[0]

4482

In [244]:
# Persist the merged inspection scores + Foursquare venue attributes for EDA and modeling.
# The index is written as well, matching the existing file layout.
venue_attributes_path = os.path.join('..', 'datasets', 'venue_attributes.csv')
df2.to_csv(venue_attributes_path)