In [135]:
import os
import pandas as pd
import gmaps
import googlemaps as g
import yaml
import numpy
import matplotlib.pyplot as plt
from IPython.display import Image
import foursquare
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.model_selection import train_test_split
from sklearn import linear_model


%matplotlib inline
# Keep DataFrame previews compact (20 rows / 20 columns at most) and render
# them as HTML tables in the notebook.
pd.set_option('display.notebook_repr_html', True)
pd.set_option('display.max_columns', 20)
pd.set_option('display.max_rows', 20)

# Draw every figure with the ggplot style sheet.
plt.style.use('ggplot')

# Load the business -> Foursquare id mapping.
# The 'Unnamed: 0' column (presumably an index written out with the CSV) is
# dropped as part of the same expression rather than in place, so re-running
# this cell always rebuilds `df` from the file and never fails on a column
# that has already been removed.
df = (
    pd.read_csv(os.path.join('..', 'datasets', 'foursquare_idmapping.csv'))
    .drop(['Unnamed: 0'], axis=1)
)

# Number of rows loaded.
len(df)


5568

In [136]:
# Read the Foursquare API credentials from a local YAML file so that no
# secrets are hard-coded in the notebook.
with open('foursquare.yaml', 'r') as f:
    # safe_load only builds plain Python types. A bare yaml.load(f) can
    # construct arbitrary objects and raises TypeError on PyYAML >= 6 because
    # the Loader argument is mandatory there.
    fs_credentials = yaml.safe_load(f)

# Both keys are required; a missing one raises KeyError here rather than
# producing a half-configured client later.
fs_client_id = fs_credentials['client-id']
fs_client_secret = fs_credentials['client-secret']



In [137]:
# Foursquare API handle, authenticated with the id/secret pair read from
# foursquare.yaml.
client = foursquare.Foursquare(
    client_secret=fs_client_secret,
    client_id=fs_client_id,
)

In [129]:
# Row count of the mapping table. Rows with a missing foursquare_id are NOT
# dropped here, so they are still included in this count.
df.shape[0]

5568

In [130]:
def foursquare_attributes(id, api_client=None):
    """Fetch one venue from Foursquare and flatten the fields used downstream.

    Parameters
    ----------
    id : str
        Foursquare venue id. (The name shadows the builtin ``id``; it is kept
        so existing positional and keyword callers continue to work.)
    api_client : optional
        Any object exposing ``venues(venue_id)`` that returns a dict-like
        response. Defaults to the notebook-level ``client``.

    Returns
    -------
    dict or None
        A flat record with the keys ``foursquare_id``, ``price``, ``hasMenu``,
        ``rating``, ``ratingSignals``, ``photos``, ``tipCount`` and
        ``checkinsCount``. A field whose data is absent from the response is
        ``None`` (``hasMenu`` is ``False``). Returns ``None`` when the
        response carries no ``venue`` entry or an empty one.
    """
    api = client if api_client is None else api_client
    venue = (api.venues(id) or {}).get('venue')
    if not venue:
        return None

    # Guard against nested sections being absent (or null): fall back to an
    # empty dict so the lookups below yield None instead of raising
    # AttributeError on a None value.
    price = venue.get('price') or {}
    photos = venue.get('photos') or {}
    stats = venue.get('stats') or {}

    return {
        'foursquare_id': id,
        'price': price.get('tier'),
        # True whenever the key is present with any non-None value.
        'hasMenu': venue.get('hasMenu') is not None,
        'rating': venue.get('rating'),
        'ratingSignals': venue.get('ratingSignals'),
        'photos': photos.get('count'),
        'tipCount': stats.get('tipCount'),
        'checkinsCount': stats.get('checkinsCount'),
    }
    
# Smoke-test the helper on the venue id stored under index label 0.
r = foursquare_attributes(df['foursquare_id'][0])

In [138]:
# Scratch check: dict.get returns None for a missing key, so this cell
# displays no output.
{}.get('a')

In [132]:
# Query Foursquare once per distinct, non-null venue id.
# - dropna() replaces the per-row pd.isnull check and does not depend on `df`
#   having a default 0..n-1 index.
# - unique() avoids repeated API calls for the same venue and keeps the
#   lookup table at one row per id, so the later merge on foursquare_id
#   cannot multiply rows.
venue_ids = df['foursquare_id'].dropna().unique()

# foursquare_attributes returns None when a response has no venue; skip those
# so every element handed to pd.DataFrame is a record dict.
foursquare_rows = [
    row
    for row in (foursquare_attributes(venue_id) for venue_id in venue_ids)
    if row is not None
]

In [139]:
# Tabulate the fetched records: each element of foursquare_rows becomes a row
# and the record keys become the columns.
foursquare_df = pd.DataFrame(data=foursquare_rows)
foursquare_df

Unnamed: 0,checkinsCount,foursquare_id,hasMenu,photos,price,rating,ratingSignals,tipCount
0,1490,4ae289b9f964a520e78e21e3,True,79,3.0,6.3,56.0,18
1,149,4c1a7695eac020a1105043c2,False,1,1.0,,,1
2,798,4bb7d9273db7b71353aa209a,True,53,1.0,8.9,71.0,24
3,916,4ab90c8bf964a520d27d20e3,False,41,1.0,7.4,33.0,10
4,3775,3fd66200f964a52019f11ee3,True,167,1.0,8.8,190.0,82
5,3116,4c1f9a12b306c92885ad68b7,True,133,3.0,8.2,195.0,72
6,113,4dbcbee46e810768bf678f0f,False,0,,,,0
7,2313,44d756a9f964a52068361fe3,False,62,1.0,6.1,71.0,30
8,7425,4a6f5d0ef964a520fdd51fe3,True,92,1.0,8.9,353.0,83
9,3416,4aff1091f964a520b53322e3,True,350,1.0,7.2,141.0,41


In [140]:
# Attach the Foursquare attributes to the mapping table. The join is an inner
# join on foursquare_id, so only rows whose id appears in both frames survive.
df2 = pd.merge(df, foursquare_df, how='inner', on='foursquare_id')
df2


Unnamed: 0,business_name,location,inspection_score,foursquare_id,foursquare_name,checkinsCount,hasMenu,photos,price,rating,ratingSignals,tipCount
0,Tiramisu Kitchen,"(37.791116, -122.403816)",87.400000,4ae289b9f964a520e78e21e3,Cafe Tiramisu,1490,True,79,3.0,6.3,56.0,18
1,Nrgize Lifestyle Cafe,"(37.786848, -122.421547)",94.000000,4c1a7695eac020a1105043c2,Nrgize Lifestyle Cafe,149,False,1,1.0,,,1
2,Norman's Ice Cream and Freezes,"(37.807155, -122.419004)",87.750000,4bb7d9273db7b71353aa209a,Norman's Ice Cream & Freezes,798,True,53,1.0,8.9,71.0,24
3,CHARLIE'S DELI CAFE,"(37.747114, -122.413641)",83.200000,4ab90c8bf964a520d27d20e3,Charlie's Deli Cafe,916,False,41,1.0,7.4,33.0,10
4,ART'S CAFE,"(37.764013, -122.465749)",92.500000,3fd66200f964a52019f11ee3,Art's Cafe,3775,True,167,1.0,8.8,190.0,82
5,SUSHI ZONE,"(37.771437, -122.423892)",82.500000,4c1f9a12b306c92885ad68b7,Sushi Zone,3116,True,133,3.0,8.2,195.0,72
6,RHODA GOLDMAN PLAZA,"(37.784626, -122.437734)",94.666667,4dbcbee46e810768bf678f0f,Rhoda Goldman Plaza,113,False,0,,,,0
7,CAFE X + O,"(37.742325, -122.426476)",91.333333,44d756a9f964a52068361fe3,Cafe XO,2313,False,62,1.0,6.1,71.0,30
8,Oasis Grill,"(37.794483, -122.396584)",75.000000,4a6f5d0ef964a520fdd51fe3,Oasis Grill,7425,True,92,1.0,8.9,353.0,83
9,Chowders,"(37.80824, -122.410189)",93.333333,4aff1091f964a520b53322e3,Chowders,3416,True,350,1.0,7.2,141.0,41


In [141]:
# Non-null count per column. All arguments previously passed were the
# defaults, and the `level` keyword no longer exists on DataFrame.count in
# pandas 2.0+, where passing it raises TypeError.
df2.count()

business_name       98
location            98
inspection_score    98
foursquare_id       98
foursquare_name     98
checkinsCount       98
hasMenu             98
photos              98
price               79
rating              79
ratingSignals       79
tipCount            98
dtype: int64

In [142]:
# Row count of the un-merged mapping table `df`.
df.shape[0]

5568

In [143]:
# Hold out 40% of the merged rows for testing; the fixed random_state makes
# the split repeatable across runs.
df_train, df_test = train_test_split(
    df2,
    random_state=42,
    test_size=0.4,
)

In [144]:
# Number of rows in the training split.
df_train.shape[0]

58