In [1]:
import http3
import os
import pandas as pd
import time

# constants
FOURSQUARE_API = 'Foursquare'
YELP_API = 'Yelp'

# Open the stations CSV file created on the city_bikes notebook 
df_stations = pd.read_csv(r'../data/stations.csv', sep=';', encoding='utf-8')
df_stations

Unnamed: 0.1,Unnamed: 0,name,latitude,longitude,number_of_bikes,ebikes,normal_bikes,city
0,0,Transit Center,34.85028,-82.40107,7,,,"Greenville, SC"
1,1,Link West End,34.84511,-82.40491,4,,,"Greenville, SC"
2,2,NEXT Innovation Center,34.84043,-82.39883,5,,,"Greenville, SC"
3,3,College & Main,34.85399,-82.39797,6,,,"Greenville, SC"
4,4,City Hall,34.84841,-82.40027,3,,,"Greenville, SC"
5,5,Caine Halter YMCA,34.83585,-82.38589,0,,,"Greenville, SC"
6,6,Greenville Zoo,34.84717,-82.38581,2,,,"Greenville, SC"
7,7,Swamp Rabbit Cafe,34.86926,-82.42157,5,,,"Greenville, SC"
8,8,Fluor Field,34.8429,-82.40897,4,,,"Greenville, SC"
9,9,Unity Park,34.85409,-82.41556,3,,,"Greenville, SC"


In [2]:
def remove_latitude_and_longitude_duplicated(list_of_places):
    """
    remove_latitude_and_longitude_duplicated function will remove all rows identified with the same latitude and longitude

    :param list_of_places: list of the places retrieved by API
    :return: a DataFrame without duplicates
    """
    df = pd.DataFrame(list_of_places)
    df.sort_values(by=['name'], inplace=True)
    df['latitude'] = df['latitude'].astype(str)
    df['longitude'] = df['longitude'].astype(str)
    return df.drop_duplicates(['latitude', 'longitude'], keep="first")

async def request_get(url, params, source):
    """
    request_get function will make a GET request for the url

    :param url: API url
    :param params: a dictionary with all parameters
    :param source: a constant string: FOURSQUARE_API or YELP_API
    :return: the response
    """
    
    fsq_headers = {
        "Accept": "application/json",
        "Authorization": "fsq3/C/xb5X+ZrazPtfTkdxMSzWKg8MfaVAbbkX9fhJtKK8="
    }
    yelp_headers = {
        "Accept": "application/json",
        "Authorization": "Bearer CTEY8fI6ubzkB8WUEBlu2EHGC5VP9rIatbjNSH73nzHGH6LMznQyqKXGba8e1yASRVujuxGtCpfNB8ZM7cwom7v4cV9BO0mJUkNnYxIQxKIpFm3-ab026LnyP6vnZHYx"
    }

    client = http3.AsyncClient()
    return await client.get(url, params=params, headers= fsq_headers if source == FOURSQUARE_API else yelp_headers, verify=False)

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [3]:
# FOURSQUARE

async def foursquare_search(params, df_stations):
    """
    foursquare_search function will retrieve all station details and return a dictionary 

    :param params: a dictionary with the parameters to make a search. Reference: https://location.foursquare.com/developer/reference/place-search
    :param df_stations: it is the stations from the CSV file in a DataFrame. (top of this notebook) 
    :return: an array with the search result
    """
    print("FOURSQUARE: Retrieving data for '" + search + "' ...")
    result = []
    
    # get the start time
    st = time.time()
    
    for _, station in df_stations.iterrows():
        url = "https://api.foursquare.com/v3/places/search"
        
        params['ll'] = str(station['latitude']) + "," + str(station['longitude'])

        response = await request_get(url, params, FOURSQUARE_API)

        if (response.status_code == 200):
            res = response.json()

            for data in res['results']:
                url = "https://api.foursquare.com/v3/places/" + data['fsq_id']
                place_params = {
                    "fields": "rating,price,tel"
                }
                place_detail_response = await request_get(url, place_params, FOURSQUARE_API)
                place_detail = place_detail_response.json()
                result.append({ 'id': data['fsq_id'],
                                'name': data['name'],
                                'address': data['location']['formatted_address'],
                                'rating': place_detail['rating'] if 'rating' in place_detail else None,
                                'price': place_detail['price'] if 'price' in place_detail else None,
                                'phone': place_detail['tel'] if 'tel' in place_detail else None,
                                'distance': data['distance'],
                                'latitude': data['geocodes']['main']['latitude'],
                                'longitude': data['geocodes']['main']['longitude'],
                                'source': 'Foursquare',
                                'city': station['city'],
                                'station': station['name'],
                                'station_latitude': station['latitude'],
                                'station_longitude': station['longitude'],
                                'station_number_of_bikes': station['number_of_bikes']
                                })
        else:
            print("Error: Status Code [" + str(response.status_code) + "]")
            print("URL: " + url)
            print("Parameters:")
            print(parameters)
    print("Found " + str(len(result)) + " places for the search term: '" + search + "'")
    # get the end time
    et = time.time()
    # get the execution time
    elapsed_time = et - st
    print('Execution time:', elapsed_time, 'seconds')
    return result

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [4]:
search = "restaurant"
radius = 1000
params = {
    "query": search,
    "radius": radius,
    "sort":"DISTANCE"
}
result = await foursquare_search(params, df_stations)

FOURSQUARE: Retrieving data for 'restaurant' ...
Found 102 places for the search term: 'restaurant'
Execution time: 36.71705985069275 seconds


Put your parsed results into a DataFrame

In [5]:
'''
    DATA CLEANING
    
    - Remove the rows with the latitude and longitude diplicated
    - Remove all rows where the price, distance or rating column are missing values
'''
# Remove duplicates
df_foursquare = remove_latitude_and_longitude_duplicated(result)
# Drop all rows that have NaN/None values
df_foursquare_cleaned=df_foursquare.dropna(subset=['price', 'distance', 'rating'])
df_foursquare_cleaned.sort_values(by=['station', 'name'])

print("After data cleaning, we have {number_of_places} places.".format(number_of_places=len(df_foursquare_cleaned)))

# Save the result for a CSV file
df_foursquare_cleaned.to_csv(r'../data/foursquare.csv', sep=';', encoding='utf-8', header='true')
df_foursquare_cleaned

After data cleaning, we have 28 places.


Unnamed: 0,id,name,address,rating,price,phone,distance,latitude,longitude,source,city,station,station_latitude,station_longitude,station_number_of_bikes
1,4b5487f4f964a520cabe27e3,Barley's Taproom & Piz,25 W Washington St (Btw S Main & S Laurens St....,8.0,1.0,(864) 232-3706,124,34.850802,-82.399831,Foursquare,"Greenville, SC",Transit Center,34.85028,-82.40107,7
27,4b76d4faf964a52099622ee3,Brick Street Cafe,"315 Augusta St, Greenville, SC 29601",8.4,2.0,(864) 421-0111,674,34.840406,-82.406261,Foursquare,"Greenville, SC",NEXT Innovation Center,34.84043,-82.39883,5
21,4b6c5b63f964a5204f332ce3,Chicora Alley,"608 S Main St, Greenville, SC 29601",8.3,2.0,(864) 232-4100,617,34.845138,-82.402374,Foursquare,"Greenville, SC",NEXT Innovation Center,34.84043,-82.39883,5
25,540ae735498ed28daaa5af37,Eggs Up Grill,"31 Augusta St, Greenville, SC 29601",8.3,2.0,(864) 520-2005,662,34.843497,-82.404685,Foursquare,"Greenville, SC",NEXT Innovation Center,34.84043,-82.39883,5
41,50575585e4b01ef823440c65,Grill Marks - Main St. Greenville,"209 S Main St, Greenville, SC 29601",8.2,2.0,(864) 233-5825,47,34.848172,-82.400019,Foursquare,"Greenville, SC",City Hall,34.84841,-82.40027,3
85,569e8e21498e0c3b024d94c4,Halls Chophouse Greenville,"550 S Main St, Greenville, SC 29601",8.8,2.0,(864) 335-4200,160,34.845661,-82.402022,Foursquare,"Greenville, SC",River Walk,34.84722,-82.40256,2
29,4b3b8e2ef964a520b97525e3,Larkins On The River,"318 S Main St (Downtown), Greenville, SC 29601",8.6,4.0,(864) 467-9777,769,34.846894,-82.401523,Foursquare,"Greenville, SC",NEXT Innovation Center,34.84043,-82.39883,5
31,5a0f285e916bc10c34507e9a,Maple Street Biscuit Company,"18 E North St, Greenville, SC 29601",7.9,1.0,(864) 520-8724,208,34.852047,-82.397509,Foursquare,"Greenville, SC",College & Main,34.85399,-82.39797,6
58,4cd984db2a87a143aa27b309,Mike & Jeff's BBQ Diner,"2401 Old Buncombe Rd, Greenville, SC 29609",8.7,2.0,(864) 271-5225,981,34.876639,-82.415539,Foursquare,"Greenville, SC",Swamp Rabbit Cafe,34.86926,-82.42157,5
59,4bc7bca414d79521260c68e9,Purple International Bistro & Sushi,"933 S Main St, Greenville, SC 29601",8.3,2.0,(864) 232-3255,55,34.843098,-82.408507,Foursquare,"Greenville, SC",Fluor Field,34.8429,-82.40897,4


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [6]:
# YELP

async def yelp_search(params, df_stations):
    """
    yelp_search function will retrieve all station details and return a dictionary 

    :param params: a dictionary with the parameters to make a search. Reference: https://docs.developer.yelp.com/reference/v3_business_search
    :param df_stations: it is the stations from the CSV file in a DataFrame. (top of this notebook) 
    :return: an array with the search result
    """
    
    print("YELP: Retrieving data for '" + search + "' ...")
    result = []
    
    # get the start time
    st = time.time()
    
    for _, station in df_stations.iterrows():
        url = "https://api.yelp.com/v3/businesses/search"
        
        params["latitude"] = station['latitude']
        params["longitude"] = station['longitude']

        response = await request_get(url, params, YELP_API)
        
        res = response.json()['businesses']
        if (response.status_code == 200):
            for data in res:
                result.append({ 'id': data['id'],
                                'name': data['name'],
                                'address': data['location']['display_address'][0],
                                'rating': data['rating'],
                                'price': data['price'] if 'price' in data else None,
                                'phone': data['display_phone'],
                                'distance': data['distance'],
                                'latitude': data['coordinates']['latitude'],
                                'longitude': data['coordinates']['longitude'],
                                'source': 'Yelp',
                                'city': station['city'],
                                'station': station['name'],
                                'station_latitude': station['latitude'],
                                'station_longitude': station['longitude'],
                                'station_number_of_bikes': station['number_of_bikes']
                                })
        else:
            print("Error: Status Code [" + str(response.status_code) + "]")
            print("URL: " + url)
            print("Parameters:")
            print(parameters)
    print("Found " + str(len(result)) + " places for the search term: '" + search + "'")
    # get the end time
    et = time.time()
    # get the execution time
    elapsed_time = et - st
    print('Execution time:', elapsed_time, 'seconds')
    return result

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [7]:
search = "restaurant"
radius = 1000
params = {
    "term": search,
    "radius": radius,
    "sort_by": "distance"
}
result = await yelp_search(params, df_stations)

YELP: Retrieving data for 'restaurant' ...
Found 231 places for the search term: 'restaurant'
Execution time: 6.453224182128906 seconds


Put your parsed results into a DataFrame

In [8]:
'''
    DATA CLEANING
    
    - Remove the rows with the latitude and longitude diplicated
    - Remove all rows where the price, distance or rating column are missing values
    - Convert data type:
        - Price:
            - Yelp: Value is one of $, $$, $$$ and $$$$.
            - Foursquare: A numerical value (from 1 to 4)
            - Yelp use a string(currency symbols) to classify the price, I have reclassified them like Foursquare(number).
        - Rating:
            - Yelp: Rating for this business (value ranges from 1, 1.5, ... 4.5, 5).
            - Foursquare: A numerical rating (from 0.0 to 10.0) 
            - Foursquare's rating classifies from 1 to 10 and Yelp from 1 to 5. And then, I have reclassified the Yelp rating from 1 to 10.
            
    Documentation:
        - Yelp: https://docs.developer.yelp.com/reference/v3_business_search
        - Foursuqare: https://location.foursquare.com/developer/reference/response-fields
'''

# Remove duplicates
df_yelp = remove_latitude_and_longitude_duplicated(result)
# Drop all rows that have NaN/None values
df_yelp_cleaned=df_yelp.dropna(subset=['price', 'distance', 'rating'])
df_yelp_cleaned.sort_values(by=['station', 'name'])

# Foursquare's rating classifies from 1 to 10 and Yelp from 1 to 5.And then, I have reclassified the Yelp rating from 1 to 10.
df_yelp_cleaned['rating'] = df_yelp_cleaned['rating'] * 2

# Yelp use a string(currency symbols) to classify the price, I have reclassified them like Foursquare(number).
df_yelp_cleaned['price'] = df_yelp_cleaned.apply(lambda row: format(len(row['price']), '.1f'), axis=1)

print("After data cleaning, we have {number_of_places} places.".format(number_of_places=len(df_yelp_cleaned)))
df_yelp_cleaned.to_csv(r'../data/yelp.csv', sep=';', encoding='utf-8', header='true')
df_yelp_cleaned

After data cleaning, we have 90 places.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_yelp_cleaned['rating'] = df_yelp_cleaned['rating'] * 2
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_yelp_cleaned['price'] = df_yelp_cleaned.apply(lambda row: format(len(row['price']), '.1f'), axis=1)


Unnamed: 0,id,name,address,rating,price,phone,distance,latitude,longitude,source,city,station,station_latitude,station_longitude,station_number_of_bikes
68,sB7UpZeBzN2Lw76qU9XuBg,Antonino Bertolo's Pizza,200 N Main St,7.0,1.0,(864) 467-9555,140.357933,34.852729,-82.397901,Yelp,"Greenville, SC",College & Main,34.85399,-82.39797,6
119,di1jOLuJDg_qxXwY81JEhg,Aryana Afghan Cuisine,210 E Coffee St,9.0,2.0,(864) 236-7410,1061.033026,34.8506622314453,-82.3966064453125,Yelp,"Greenville, SC",Greenville Zoo,34.84717,-82.38581,2
161,SGOw7GeyZaSw8q3doiBIZw,Automatic Taco,147 Welborn St,9.0,1.0,(864) 203-5704,182.987591,34.8531522,-82.41391209999999,Yelp,"Greenville, SC",Unity Park,34.85409,-82.41556,3
217,Tv3bOWsuamxjk-W6x9pB8A,Basil Thai Cuisine,9 N Laurens St,8.0,2.0,(864) 609-4120,188.670518,34.851551,-82.3999017,Yelp,"Greenville, SC",Reedy Rides 50SR,34.85056,-82.40158,0
145,0NBo2hP22P80KQnkyRttnw,Bex Cafe & Juice Bar,820 S Main St,9.0,1.0,(864) 552-1509,245.814050,34.844097,-82.406646,Yelp,"Greenville, SC",Fluor Field,34.84290,-82.40897,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
213,jT6W_waEWcaoKFP2Pp6kPQ,W XYZ Bar,5 North Laurens St,8.0,2.0,(864) 297-6100,156.448915,34.8512544,-82.3999951,Yelp,"Greenville, SC",Reedy Rides 50SR,34.85056,-82.40158,0
124,JCo5Glxhlnirl-UGpLpF8Q,White Duck Taco Shop,1320 Hampton Ave Ext,9.0,1.0,(864) 609-4150,638.581379,34.865887,-82.415905,Yelp,"Greenville, SC",Swamp Rabbit Cafe,34.86926,-82.42157,5
104,Dklu2K-j2YdquwmJ3pJe5A,Willy Taco - Feed & Seed,217 Laurens Rd,8.0,2.0,(864) 412-8700,812.474955,34.85159,-82.37863,Yelp,"Greenville, SC",Greenville Zoo,34.84717,-82.38581,2
42,AkZkBkTCMZ3zaKOPLl16Jg,Zaxby's Chicken Fingers & Buffalo Wings,824 S Church St,5.0,1.0,(864) 232-2929,206.689262,34.83928178,-82.400611,Yelp,"Greenville, SC",NEXT Innovation Center,34.84043,-82.39883,5


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

'''
So, I tried to work with basic information that search result returns, but 
Address, Rating, Price and Phone are information comes from the basic API search info from Yelp API.
Rating, Price and Phone don't come from the basic Foursquare API search, and then you have to make one more request to retrieve this information. Consequently, it takes more time to have the complete information from the place.

API Authentication
Foursquare is an API key.
Yelp is a Bearer Token.

API Documentation
Both use the same framework to document APIs, but in my opinion, Foursquare was easier to find what I want in comparison with Yelp.

API Results
The results were different. Depends on the city, Foursquare has more places or Yelp has. 

Address information
I got "formatted_address" attribute from Foursquare, and "display_address" from Yelp.
Foursquare has the complete address with City name and zip code. Yelp has street and number only.

Rating information 
Both are numerical.
Foursquare has a rating between 0.0 to 10.0.
Yelp has a rating between 1 to 5.

Price information 
Foursquare is a numerical value (from 1 to 4)
Values include:
- 1 = Cheap
- 2 = Moderate
- 3 = Expensive
- 4 = Very Expensive.

Yelp is a string (Value is one of $, $$, $$$ and $$$$.)

Phone information 
Foursquare is just the numbers, but it is not formatted.
Yelp has the attribute "display_phone", and phone number of the business formatted nicely to be displayed to users. The format is the standard phone number format for the business's country.

'''

Get the top 10 restaurants according to their rating

In [9]:
df_concat = pd.concat([df_foursquare_cleaned, df_yelp_cleaned]).reset_index(drop=True)
df_concat.sort_values(by=['rating'], ascending=False).head(10)

Unnamed: 0,id,name,address,rating,price,phone,distance,latitude,longitude,source,city,station,station_latitude,station_longitude,station_number_of_bikes
58,2kMJ1YV1LJtKKo-lxNrb9w,HenDough,126 Augusta St,10.0,2.0,(864) 373-9108,255.01226,34.8428506,-82.4061763,Yelp,"Greenville, SC",Fluor Field,34.8429,-82.40897,4
102,GaogkqqI0sovKboINcrhXQ,The Jones Oyster,22 E Court St,10.0,2.0,(864) 549-0301,87.189448,34.848133,-82.399347,Yelp,"Greenville, SC",City Hall,34.84841,-82.40027,3
100,F36L0ZHNP9VD44z2DdlERA,Taqueria El Paso Victoria,2500 Old Buncombe Rd,10.0,1.0,(864) 232-7210,1179.793026,34.87815,-82.41461,Yelp,"Greenville, SC",Swamp Rabbit Cafe,34.86926,-82.42157,5
94,PzDFOjXZNfTRPaO6kNSHbw,Sun Belly Cafe,1409 W Blue Ridge Dr,10.0,2.0,(404) 309-7791,853.617459,34.8741,-82.42892,Yelp,"Greenville, SC",Swamp Rabbit Cafe,34.86926,-82.42157,5
21,4e653467d164ddd5e6f277b6,Swamp Rabbit Cafe and Grocery,"205 Cedar Lane Rd, Greenville, SC 29611",9.4,2.0,(864) 255-3385,77.0,34.869832,-82.421934,Foursquare,"Greenville, SC",Swamp Rabbit Cafe,34.86926,-82.42157,5
117,XqUsstI6XMr_JZgOIu54iw,Zorba Lounge,1414 E Washington St,9.0,2.0,(864) 233-3125,703.529902,34.85047,-82.37924,Yelp,"Greenville, SC",Greenville Zoo,34.84717,-82.38581,2
47,yKc6rO92zePSz6dbxqzhqA,Farm Fresh Fast,860 S Church St,9.0,2.0,(864) 518-1978,290.636157,34.83845720293176,-82.40091902883553,Yelp,"Greenville, SC",NEXT Innovation Center,34.84043,-82.39883,5
32,0NBo2hP22P80KQnkyRttnw,Bex Cafe & Juice Bar,820 S Main St,9.0,1.0,(864) 552-1509,245.81405,34.844097,-82.406646,Yelp,"Greenville, SC",Fluor Field,34.8429,-82.40897,4
33,5nDkkj6j8xRZUbhT1fEjYA,Biscuit Head,823 S Church St,9.0,2.0,(864) 248-0371,165.156263,34.8392776522715,-82.3999717691856,Yelp,"Greenville, SC",NEXT Innovation Center,34.84043,-82.39883,5
70,L4GBV2rBF-NkRRXxdOHtpQ,Menkoi Noodle House,241 N Main St,9.0,1.0,(864) 373-9233,39.069525,34.85365,-82.398078,Yelp,"Greenville, SC",College & Main,34.85399,-82.39797,6
