In [2]:
import pandas as pd
import requests
import json
import os 

# API credentials are read from environment variables so they never appear in
# the notebook; os.environ[...] raises KeyError when a variable is not set.
foursquare_key = os.environ["FOURSQUARE_API_KEY"]
yelp_key = os.environ["YELP_API_KEY"]
# Second Yelp key; presumably a spare for when the first hits its quota — TODO confirm.
yelp_key2 = os.environ["YELP_API_KEY2"]

In [3]:
# Read the bike-station table (station name, slot/bike counts, latitude, longitude).
# NOTE(review): the "Unnamed: 0" column shown in the output below looks like a
# saved row index; if so, pd.read_csv(..., index_col=0) would drop it — TODO confirm.
df = pd.read_csv('df_city_bike.csv')

In [4]:
# Show the station table as this cell's output.
df

Unnamed: 0,name,empty_slots,free_bikes,latitude,longitude
0,56 - CLN 110 W1 Norte,9,2,-15.758862,-47.888820
1,54 - CLN 107,6,5,-15.766000,-47.886460
2,42 - CLN 204 L1 Norte,11,0,-15.776620,-47.877490
3,48 - CLN 209 L1 Norte,1,14,-15.757370,-47.882230
4,43 - SQN 405 L2 Norte,8,7,-15.771630,-47.875050
...,...,...,...,...,...
65,70 - UNB Centro Olímpico,1,10,-15.765020,-47.858400
66,17 - Funarte,12,2,-15.791163,-47.897600
67,61 - CLN 113 W1 Norte,10,1,-15.750380,-47.891660
68,28 - Banco Central,6,5,-15.799740,-47.884110


# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [5]:
def send_foursquare_request(df, foursquare_key, parse_callback,
                            radius=1000, limit=50, timeout=10):
    """Search Foursquare Places around every station and pool the parsed POIs.

    Args:
        df: Station table with ``latitude`` and ``longitude`` columns.
        foursquare_key: API key, sent verbatim in the ``Authorization`` header.
        parse_callback: Callable that receives the decoded JSON body of one
            response and returns a list of POI dicts.
        radius: Value of the ``radius`` query parameter (default 1000).
        limit: Value of the ``limit`` query parameter (default 50).
        timeout: Seconds to wait for each HTTP response before giving up, so a
            stalled connection cannot hang the notebook (default 10).

    Returns:
        A flat list with every POI dict returned by ``parse_callback`` across
        all stations. Each dict's ``latitude``/``longitude`` keys are set to
        the coordinates of the station that was queried, replacing any values
        the callback put there.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error status,
        and whatever ``requests.get`` raises on connection failure or timeout.
    """
    url = "https://api.foursquare.com/v3/places/search"
    headers = {
        "accept": "application/json",
        "Authorization": foursquare_key,
    }
    # Passed through ``params`` so requests does the URL encoding.
    fields = "categories,rating,geocodes,location,name,fsq_id,stats,price"

    parsed_results = []
    for latitude, longitude in zip(df['latitude'], df['longitude']):
        params = {
            "ll": f"{latitude},{longitude}",
            "radius": radius,
            "fields": fields,
            "limit": limit,
        }
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        response.raise_for_status()
        pois = parse_callback(response.json())

        # Tag each POI with the station it was found near.
        for poi in pois:
            poi['latitude'] = latitude
            poi['longitude'] = longitude

        parsed_results.extend(pois)

    return parsed_results


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [6]:
def parse_response(response):
    """Extract restaurant POIs from one decoded Foursquare search response.

    Args:
        response: Decoded JSON body (a dict). Venues are read from its
            ``results`` list; a missing or null ``results`` yields ``[]``.

    Returns:
        A list of dicts with keys ``fsq_id``, ``name``, ``rating``,
        ``categories`` (list of category names), ``address``, ``latitude`` and
        ``longitude``. Only venues with a category named exactly
        ``'Restaurant'`` are kept.

    Raises:
        KeyError: If a result has no ``fsq_id`` or ``name``.
    """
    pois = []

    for result in response.get('results') or []:
        categories = [category['name'] for category in result.get('categories', [])]

        # Filter by restaurant category (exact name match, not substring).
        if 'Restaurant' not in categories:
            continue

        location = result.get('location', {})
        # The venue's own coordinates are under geocodes.main; fall back to
        # the location object, then to '' as before.
        main_geocode = (result.get('geocodes') or {}).get('main') or {}

        pois.append({
            'fsq_id': result['fsq_id'],
            'name': result['name'],
            'rating': result.get('rating', None),
            'categories': categories,
            'address': location.get('formatted_address', ''),
            'latitude': main_geocode.get('latitude', location.get('latitude', '')),
            'longitude': main_geocode.get('longitude', location.get('longitude', '')),
        })

    return pois

# Call send_foursquare_request with parse_response as the callback
results = send_foursquare_request(df, foursquare_key, parse_response)


Put your parsed results into a DataFrame

In [7]:
def create_dataframe(parsed_results):
    """Return a new DataFrame built from ``parsed_results``."""
    return pd.DataFrame(parsed_results)

# Reuse the POIs already fetched into `results` rather than calling the
# Foursquare API a second time for every station.
parsed_results = results

# Create a DataFrame from the parsed results for Part 3
df_foursquare = create_dataframe(parsed_results)

# Display the parsed DataFrame
print(df_foursquare)


                       fsq_id                                          name  \
0    4cfe88e1084f548186099109                           Torteria Di Lorenza   
1    548b5911498ecccac953fc61                                      Café Oyá   
2    537f6252498e54e075c1b714                               Dulce Patagonia   
3    4e97331df5b91722c72e3cf2                               Delícia de Bolo   
4    5682ee79498e148ae235ced0  Philippe Verstraete Pâtisserie e Boulangerie   
..                        ...                                           ...   
385  5706d023498eb7eb702acffe                                    Gádio Café   
386  4bb3624a2397b713070338b3                                    Kopenhagen   
387  53d6cffd498ee7e6da02f4df                            Chiquinho Sorvetes   
388  4ca75e32a6e08cfa4dd08394                                     Yogoberry   
389  515cceace4b0c49b3f20da69                                   Malbec Beer   

     rating                                        

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [8]:
# Parte 1: Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice.
def get_yelp(latitude, longitude, yelp_key, radius=1000, timeout=10):
    """Search Yelp for businesses around one point.

    Args:
        latitude: Latitude of the search centre.
        longitude: Longitude of the search centre.
        yelp_key: API key, sent as a ``Bearer`` token in the
            ``Authorization`` header.
        radius: Value of the ``radius`` query parameter (default 1000).
        timeout: Seconds to wait for the HTTP response before giving up, so a
            stalled connection cannot hang the notebook (default 10).

    Returns:
        The ``businesses`` list from the response, or ``[]`` when that key is
        absent. Each business dict gains ``latitude`` and ``longitude`` keys
        holding the coordinates that were searched.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error status,
        and whatever ``requests.get`` raises on connection failure or timeout.
    """
    url = "https://api.yelp.com/v3/businesses/search"

    # Use the key the caller passed in, not a module-level global.
    headers = {"Authorization": f"Bearer {yelp_key}"}
    params = {"latitude": latitude, "longitude": longitude, "radius": radius}

    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    response.raise_for_status()

    businesses = response.json().get('businesses', [])

    # Tag each business with the point it was found near.
    for business in businesses:
        business['latitude'] = latitude
        business['longitude'] = longitude

    return businesses

# Call the function for each bike station and pool every business returned.
# The per-business fields are extracted in the next cell, so nothing is
# parsed here.
yelp_results = []

for _, row in df.iterrows():
    yelp_results.extend(get_yelp(row['latitude'], row['longitude'], yelp_key))



Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [9]:
# Part 2: flatten each Yelp business into one record holding the fields we keep
parsed_results = []

for business in yelp_results:
    location = business.get('location', {})

    # Key order here fixes the column order of the DataFrame built later.
    parsed_results.append({
        'Name': business['name'],
        'Rating': business.get('rating', None),
        'Address': location.get('address1', ''),
        'City': location.get('city', ''),
        'State': location.get('state', ''),
        'ZIP Code': location.get('zip_code', ''),
        'Latitude': business.get('latitude', None),
        'Longitude': business.get('longitude', None),
    })


Put your parsed results into a DataFrame

In [10]:
# Build the Yelp frame and cast both coordinate columns to float in one step
df_yelp = pd.DataFrame(parsed_results).astype({'Latitude': float, 'Longitude': float})

# Print the DataFrame
print(df_yelp)


                                          Name  Rating  \
0                                          BFC     4.0   
1                            Maori Gastronomia     4.5   
2                  Feitiço Mineiro Restaurante     4.5   
3                      Mucho Gusto Gastronomia     4.5   
4                                  Sushi Woman     4.0   
...                                        ...     ...   
1351  Cristina Colina da Pedra Cafés Especiais     5.0   
1352                             Los Paleteros     4.5   
1353                Museu Nacional de Brasília     4.0   
1354                       Armazém do Ferreira     4.0   
1355                              Espaço Grill     5.0   

                              Address      City State   ZIP Code   Latitude  \
0             Cln 110 BLOCO B LOJA 62      Shcn    DF            -15.758862   
1                            CLN, 110  Brasília    DF  70753-000 -15.758862   
2              CLN 306 BL D  lj 45/51  Brasília    DF  70745-540 -

In [14]:
# Export both POI tables to CSV files; index=False keeps the row index out of the files
df_foursquare.to_csv('df_foursquare.csv', index=False)
df_yelp.to_csv('df_yelp.csv', index=False)


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

In [19]:
# Calculate the number of POIs from each API
yelp_poi_count = len(df_yelp)
foursquare_poi_count = len(df_foursquare)

print(f"Yelp POI Count: {yelp_poi_count}")
print(f"Foursquare POI Count: {foursquare_poi_count}")
print("------------------------")

# Count the columns kept for each POI (these are the fields the parsing code
# chose to keep, not everything the APIs return)
yelp_attributes_count = len(df_yelp.columns)
foursquare_attributes_count = len(df_foursquare.columns)

print(f"Yelp Attributes Count: {yelp_attributes_count}")
print(f"Foursquare Attributes Count: {foursquare_attributes_count}")
print("------------------------")

# Calculate the average rating per POI. These are mean ratings, not review
# counts; the variable names are kept for compatibility with later cells.
# NOTE(review): the two APIs appear to rate on different scales, so the two
# means are not directly comparable — confirm against the API docs.
yelp_avg_reviews = df_yelp['Rating'].mean()
foursquare_avg_reviews = df_foursquare['rating'].mean()

print(f"Yelp Average Rating: {yelp_avg_reviews}")
print(f"Foursquare Average Rating: {foursquare_avg_reviews}")
print("------------------------")

# Compare additional attributes: whatever columns remain after dropping the
# standard ones
yelp_additional_attributes = df_yelp.drop(['Name', 'Rating', 'Address', 'City', 'State', 'ZIP Code', 'Latitude', 'Longitude'], axis=1).columns
foursquare_additional_attributes = df_foursquare.drop(['fsq_id', 'name', 'rating', 'categories', 'address', 'latitude', 'longitude'], axis=1).columns

print(f"Yelp Additional Attributes: {yelp_additional_attributes}")
print(f"Foursquare Additional Attributes: {foursquare_additional_attributes}")


Yelp POI Count: 1356
Foursquare POI Count: 390
------------------------
Yelp Attributes Count: 8
Foursquare Attributes Count: 7
------------------------
Yelp Average Reviews: 4.142330383480826
Foursquare Average Reviews: 7.598102981029804
------------------------
Yelp Additional Attributes: Index([], dtype='object')


In [None]:
# Based on the comparison of the Yelp and Foursquare APIs for our location, we can make the following observations:

# POI Count: Yelp provides a higher number of Points of Interest (POIs) with 1356, whereas Foursquare has 390 POIs. This indicates that Yelp has a larger database of businesses or locations in our area.

# Attributes Count: Yelp has 8 attributes for each POI, while Foursquare has 7 attributes. This suggests that Yelp provides more detailed information about each POI.

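# Average Rating: The average rating for Yelp POIs is 4.14, whereas for Foursquare POIs it is 7.60. These figures are mean ratings, not review counts, so they say nothing about how many reviews each POI has. They also appear to be on different scales (the highest Yelp rating in our data is 5.0, while Foursquare ratings reach 9.1), so the two averages cannot be compared directly.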

# Additional Attributes: The Yelp dataset doesn't have any additional attributes beyond the standard ones like Name, Rating, Address, etc. Foursquare, on the other hand, might have additional attributes that are not present in the Yelp dataset.

# Based on these comparisons, it appears that Yelp provides a larger number of POIs and more detailed attributes for each POI. Foursquare shows a higher average rating, but the two ratings are not on the same scale and neither measures the amount of user-generated content, so the rating gap alone does not favor either API.

Get the top 10 restaurants according to their rating

In [22]:
# Keep the first row for each restaurant name, then take the ten highest ratings
df_foursquare_unique = df_foursquare.drop_duplicates(subset=['name'])
top_10_foursquare = df_foursquare_unique.nlargest(n=10, columns='rating')
print(top_10_foursquare.loc[:, ['name', 'rating']])


                                            name  rating
50                                       Oni-Uno     9.1
316  Bacio di Latte - Shopping Conjunto Nacional     9.1
71                          La Petit Boulangerie     9.0
2                                Dulce Patagonia     8.9
87                   Cachorro Quente do Edivaldo     8.8
185                          Confeitaria Mineira     8.8
290                      Castália Padaria e Café     8.8
123                             Bischoff Gourmet     8.7
20                                   Quanto Cafe     8.6
45                                      Santé 13     8.6


In [23]:
# Keep the first row for each business name, then take the ten highest ratings
df_yelp_unique = df_yelp.drop_duplicates(subset=['Name'])
top_10_yelp = df_yelp_unique.nlargest(n=10, columns='Rating')
print(top_10_yelp.loc[:, ['Name', 'Rating']])


                                         Name  Rating
9                          Oyá Cozinha Vegana     5.0
27                   Maestra Vida Restaurante     5.0
43                               Supren Verda     5.0
52                             Páprica Burger     5.0
58   Cristina Colina da Pedra Cafés Especiais     5.0
91                  Quiosque em frente ao HUB     5.0
101                               GenghisKhan     5.0
105                                  Cannelle     5.0
106                              C'est si Bon     5.0
108                             La Fornacella     5.0
