In [3]:
import requests
import pandas as pd

# we can copy the Yelp API key and assign it to a variable api_key

In [4]:
api_key="****"

In [5]:
# Using the yelp business search API: https://www.yelp.com/developers/documentation/v3/business_search

# headers contain the api key.
headers = {'Authorization': 'Bearer {}'.format(api_key)}
search_api_url = 'https://api.yelp.com/v3/businesses/search'
params = {'term': 'coffee', 
          'location': 'Toronto, Ontario',
          'limit': 50}
 

In [6]:
# timeout of 5 seconds: https://requests.readthedocs.io/en/master/user/quickstart/#timeouts
response = requests.get(search_api_url, headers=headers, params=params, timeout=5)

In [7]:
# To take a look at the URL and the response object’s status
print(response.url)
print(response.status_code)

https://api.yelp.com/v3/businesses/search?term=coffee&location=Toronto%2C+Ontario&limit=50
200


In [8]:
# to check the response
print(response.headers)

{'Connection': 'keep-alive', 'content-type': 'application/json', 'ratelimit-dailylimit': '5000', 'server': 'envoy', 'ratelimit-remaining': '5000', 'x-b3-sampled': '0', 'x-routing-service': 'routing-main--useast1-5c95747677-h9kw4; site=public_api_v3', 'ratelimit-resettime': '2023-01-20T00:00:00+00:00', 'x-zipkin-id': '038dfc6ddd9a713a', 'x-cloudmap': 'routing_useast1', 'x-mode': 'ro', 'x-proxied': '10-65-75-6-useast1aprod', 'content-encoding': 'gzip', 'x-extlb': '10-65-75-6-useast1aprod', 'cache-control': 'max-age=0, no-store, private, no-transform', 'Accept-Ranges': 'bytes', 'Date': 'Thu, 19 Jan 2023 19:15:36 GMT', 'Via': '1.1 varnish', 'X-Served-By': 'cache-ewr18174-EWR', 'X-Cache': 'MISS', 'X-Cache-Hits': '0', 'Vary': 'Accept-Encoding', 'transfer-encoding': 'chunked'}


In [9]:
#Converting response in dictionary
data_dict=response.json()

In [10]:
#Checking type of the return data
type(data_dict)

dict

In [11]:
#What are main keys?
data_dict.keys()

dict_keys(['businesses', 'total', 'region'])

In [12]:
#Businesses is a list of dictionaries and each individual dictionary represent a business
print (type(data_dict['businesses']) )
print (type(data_dict['businesses'][0]) )

<class 'list'>
<class 'dict'>


In [13]:
#what is first business? 
data_dict['businesses'][0]

{'id': 'b9Ctxco-1HyjE55W_28MYA',
 'alias': 'himalayan-coffee-house-toronto-2',
 'name': 'Himalayan Coffee House',
 'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/pdZXiBAwPgXOgkzMXO9kgQ/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/himalayan-coffee-house-toronto-2?adjust_creative=V0o2j3ytyMMUE-fB7S4BTQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=V0o2j3ytyMMUE-fB7S4BTQ',
 'review_count': 155,
 'categories': [{'alias': 'coffee', 'title': 'Coffee & Tea'}],
 'rating': 4.5,
 'coordinates': {'latitude': 43.71336, 'longitude': -79.4},
 'transactions': [],
 'price': '$',
 'location': {'address1': '2552 Yonge Street',
  'address2': '',
  'address3': '',
  'city': 'Toronto',
  'zip_code': 'M4P 2J2',
  'country': 'CA',
  'state': 'ON',
  'display_address': ['2552 Yonge Street', 'Toronto, ON M4P 2J2', 'Canada']},
 'phone': '+14164856464',
 'display_phone': '+1 416-485-6464',
 'distance': 1954.9169469231715}

In [14]:
df=pd.Series((data_dict['businesses'][0]))

In [15]:
print(df)


id                                          b9Ctxco-1HyjE55W_28MYA
alias                             himalayan-coffee-house-toronto-2
name                                        Himalayan Coffee House
image_url        https://s3-media4.fl.yelpcdn.com/bphoto/pdZXiB...
is_closed                                                    False
url              https://www.yelp.com/biz/himalayan-coffee-hous...
review_count                                                   155
categories          [{'alias': 'coffee', 'title': 'Coffee & Tea'}]
rating                                                         4.5
coordinates             {'latitude': 43.71336, 'longitude': -79.4}
transactions                                                    []
price                                                            $
location         {'address1': '2552 Yonge Street', 'address2': ...
phone                                                 +14164856464
display_phone                                      +1 416-485-

# Doing another API request to find Cafes in NYC

In [42]:
# Create dictionary to query API for cafes in NYC
parameters = {'term':'cafe',
          	  'location':'NYC',
               'sort_by': 'rating'}


In [43]:
response_nyc = requests.get(search_api_url, headers=headers, params=parameters, timeout=5)

In [44]:
# Extract JSON data from the response
data_nyc = response_nyc.json()


In [45]:
# Load data to a dataframe
cafes = pd.DataFrame(data_nyc['businesses'])

In [47]:
# To flatten nested data
# Load json_normalize()
from pandas.io.json import json_normalize

# Isolate the JSON data from the API response
data_nyc = response_nyc.json()

# Flatten business data into a dataframe, replace separator
cafes = json_normalize(data_nyc["businesses"],
             sep='_')

# View data
print(cafes.head())


                       id                              alias  \
0  USWkf_B93v_8e2K4mh0yiw  ariston-flowers-and-cafe-new-york   
1  bJDU8KNLQMrZG0Ngs4AY0w                 le-phin-new-york-2   
2  S4mRsE_WwHfZj1cfZWUg6g                   paquita-new-york   
3  C9mE4FfzF56wttmakkwcNQ                     talea-brooklyn   
4  VLC2DROxvGX-ka_aBmJN9w         social-house-cafe-brooklyn   

                     name                                          image_url  \
0  Ariston Flowers & Cafe  https://s3-media1.fl.yelpcdn.com/bphoto/PkwVw0...   
1                 Le Phin  https://s3-media3.fl.yelpcdn.com/bphoto/9ASzwJ...   
2                 Paquita  https://s3-media1.fl.yelpcdn.com/bphoto/-zENBT...   
3                   TALEA  https://s3-media3.fl.yelpcdn.com/bphoto/XOs2v9...   
4       Social House Cafe  https://s3-media2.fl.yelpcdn.com/bphoto/yRO3_u...   

   is_closed                                                url  review_count  \
0      False  https://www.yelp.com/biz/ariston-flower

  cafes = json_normalize(data_nyc["businesses"],


In [48]:
# we see categories is further nested, we will use record_path settings
# Load other business attributes and set meta prefix
top_50_cafes = pd.json_normalize(data_nyc["businesses"],
                            sep="_",
                    		record_path="categories",
                    		meta=["name", 
                                  "alias",  
                                  "rating",
                          		  ["coordinates","latitude" ], 
                          		  ["coordinates","longitude"]],
                    		meta_prefix="biz_")



In [50]:
# View the data
print(top_50_cafes.head(10))


       alias         title                biz_name  \
0     coffee  Coffee & Tea  Ariston Flowers & Cafe   
1   florists      Florists  Ariston Flowers & Cafe   
2     coffee  Coffee & Tea                 Le Phin   
3        tea     Tea Rooms                 Paquita   
4   antiques      Antiques                 Paquita   
5     coffee  Coffee & Tea                 Paquita   
6       bars          Bars                   TALEA   
7     coffee  Coffee & Tea                   TALEA   
8  breweries     Breweries                   TALEA   
9     coffee  Coffee & Tea       Social House Cafe   

                           biz_alias biz_rating biz_coordinates_latitude  \
0  ariston-flowers-and-cafe-new-york        5.0                40.735796   
1  ariston-flowers-and-cafe-new-york        5.0                40.735796   
2                 le-phin-new-york-2        5.0                40.728603   
3                   paquita-new-york        5.0                40.733677   
4                   paqui

In [39]:
# Add an offset parameter to get cafes 51-100
params = {"term": "cafe", 
          "location": "NYC",
          "sort_by": "rating", 
          "limit": 50,
          "offset": 50}

In [54]:
result = requests.get(search_api_url, headers=headers, params=params)
next_50_cafes = pd.json_normalize(result.json()["businesses"])

In [63]:
# Append the results, setting ignore_index to renumber rows
cafes = pd.concat([top_50_cafes,next_50_cafes], ignore_index=True)

#cafes = top_50_cafes.append(next_50_cafes, ignore_index=True)
# Print shape of cafes
print(cafes.dtypes)


alias                         object
title                         object
biz_name                      object
biz_alias                     object
biz_rating                    object
biz_coordinates_latitude      object
biz_coordinates_longitude     object
id                            object
name                          object
image_url                     object
is_closed                     object
url                           object
review_count                 float64
categories                    object
rating                       float64
transactions                  object
phone                         object
display_phone                 object
distance                     float64
coordinates.latitude         float64
coordinates.longitude        float64
location.address1             object
location.address2             object
location.address3             object
location.city                 object
location.zip_code             object
location.country              object
l