In [1]:
## imports
import pandas as pd
import numpy as np
import re
import requests
import yaml


## repeated printouts
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# 1. Example 1: no credentials; no wrapper

Site: National Assessment of Education Progress (NAEP)

Documentation: https://www.nationsreportcard.gov/api_documentation.aspx

Base link: https://www.nationsreportcard.gov/DataService/GetAdhocData.aspx 

## 1.1 Query to pull some data

In [10]:
## using their example query of 2011 writing scores separated by gender
## based on here - https://stackoverflow.com/questions/40836749/pythonic-way-of-writing-a-single-line-long-string
## using the ( ) syntax to formulate a long
## string without linebreaks added
example_naep_query = (
'https://www.nationsreportcard.gov/'
'Dataservice/GetAdhocData.aspx?'
'type=data&subject=writing&grade=8&'
'subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011')


example_naep_query


'https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=8&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011'

In [3]:
## use requests to call the api
naep_resp = requests.get(example_naep_query)
naep_resp
print(type(naep_resp))

## get the json contents of the response 
## here, we're assuming valid response
naep_resp_j = naep_resp.json()
naep_resp_j

## with result, turn it into a dataframe
naep_resp_d = pd.DataFrame(naep_resp_j['result'])
naep_resp_d

<Response [200]>

<class 'requests.models.Response'>


{'status': 200,
 'result': [{'year': 2011,
   'sample': 'R3',
   'yearSampleLabel': '2011',
   'Cohort': 2,
   'CohortLabel': 'Grade 8',
   'stattype': 'MN:MN',
   'subject': 'WRI',
   'grade': 8,
   'scale': 'WRIRP',
   'jurisdiction': 'NP',
   'jurisLabel': 'National public',
   'variable': 'GENDER',
   'variableLabel': 'Gender',
   'varValue': '1',
   'varValueLabel': 'Male',
   'value': 139.099504632971,
   'isStatDisplayable': 1,
   'errorFlag': 0},
  {'year': 2011,
   'sample': 'R3',
   'yearSampleLabel': '2011',
   'Cohort': 2,
   'CohortLabel': 'Grade 8',
   'stattype': 'MN:MN',
   'subject': 'WRI',
   'grade': 8,
   'scale': 'WRIRP',
   'jurisdiction': 'NP',
   'jurisLabel': 'National public',
   'variable': 'GENDER',
   'variableLabel': 'Gender',
   'varValue': '2',
   'varValueLabel': 'Female',
   'value': 158.567104984955,
   'isStatDisplayable': 1,
   'errorFlag': 0}]}

Unnamed: 0,year,sample,yearSampleLabel,Cohort,CohortLabel,stattype,subject,grade,scale,jurisdiction,jurisLabel,variable,variableLabel,varValue,varValueLabel,value,isStatDisplayable,errorFlag
0,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,National public,GENDER,Gender,1,Male,139.099505,1,0
1,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,National public,GENDER,Gender,2,Female,158.567105,1,0


## 1.2 What happens if there's an error in our query?

In [4]:
## here's a query that from the documentation we know
## won't work since i modified year to 2025 which doesnt
## exist in the data
wrong_naep_query = (
'https://www.nationsreportcard.gov/'
'Dataservice/GetAdhocData.aspx?'
'type=data&subject=writing&grade=8&'
'subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2025')

wrong_naep_query

'https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=8&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2025'

In [5]:
## use requests to call the api
naep_wrong_resp = requests.get(wrong_naep_query)
naep_wrong_resp

<Response [200]>

In [9]:
## in the case of this particular api,
## the call returns some response but
## when we try to extract the json containing
## status or results, we get in an error
#naep_wrong_resp.json() # uncomment to see error

### 1.2.2 More all-purpose way of allowing remainder of calls to run: try, except

In [6]:
## putting it in a try; except as general error catching
try:
    results = naep_wrong_resp.json()['result']
except Exception as e:
    print('Failed to get result from API due to error:')
    print(e) # or just: pass

Failed to get result from API due to error:
Invalid control character at: line 1 column 289 (char 288)


### 1.2.3 Can usually also find more targeted way but that varies more across APIs

In [7]:
## if we wanted do more specific error catching,
## see that the status == 400 actually appears here
## so could write if else along those lines
naep_wrong_resp.text
naep_resp.text

if "System.Exception" in naep_wrong_resp.text:
    print("NAEP results not found")

'{"status":400,"result": "System.Exception: The query \'SELECT DISTINCT Framework FROM Cycles WHERE Subject=\'WRI\' AND Cohort=2 AND CONVERT(VARCHAR(10),Year)+Sample IN (\'2025R3\')\' did not return exactly 1 framework. Make sure you can trend the years defined for the given subject and cohort.\r\n   at NRCDataService3.GetAdhocData.GetFramework(NDEContext& ndeContext, String subjectCode, List`1 yearSamples, String cohort) in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 2091\r\n   at NRCDataService3.GetAdhocData.PopulateBaseOrchestratorRequest() in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 1781\r\n   at NRCDataService3.GetAdhocData.ConstructRequest_Datapoint() in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 628\r\n   at NRCDataService3.GetAdhocData.Page_Load(Object sender, EventArgs e) in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 179"}'

'{"status":200,"result": [{"year":2011,"sample":"R3","yearSampleLabel":"2011","Cohort":2,"CohortLabel":"Grade 8","stattype":"MN:MN","subject":"WRI","grade":8,"scale":"WRIRP","jurisdiction":"NP","jurisLabel":"National public","variable":"GENDER","variableLabel":"Gender","varValue":"1","varValueLabel":"Male","value":139.099504632971,"isStatDisplayable":1,"errorFlag":0},{"year":2011,"sample":"R3","yearSampleLabel":"2011","Cohort":2,"CohortLabel":"Grade 8","stattype":"MN:MN","subject":"WRI","grade":8,"scale":"WRIRP","jurisdiction":"NP","jurisLabel":"National public","variable":"GENDER","variableLabel":"Gender","varValue":"2","varValueLabel":"Female","value":158.567104984955,"isStatDisplayable":1,"errorFlag":0}]}'

NAEP results not found


## Activity 1: writing a function to make multiple, sequential calls

- Say we want to pull the data for grades 4, 8, and 12
- How can we write a function that iterates over a list of those grades and pulls the data for each grade?

**Note**: an ideal function would have arguments for each parameter in the API like subject, subscale, etc. Here we can leave those other parts constant

In [23]:
"My name is {}".format("Marina")

'My name is Marina'

In [2]:
activity_query = ('https://www.nationsreportcard.gov/'
'Dataservice/GetAdhocData.aspx?'
'type=data&subject=writing&grade={}&'
'subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011')

In [8]:
def pull_grades(query, grade):
    
    new_query = query.format(grade)

    try:
        # use requests to call the api
        naep_resp = requests.get(new_query)

        # get the json contents of the response
        naep_resp_j = naep_resp.json()

        # with result, turn it into a dataframe
        naep_resp_d = pd.DataFrame(naep_resp_j['result'])
        
        return naep_resp_d

    except Exception as e:
        return None

pull_grades(activity_query, 4)
pull_grades(activity_query, 8)
pull_grades(activity_query, 12)

Unnamed: 0,year,sample,yearSampleLabel,Cohort,CohortLabel,stattype,subject,grade,scale,jurisdiction,jurisLabel,variable,variableLabel,varValue,varValueLabel,value,isStatDisplayable,errorFlag
0,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,National public,GENDER,Gender,1,Male,139.099505,1,0
1,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,National public,GENDER,Gender,2,Female,158.567105,1,0


Unnamed: 0,year,sample,yearSampleLabel,Cohort,CohortLabel,stattype,subject,grade,scale,jurisdiction,jurisLabel,variable,variableLabel,varValue,varValueLabel,value,isStatDisplayable,errorFlag
0,2011,R3,2011,3,Grade 12,MN:MN,WRI,12,WRIRP,NP,National public,GENDER,Gender,1,Male,141.256978,1,0
1,2011,R3,2011,3,Grade 12,MN:MN,WRI,12,WRIRP,NP,National public,GENDER,Gender,2,Female,155.385917,1,0


# 2. Example 2: needs credentials; no wrapper

Create an account here: https://www.yelp.com/developers/v3/manage_app

In [9]:
## get the key
API_KEY = "MNr2h01IMLMy24iuY80IC3eOxmVGZv-pt_L3FeHi7EUQOxQZZ41OXhKS2bbbCIwKs1IURYbKTO38XLvV3FyjN_lFV-k_er-UVyXiXX5ogQq2Q05ZcTjFk40gMawzZnYx"

In [20]:
## use documentation to define what to search
## doc: https://www.yelp.com/developers/documentation/v3/business_search
## write the query 
base_url = "https://api.yelp.com/v3/businesses/search?"
my_name = "restaurants"
my_location = "Hanover, NH, 03755"
yelp_genquery = ('{base_url}'
                'term={name}'
                '&location={loc}').format(base_url = base_url,
                name = my_name,
                loc = my_location)

## use requests to call the API; here, we're
## passing it our credentials (structure varies
## by API and telling it to only return 10 results
## (max is 50 at once)
header = {'Authorization': f"Bearer {API_KEY}"}
yelp_genresp = requests.get(yelp_genquery, headers = header)
yelp_genresp

## then, look at structure of response
yelp_genjson = yelp_genresp.json()

<Response [200]>

In [18]:
## example business
yelp_genjson['businesses'][0]

## more automatic way of summarizing but things end up in lists
## within columns for things like categories
yelp_gendf = pd.DataFrame(yelp_genjson['businesses'])
yelp_gendf.head()

{'id': '1i0gf9awbeXMkrkpjWOaJg',
 'alias': 'aracosia-mclean-mclean',
 'name': 'Aracosia McLean',
 'image_url': 'https://s3-media1.fl.yelpcdn.com/bphoto/LT0pmvgsMmJRworhKbTDMw/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/aracosia-mclean-mclean?adjust_creative=EoYABbEHu8VML1LgkAgLQQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=EoYABbEHu8VML1LgkAgLQQ',
 'review_count': 395,
 'categories': [{'alias': 'afghani', 'title': 'Afghan'},
  {'alias': 'steak', 'title': 'Steakhouses'},
  {'alias': 'whiskeybars', 'title': 'Whiskey Bars'}],
 'rating': 4.8,
 'coordinates': {'latitude': 38.93501, 'longitude': -77.1794281},
 'transactions': ['pickup', 'delivery'],
 'price': '$$$',
 'location': {'address1': '1381 Beverly Rd',
  'address2': '',
  'address3': None,
  'city': 'McLean',
  'zip_code': '22101',
  'country': 'US',
  'state': 'VA',
  'display_address': ['1381 Beverly Rd', 'McLean, VA 22101']},
 'phone': '+17032693820',
 'display_phone': '(703) 269-3820',

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance,attributes
0,1i0gf9awbeXMkrkpjWOaJg,aracosia-mclean-mclean,Aracosia McLean,https://s3-media1.fl.yelpcdn.com/bphoto/LT0pmv...,False,https://www.yelp.com/biz/aracosia-mclean-mclea...,395,"[{'alias': 'afghani', 'title': 'Afghan'}, {'al...",4.8,"{'latitude': 38.93501, 'longitude': -77.1794281}","[pickup, delivery]",$$$,"{'address1': '1381 Beverly Rd', 'address2': ''...",17032693820.0,(703) 269-3820,4442.597709,"{'business_temp_closed': None, 'menu_url': 'ht..."
1,Ov8YvTNxYPYZu-XWh10WAA,the-union-mclean-4,The Union,https://s3-media2.fl.yelpcdn.com/bphoto/bQq1WU...,False,https://www.yelp.com/biz/the-union-mclean-4?ad...,267,"[{'alias': 'newamerican', 'title': 'New Americ...",4.7,"{'latitude': 38.934998, 'longitude': -77.1794437}",[delivery],$$,"{'address1': '1379 Beverly Rd', 'address2': No...",17033560129.0,(703) 356-0129,4441.636081,"{'business_temp_closed': None, 'menu_url': 'ht..."
2,0mNdAXmNy9ZqxZPVc2I1Ow,yu-noodles-mclean,Yu Noodles,https://s3-media2.fl.yelpcdn.com/bphoto/ZanJHZ...,False,https://www.yelp.com/biz/yu-noodles-mclean?adj...,94,"[{'alias': 'chinese', 'title': 'Chinese'}, {'a...",4.4,"{'latitude': 38.92235817840213, 'longitude': -...",[],$$,"{'address1': '1690 Anderson Rd', 'address2': '...",,,3481.162313,"{'business_temp_closed': None, 'menu_url': Non..."
3,iAwv3mCOXIeqf2Z389RZWA,circa-at-the-boro-tysons,Circa at The Boro,https://s3-media2.fl.yelpcdn.com/bphoto/IzV2e2...,False,https://www.yelp.com/biz/circa-at-the-boro-tys...,100,"[{'alias': 'newamerican', 'title': 'New Americ...",4.5,"{'latitude': 38.92378, 'longitude': -77.233}",[],,"{'address1': '1675 Silver Hill Dr', 'address2'...",15714196272.0,(571) 419-6272,2442.773938,"{'business_temp_closed': None, 'menu_url': 'ht..."
4,OP5VkWXLTp4nxyvCJYutMA,roots-kitchen-and-bar-west-mclean,Roots Kitchen & Bar,https://s3-media2.fl.yelpcdn.com/bphoto/9JiPYC...,False,https://www.yelp.com/biz/roots-kitchen-and-bar...,40,"[{'alias': 'breakfast_brunch', 'title': 'Break...",3.7,"{'latitude': 38.952337, 'longitude': -77.224716}",[],$$,"{'address1': '8100 Old Dominion Dr', 'address2...",17037127850.0,(703) 712-7850,876.084124,"{'business_temp_closed': None, 'menu_url': 'ht..."


In [19]:
## more data-specific way of summarizing
## we're doing a simple approach and just retaining
## cols that have a simple str structure
## if doing for real, would want to extract things
def clean_yelp_json(one_biz):

    ## restrict to str cols
    d_str = {key:value for key, value in one_biz.items()
             if type(value) == str}
    
    df_str = pd.DataFrame(d_str, index = [d_str['id']])
    return(df_str)

yelp_stronly = [clean_yelp_json(one_b) for one_b in yelp_genjson['businesses']]
yelp_stronly_df = pd.concat(yelp_stronly)

yelp_stronly_df.head(7)


Unnamed: 0,id,alias,name,image_url,url,price,phone,display_phone
1i0gf9awbeXMkrkpjWOaJg,1i0gf9awbeXMkrkpjWOaJg,aracosia-mclean-mclean,Aracosia McLean,https://s3-media1.fl.yelpcdn.com/bphoto/LT0pmv...,https://www.yelp.com/biz/aracosia-mclean-mclea...,$$$,17032693820.0,(703) 269-3820
Ov8YvTNxYPYZu-XWh10WAA,Ov8YvTNxYPYZu-XWh10WAA,the-union-mclean-4,The Union,https://s3-media2.fl.yelpcdn.com/bphoto/bQq1WU...,https://www.yelp.com/biz/the-union-mclean-4?ad...,$$,17033560129.0,(703) 356-0129
0mNdAXmNy9ZqxZPVc2I1Ow,0mNdAXmNy9ZqxZPVc2I1Ow,yu-noodles-mclean,Yu Noodles,https://s3-media2.fl.yelpcdn.com/bphoto/ZanJHZ...,https://www.yelp.com/biz/yu-noodles-mclean?adj...,$$,,
iAwv3mCOXIeqf2Z389RZWA,iAwv3mCOXIeqf2Z389RZWA,circa-at-the-boro-tysons,Circa at The Boro,https://s3-media2.fl.yelpcdn.com/bphoto/IzV2e2...,https://www.yelp.com/biz/circa-at-the-boro-tys...,,15714196272.0,(571) 419-6272
OP5VkWXLTp4nxyvCJYutMA,OP5VkWXLTp4nxyvCJYutMA,roots-kitchen-and-bar-west-mclean,Roots Kitchen & Bar,https://s3-media2.fl.yelpcdn.com/bphoto/9JiPYC...,https://www.yelp.com/biz/roots-kitchen-and-bar...,$$,17037127850.0,(703) 712-7850
b8dRf3aexGfBqGeaXlQByA,b8dRf3aexGfBqGeaXlQByA,pikoteo-mclean,Pikoteo,https://s3-media2.fl.yelpcdn.com/bphoto/Ew0bFK...,https://www.yelp.com/biz/pikoteo-mclean?adjust...,,17038910123.0,(703) 891-0123
wqItanynGIz5HRUu0pGbXQ,wqItanynGIz5HRUu0pGbXQ,chans-kitchen-vienna,Chans Kitchen,https://s3-media1.fl.yelpcdn.com/bphoto/Y9yS8q...,https://www.yelp.com/biz/chans-kitchen-vienna?...,,17032082280.0,(703) 208-2280


# Activity 2: pull restaurants in a different location

- Try running a business search query for your hometown or another place by constructing a query similar to `yelp_genquery` but changing the location parameter
- Other endpoints require feeding what's called the business' fusion id into the API. Take an id from `yelp_stronly.id` and use the documentation here to pull the reviews for that business: https://docs.developer.yelp.com/reference/v3_business_reviews
- **Challenge**: generalize the previous step by writing a function that (1) takes a list of business ids as an input, (2) calls the reviews API for each id, (3) returns the results, and (4) rowbinds all results, i.e. turns them into a single, usable DataFrame

In [26]:
## use documentation to define what to search
## doc: https://www.yelp.com/developers/documentation/v3/business_search
## write the query 
base_url = "https://api.yelp.com/v3/businesses/search?"
my_name = "restaurants"
my_location = "Tysons Corner, 22182"
yelp_genquery = ('{base_url}'
                'term={name}'
                '&location={loc}').format(base_url = base_url,
                name = my_name,
                loc = my_location)

## use requests to call the API; here, we're
## passing it our credentials (structure varies
## by API and telling it to only return 10 results
## (max is 50 at once)
header = {'Authorization': f"Bearer {API_KEY}"}
yelp_genresp = requests.get(yelp_genquery, headers = header)
yelp_genresp

## then, look at structure of response
yelp_genjson = yelp_genresp.json()

<Response [200]>

In [27]:
## example business
yelp_genjson['businesses'][0]

## more automatic way of summarizing but things end up in lists
## within columns for things like categories
yelp_gendf = pd.DataFrame(yelp_genjson['businesses'])
yelp_gendf.head()

{'id': 'iAwv3mCOXIeqf2Z389RZWA',
 'alias': 'circa-at-the-boro-tysons',
 'name': 'Circa at The Boro',
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/IzV2e2C970t6sxSYCXy5Fg/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/circa-at-the-boro-tysons?adjust_creative=EoYABbEHu8VML1LgkAgLQQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=EoYABbEHu8VML1LgkAgLQQ',
 'review_count': 100,
 'categories': [{'alias': 'newamerican', 'title': 'New American'},
  {'alias': 'sandwiches', 'title': 'Sandwiches'},
  {'alias': 'cocktailbars', 'title': 'Cocktail Bars'}],
 'rating': 4.5,
 'coordinates': {'latitude': 38.92378, 'longitude': -77.233},
 'transactions': [],
 'location': {'address1': '1675 Silver Hill Dr',
  'address2': '',
  'address3': None,
  'city': 'Tysons',
  'zip_code': '22102',
  'country': 'US',
  'state': 'VA',
  'display_address': ['1675 Silver Hill Dr', 'Tysons, VA 22102']},
 'phone': '+15714196272',
 'display_phone': '(571) 419-6272',
 'distance

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,attributes,price
0,iAwv3mCOXIeqf2Z389RZWA,circa-at-the-boro-tysons,Circa at The Boro,https://s3-media2.fl.yelpcdn.com/bphoto/IzV2e2...,False,https://www.yelp.com/biz/circa-at-the-boro-tys...,100,"[{'alias': 'newamerican', 'title': 'New Americ...",4.5,"{'latitude': 38.92378, 'longitude': -77.233}",[],"{'address1': '1675 Silver Hill Dr', 'address2'...",15714196272,(571) 419-6272,804.704259,"{'business_temp_closed': None, 'menu_url': 'ht...",
1,h2k1JX0-YfKwvFJqEOYSeA,joon-vienna,Joon,https://s3-media1.fl.yelpcdn.com/bphoto/J4QtwQ...,False,https://www.yelp.com/biz/joon-vienna?adjust_cr...,287,"[{'alias': 'persian', 'title': 'Persian/Irania...",4.0,"{'latitude': 38.91296125509596, 'longitude': -...","[pickup, delivery]","{'address1': '8045 Leesburg Pike', 'address2':...",15713781390,(571) 378-1390,702.544567,"{'business_temp_closed': None, 'menu_url': 'ht...",$$$
2,qbT0H8xT4lSeFe_qhmJz-w,roll-play-vienna-4,Roll Play,https://s3-media4.fl.yelpcdn.com/bphoto/TPiAoZ...,False,https://www.yelp.com/biz/roll-play-vienna-4?ad...,1675,"[{'alias': 'vietnamese', 'title': 'Vietnamese'...",4.3,"{'latitude': 38.9164603, 'longitude': -77.2266...","[pickup, delivery]","{'address1': '8150 Leesburg Pike', 'address2':...",17038915665,(703) 891-5665,360.225827,"{'business_temp_closed': None, 'menu_url': 'ht...",$$
3,ppmmV1FCM5vtjLe8rmzr1A,clarity-vienna,Clarity,https://s3-media3.fl.yelpcdn.com/bphoto/1ZiJM4...,False,https://www.yelp.com/biz/clarity-vienna?adjust...,1253,"[{'alias': 'newamerican', 'title': 'New Americ...",4.4,"{'latitude': 38.90929, 'longitude': -77.25661}",[delivery],"{'address1': '442 Maple Ave E', 'address2': ''...",17035398400,(703) 539-8400,2368.822903,"{'business_temp_closed': None, 'menu_url': Non...",$$$
4,FKlyr_Tq3-MmaSGElHGbyQ,songbird-fairfax,Songbird,https://s3-media4.fl.yelpcdn.com/bphoto/wRX17v...,False,https://www.yelp.com/biz/songbird-fairfax?adju...,100,"[{'alias': 'cocktailbars', 'title': 'Cocktail ...",4.5,"{'latitude': 38.85389926542205, 'longitude': -...","[pickup, delivery]","{'address1': '10940 Fairfax Blvd', 'address2':...",17032616983,(703) 261-6983,10481.868597,"{'business_temp_closed': None, 'menu_url': 'ht...",


In [28]:
## more data-specific way of summarizing
## we're doing a simple approach and just retaining
## cols that have a simple str structure
## if doing for real, would want to extract things
def clean_yelp_json(one_biz):

    ## restrict to str cols
    d_str = {key:value for key, value in one_biz.items()
             if type(value) == str}
    
    df_str = pd.DataFrame(d_str, index = [d_str['id']])
    return(df_str)

yelp_stronly = [clean_yelp_json(one_b) for one_b in yelp_genjson['businesses']]
yelp_stronly_df = pd.concat(yelp_stronly)

yelp_stronly_df.head(7)

Unnamed: 0,id,alias,name,image_url,url,phone,display_phone,price
iAwv3mCOXIeqf2Z389RZWA,iAwv3mCOXIeqf2Z389RZWA,circa-at-the-boro-tysons,Circa at The Boro,https://s3-media2.fl.yelpcdn.com/bphoto/IzV2e2...,https://www.yelp.com/biz/circa-at-the-boro-tys...,15714196272,(571) 419-6272,
h2k1JX0-YfKwvFJqEOYSeA,h2k1JX0-YfKwvFJqEOYSeA,joon-vienna,Joon,https://s3-media1.fl.yelpcdn.com/bphoto/J4QtwQ...,https://www.yelp.com/biz/joon-vienna?adjust_cr...,15713781390,(571) 378-1390,$$$
qbT0H8xT4lSeFe_qhmJz-w,qbT0H8xT4lSeFe_qhmJz-w,roll-play-vienna-4,Roll Play,https://s3-media4.fl.yelpcdn.com/bphoto/TPiAoZ...,https://www.yelp.com/biz/roll-play-vienna-4?ad...,17038915665,(703) 891-5665,$$
ppmmV1FCM5vtjLe8rmzr1A,ppmmV1FCM5vtjLe8rmzr1A,clarity-vienna,Clarity,https://s3-media3.fl.yelpcdn.com/bphoto/1ZiJM4...,https://www.yelp.com/biz/clarity-vienna?adjust...,17035398400,(703) 539-8400,$$$
FKlyr_Tq3-MmaSGElHGbyQ,FKlyr_Tq3-MmaSGElHGbyQ,songbird-fairfax,Songbird,https://s3-media4.fl.yelpcdn.com/bphoto/wRX17v...,https://www.yelp.com/biz/songbird-fairfax?adju...,17032616983,(703) 261-6983,
QgsCrTGlq_7R5IirExdnNw,QgsCrTGlq_7R5IirExdnNw,agora-tysons-tysons-2,Agora Tysons,https://s3-media2.fl.yelpcdn.com/bphoto/a8G8gr...,https://www.yelp.com/biz/agora-tysons-tysons-2...,17036638737,(703) 663-8737,$$
wqItanynGIz5HRUu0pGbXQ,wqItanynGIz5HRUu0pGbXQ,chans-kitchen-vienna,Chans Kitchen,https://s3-media1.fl.yelpcdn.com/bphoto/Y9yS8q...,https://www.yelp.com/biz/chans-kitchen-vienna?...,17032082280,(703) 208-2280,


In [31]:
base_url = "https://api.yelp.com/v3/businesses/{}/reviews"
header = {'Authorization': f"Bearer {API_KEY}"}

def get_business_reviews(base_url, header, business_ids):
    total_reviews = []
    for bus_id in business_ids:
        total_query = base_url.format(bus_id)
        yelp_genresp = requests.get(total_query, headers = header)
        yelp_genresp
        yelp_genjson = yelp_genresp.json()['reviews']
        yelp_genjson
        for rev_dict in yelp_genjson:
            rev_dict["business_id"] = bus_id
        total_reviews += yelp_genjson
    final_df = pd.DataFrame(total_reviews)
    return final_df
get_business_reviews(base_url, header, ["MRTdhLfHe-Uw-qQnQXxgVg", "PqyJx0ndtwBT9wLeFYnhoQ", "zeFASIGTjBwwPNf0ab86Jw"])

KeyError: 'reviews'

In [32]:
# Get one random business
one_biz = yelp_strongly_df.iloc[8][['name', 'id']]
one_biz

NameError: name 'yelp_strongly_df' is not defined