In [44]:
## imports
import pandas as pd
import numpy as np
import re
import requests
import yaml


## repeated printouts
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# 1. Example 1: no credentials; no wrapper

Site: National Assessment of Education Progress (NAEP)

Documentation: https://www.nationsreportcard.gov/api_documentation.aspx

Base link: https://www.nationsreportcard.gov/DataService/GetAdhocData.aspx 

## 1.1 Query to pull some data

In [45]:
## using their example query of 2011 writing scores separated by gender
## based on here - https://stackoverflow.com/questions/40836749/pythonic-way-of-writing-a-single-line-long-string
## using the ( ) syntax to formulate a long
## string without linebreaks added
example_naep_query = (
'https://www.nationsreportcard.gov/'
'Dataservice/GetAdhocData.aspx?'
'type=data&subject=writing&grade=8&'
'subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011')


example_naep_query


'https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=8&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011'

In [46]:
## use requests to call the api
naep_resp = requests.get(example_naep_query)
naep_resp
print(type(naep_resp))

## get the json contents of the response 
## here, we're assuming valid response
naep_resp_j = naep_resp.json()
naep_resp_j

## with result, turn it into a dataframe
naep_resp_d = pd.DataFrame(naep_resp_j['result'])
naep_resp_d

<Response [200]>

<class 'requests.models.Response'>


{'status': 200,
 'result': [{'year': 2011,
   'sample': 'R3',
   'yearSampleLabel': '2011',
   'Cohort': 2,
   'CohortLabel': 'Grade 8',
   'stattype': 'MN:MN',
   'subject': 'WRI',
   'grade': 8,
   'scale': 'WRIRP',
   'jurisdiction': 'NP',
   'jurisLabel': 'National public',
   'variable': 'GENDER',
   'variableLabel': 'Gender',
   'varValue': '1',
   'varValueLabel': 'Male',
   'value': 139.099504632971,
   'isStatDisplayable': 1,
   'errorFlag': 0},
  {'year': 2011,
   'sample': 'R3',
   'yearSampleLabel': '2011',
   'Cohort': 2,
   'CohortLabel': 'Grade 8',
   'stattype': 'MN:MN',
   'subject': 'WRI',
   'grade': 8,
   'scale': 'WRIRP',
   'jurisdiction': 'NP',
   'jurisLabel': 'National public',
   'variable': 'GENDER',
   'variableLabel': 'Gender',
   'varValue': '2',
   'varValueLabel': 'Female',
   'value': 158.567104984955,
   'isStatDisplayable': 1,
   'errorFlag': 0}]}

Unnamed: 0,year,sample,yearSampleLabel,Cohort,CohortLabel,stattype,subject,grade,scale,jurisdiction,jurisLabel,variable,variableLabel,varValue,varValueLabel,value,isStatDisplayable,errorFlag
0,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,National public,GENDER,Gender,1,Male,139.099505,1,0
1,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,National public,GENDER,Gender,2,Female,158.567105,1,0


## 1.2 What happens if there's an error in our query?

In [47]:
## here's a query that from the documentation we know
## won't work since i modified year to 2025 which doesnt
## exist in the data
wrong_naep_query = (
'https://www.nationsreportcard.gov/'
'Dataservice/GetAdhocData.aspx?'
'type=data&subject=writing&grade=8&'
'subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2025')

wrong_naep_query

'https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=8&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2025'

In [48]:
## use requests to call the api
naep_wrong_resp = requests.get(wrong_naep_query)
naep_wrong_resp

<Response [200]>

In [49]:
dir(naep_wrong_resp)
naep_wrong_resp.content

['__attrs__',
 '__bool__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__nonzero__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_content',
 '_content_consumed',
 '_next',
 'apparent_encoding',
 'close',
 'connection',
 'content',
 'cookies',
 'elapsed',
 'encoding',
 'headers',
 'history',
 'is_permanent_redirect',
 'is_redirect',
 'iter_content',
 'iter_lines',
 'json',
 'links',
 'next',
 'ok',
 'raise_for_status',
 'raw',
 'reason',
 'request',
 'status_code',
 'text',
 'url']

b'{"status":400,"result": "System.Exception: The query \'SELECT DISTINCT Framework FROM Cycles WHERE Subject=\'WRI\' AND Cohort=2 AND CONVERT(VARCHAR(10),Year)+Sample IN (\'2025R3\')\' did not return exactly 1 framework. Make sure you can trend the years defined for the given subject and cohort.\r\n   at NRCDataService3.GetAdhocData.GetFramework(NDEContext& ndeContext, String subjectCode, List`1 yearSamples, String cohort) in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 2091\r\n   at NRCDataService3.GetAdhocData.PopulateBaseOrchestratorRequest() in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 1781\r\n   at NRCDataService3.GetAdhocData.ConstructRequest_Datapoint() in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 628\r\n   at NRCDataService3.GetAdhocData.Page_Load(Object sender, EventArgs e) in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 179"}'

In [50]:
## in the case of this particular api,
## the call returns some response but
## when we try to extract the json containing
## status or results, we get in an error
#naep_wrong_resp.json() # uncomment to see error

### 1.2.2 More all-purpose way of allowing remainder of calls to run: try, except

In [51]:
## putting it in a try; except as general error catching
try:
    results = naep_wrong_resp.json()['result']
except Exception as e:
    print('Failed to get result from API due to error:')
    print(e) # or just: pass

Failed to get result from API due to error:
Invalid control character at: line 1 column 289 (char 288)


### 1.2.3 Can usually also find more targeted way but that varies more across APIs

In [52]:
## if we wanted do more specific error catching,
## see that the status == 400 actually appears here
## so could write if else along those lines
naep_wrong_resp.text
naep_resp.text

if "System.Exception" in naep_wrong_resp.text:
    print("NAEP results not found")

'{"status":400,"result": "System.Exception: The query \'SELECT DISTINCT Framework FROM Cycles WHERE Subject=\'WRI\' AND Cohort=2 AND CONVERT(VARCHAR(10),Year)+Sample IN (\'2025R3\')\' did not return exactly 1 framework. Make sure you can trend the years defined for the given subject and cohort.\r\n   at NRCDataService3.GetAdhocData.GetFramework(NDEContext& ndeContext, String subjectCode, List`1 yearSamples, String cohort) in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 2091\r\n   at NRCDataService3.GetAdhocData.PopulateBaseOrchestratorRequest() in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 1781\r\n   at NRCDataService3.GetAdhocData.ConstructRequest_Datapoint() in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 628\r\n   at NRCDataService3.GetAdhocData.Page_Load(Object sender, EventArgs e) in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 179"}'

'{"status":200,"result": [{"year":2011,"sample":"R3","yearSampleLabel":"2011","Cohort":2,"CohortLabel":"Grade 8","stattype":"MN:MN","subject":"WRI","grade":8,"scale":"WRIRP","jurisdiction":"NP","jurisLabel":"National public","variable":"GENDER","variableLabel":"Gender","varValue":"1","varValueLabel":"Male","value":139.099504632971,"isStatDisplayable":1,"errorFlag":0},{"year":2011,"sample":"R3","yearSampleLabel":"2011","Cohort":2,"CohortLabel":"Grade 8","stattype":"MN:MN","subject":"WRI","grade":8,"scale":"WRIRP","jurisdiction":"NP","jurisLabel":"National public","variable":"GENDER","variableLabel":"Gender","varValue":"2","varValueLabel":"Female","value":158.567104984955,"isStatDisplayable":1,"errorFlag":0}]}'

NAEP results not found


## Activity 1: writing a function to make multiple, sequential calls

- Say we want to pull the data for grades 4, 8, and 12
- How can we write a function that iterates over a list of those grades and pulls the data for each grade?

**Note**: an ideal function would have arguments for each parameter in the API like subject, subscale, etc. Here we can leave those other parts constant

In [53]:
"I am in the {} building. and I also go to {} for lunch".format("Silsby", "Collis")

'I am in the Silsby building. and I also go to Collis for lunch'

In [54]:
activity_query = ('https://www.nationsreportcard.gov/'
'Dataservice/GetAdhocData.aspx?'
'type=data&subject=writing&grade={}&'
'subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011')

def pull_grades(query, grade):
    new_query = query.format(grade)
    try:
        # use requests to call the api
        naep_resp = requests.get(new_query)
        # get the json contents of the response
        naep_resp_j = naep_resp.json()
        # with result, turn it into a dataframe
        naep_resp_d = pd.DataFrame(naep_resp_j['result'])
        return naep_resp_d
    except Exception as e:
        return None
pull_grades(activity_query, 4)
pull_grades(activity_query, 8)
pull_grades(activity_query, 12)

Unnamed: 0,year,sample,yearSampleLabel,Cohort,CohortLabel,stattype,subject,grade,scale,jurisdiction,jurisLabel,variable,variableLabel,varValue,varValueLabel,value,isStatDisplayable,errorFlag
0,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,National public,GENDER,Gender,1,Male,139.099505,1,0
1,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,National public,GENDER,Gender,2,Female,158.567105,1,0


Unnamed: 0,year,sample,yearSampleLabel,Cohort,CohortLabel,stattype,subject,grade,scale,jurisdiction,jurisLabel,variable,variableLabel,varValue,varValueLabel,value,isStatDisplayable,errorFlag
0,2011,R3,2011,3,Grade 12,MN:MN,WRI,12,WRIRP,NP,National public,GENDER,Gender,1,Male,141.256978,1,0
1,2011,R3,2011,3,Grade 12,MN:MN,WRI,12,WRIRP,NP,National public,GENDER,Gender,2,Female,155.385917,1,0


# 2. Example 2: needs credentials; no wrapper

Create an account here: https://www.yelp.com/developers/v3/manage_app

In [55]:
## get the key
API_KEY = "9a7z5JFCeZSZ9kY_ivUKIC1xZy7XgF6nYziSE_b50iYzuqX7fI6g161IzIqG_BgcoqLDp_YQBPDKvUrdtcS0Ho5iyKU1zqlBDK66TdruvRVzYVB73LxUwI7nSa0zZnYx"

In [56]:
## use documentation to define what to search
## doc: https://www.yelp.com/developers/documentation/v3/business_search
## write the query 
base_url = "https://api.yelp.com/v3/businesses/search?"
my_name = "restaurants"
my_location = "Hanover,NH,03755"
yelp_genquery = ('{base_url}'
                'term={name}'
                '&location={loc}').format(base_url = base_url,
                name = my_name,
                loc = my_location)

## use requests to call the API; here, we're
## passing it our credentials (structure varies
## by API and telling it to only return 10 results
## (max is 50 at once)
header = {'Authorization': f"Bearer {API_KEY}"}
yelp_genresp = requests.get(yelp_genquery, headers = header)
yelp_genresp

## then, look at structure of response
yelp_genjson = yelp_genresp.json()


<Response [200]>

In [57]:
## example business
yelp_genjson['businesses'][0]

## more automatic way of summarizing but things end up in lists
## within columns for things like categories
yelp_gendf = pd.DataFrame(yelp_genjson['businesses'])
yelp_gendf.head()

{'id': '8ybF6YyRldtZmU9jil4xlg',
 'alias': 'mollys-restaurant-and-bar-hanover',
 'name': "Molly's Restaurant & Bar",
 'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/TJLrrA6z-SnPgZfrs2GQNQ/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/mollys-restaurant-and-bar-hanover?adjust_creative=jmRDz3dQUAD9n2fAUWwDPQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=jmRDz3dQUAD9n2fAUWwDPQ',
 'review_count': 508,
 'categories': [{'alias': 'tradamerican', 'title': 'American'},
  {'alias': 'burgers', 'title': 'Burgers'},
  {'alias': 'pizza', 'title': 'Pizza'}],
 'rating': 3.9,
 'coordinates': {'latitude': 43.701144, 'longitude': -72.2894249},
 'transactions': ['delivery'],
 'price': '$$',
 'location': {'address1': '43 South Main St',
  'address2': '',
  'address3': '',
  'city': 'Hanover',
  'zip_code': '03755',
  'country': 'US',
  'state': 'NH',
  'display_address': ['43 South Main St', 'Hanover, NH 03755']},
 'phone': '+16036432570',
 'display_phone': '(

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance,attributes
0,8ybF6YyRldtZmU9jil4xlg,mollys-restaurant-and-bar-hanover,Molly's Restaurant & Bar,https://s3-media4.fl.yelpcdn.com/bphoto/TJLrrA...,False,https://www.yelp.com/biz/mollys-restaurant-and...,508,"[{'alias': 'tradamerican', 'title': 'American'...",3.9,"{'latitude': 43.701144, 'longitude': -72.2894249}",[delivery],$$,"{'address1': '43 South Main St', 'address2': '...",16036432570,(603) 643-2570,250.83016,"{'business_temp_closed': None, 'menu_url': 'ht..."
1,XVGEEIH5rVB2QzW-qywcJw,base-camp-cafe-hanover,Base Camp Cafe,https://s3-media1.fl.yelpcdn.com/bphoto/p8_YiE...,False,https://www.yelp.com/biz/base-camp-cafe-hanove...,247,"[{'alias': 'himalayan', 'title': 'Himalayan/Ne...",4.4,"{'latitude': 43.700626, 'longitude': -72.2887803}",[delivery],$$,"{'address1': '3 Lebanon St', 'address2': 'Ste ...",16036432007,(603) 643-2007,196.139758,"{'business_temp_closed': None, 'menu_url': 'ht..."
2,5WW4g_LRwau29KyjZGLyAA,sawtooth-kitchen-hanover,Sawtooth Kitchen,https://s3-media3.fl.yelpcdn.com/bphoto/61MNG4...,False,https://www.yelp.com/biz/sawtooth-kitchen-hano...,25,"[{'alias': 'chickenshop', 'title': 'Chicken Sh...",4.1,"{'latitude': 43.70158, 'longitude': -72.289641}",[],,"{'address1': '33 S Main St', 'address2': '', '...",16036435134,(603) 643-5134,242.607552,"{'business_temp_closed': None, 'menu_url': 'ht..."
3,qxm3VNmD0O2zw8m9U8SxZg,duende-hanover-2,Duende,https://s3-media4.fl.yelpcdn.com/bphoto/rf_h2D...,False,https://www.yelp.com/biz/duende-hanover-2?adju...,11,"[{'alias': 'spanish', 'title': 'Spanish'}, {'a...",3.8,"{'latitude': 43.70074272457673, 'longitude': -...",[],,"{'address1': '15 Lebanon St', 'address2': '', ...",16033064826,(603) 306-4826,107.388862,"{'business_temp_closed': None, 'menu_url': 'ht..."
4,KA8yhrd-ClVYMyOefXdVYg,lous-restaurant-and-bakery-hanover,Lou's Restaurant & Bakery,https://s3-media3.fl.yelpcdn.com/bphoto/VAx8H9...,False,https://www.yelp.com/biz/lous-restaurant-and-b...,383,"[{'alias': 'tradamerican', 'title': 'American'...",4.2,"{'latitude': 43.70143, 'longitude': -72.289001}",[delivery],$$,"{'address1': '30 S Main St', 'address2': '', '...",16036433321,(603) 643-3321,244.006059,"{'business_temp_closed': None, 'menu_url': 'ht..."


In [58]:
## more data-specific way of summarizing
## we're doing a simple approach and just retaining
## cols that have a simple str structure
## if doing for real, would want to extract things
def clean_yelp_json(one_biz):

    ## restrict to str cols
    d_str = {key:value for key, value in one_biz.items()
             if type(value) == str}
    
    df_str = pd.DataFrame(d_str, index = [d_str['id']])
    return(df_str)

yelp_stronly = [clean_yelp_json(one_b) for one_b in yelp_genjson['businesses']]
yelp_stronly_df = pd.concat(yelp_stronly)

yelp_stronly_df.head(7)


Unnamed: 0,id,alias,name,image_url,url,price,phone,display_phone
8ybF6YyRldtZmU9jil4xlg,8ybF6YyRldtZmU9jil4xlg,mollys-restaurant-and-bar-hanover,Molly's Restaurant & Bar,https://s3-media4.fl.yelpcdn.com/bphoto/TJLrrA...,https://www.yelp.com/biz/mollys-restaurant-and...,$$,16036432570,(603) 643-2570
XVGEEIH5rVB2QzW-qywcJw,XVGEEIH5rVB2QzW-qywcJw,base-camp-cafe-hanover,Base Camp Cafe,https://s3-media1.fl.yelpcdn.com/bphoto/p8_YiE...,https://www.yelp.com/biz/base-camp-cafe-hanove...,$$,16036432007,(603) 643-2007
5WW4g_LRwau29KyjZGLyAA,5WW4g_LRwau29KyjZGLyAA,sawtooth-kitchen-hanover,Sawtooth Kitchen,https://s3-media3.fl.yelpcdn.com/bphoto/61MNG4...,https://www.yelp.com/biz/sawtooth-kitchen-hano...,,16036435134,(603) 643-5134
qxm3VNmD0O2zw8m9U8SxZg,qxm3VNmD0O2zw8m9U8SxZg,duende-hanover-2,Duende,https://s3-media4.fl.yelpcdn.com/bphoto/rf_h2D...,https://www.yelp.com/biz/duende-hanover-2?adju...,,16033064826,(603) 306-4826
KA8yhrd-ClVYMyOefXdVYg,KA8yhrd-ClVYMyOefXdVYg,lous-restaurant-and-bakery-hanover,Lou's Restaurant & Bakery,https://s3-media3.fl.yelpcdn.com/bphoto/VAx8H9...,https://www.yelp.com/biz/lous-restaurant-and-b...,$$,16036433321,(603) 643-3321
jAFLKiVXq9z_h4ZPy0nLMA,jAFLKiVXq9z_h4ZPy0nLMA,the-works-café-hanover,The Works Café,https://s3-media2.fl.yelpcdn.com/bphoto/olpsy-...,https://www.yelp.com/biz/the-works-caf%C3%A9-h...,,16032772082,(603) 277-2082
vMyN7JL5cJExJORgIobbQg,vMyN7JL5cJExJORgIobbQg,tuk-tuk-thai-cuisine-hanover,Tuk Tuk Thai Cuisine,https://s3-media4.fl.yelpcdn.com/bphoto/--bUG3...,https://www.yelp.com/biz/tuk-tuk-thai-cuisine-...,$$,16032779192,(603) 277-9192


# Activity 2: pull restaurants in a different location

- Try running a business search query for your hometown or another place by constructing a query similar to `yelp_genquery` but changing the location parameter
- Other endpoints require feeding what's called the business' fusion id into the API. Take an id from `yelp_stronly.id` and use the documentation here to pull the reviews for that business: https://docs.developer.yelp.com/reference/v3_business_reviews
- **Challenge**: generalize the previous step by writing a function that (1) takes a list of business ids as an input, (2) calls the reviews API for each id, (3) returns the results, and (4) rowbinds all results, i.e. turns them into a single, usable DataFrame

In [59]:
## use documentation to define what to search
## doc: https://www.yelp.com/developers/documentation/v3/business_search
## write the query 
base_url = "https://api.yelp.com/v3/businesses/search?"
my_name = "restaurants"
my_location = "Longview,TX,75605"
yelp_genquery = ('{base_url}'
                'term={name}'
                '&location={loc}').format(base_url = base_url,
                name = my_name,
                loc = my_location)

## use requests to call the API; here, we're
## passing it our credentials (structure varies
## by API and telling it to only return 10 results
## (max is 50 at once)
header = {'Authorization': f"Bearer {API_KEY}"}
yelp_genresp = requests.get(yelp_genquery, headers = header)
yelp_genresp

## then, look at structure of response
yelp_genjson = yelp_genresp.json()

<Response [200]>

In [60]:
## example business
yelp_genjson['businesses'][0]

## more automatic way of summarizing but things end up in lists
## within columns for things like categories
yelp_gendf = pd.DataFrame(yelp_genjson['businesses'])
yelp_gendf.head()

{'id': 'Hb_D_5nKTUkZdEfJnSxlDQ',
 'alias': 'evergreen-food-factory-longview',
 'name': 'Evergreen Food Factory',
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/5v6wWa7ZoIy1FYx2TXRC4Q/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/evergreen-food-factory-longview?adjust_creative=jmRDz3dQUAD9n2fAUWwDPQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=jmRDz3dQUAD9n2fAUWwDPQ',
 'review_count': 47,
 'categories': [{'alias': 'korean', 'title': 'Korean'},
  {'alias': 'breakfast_brunch', 'title': 'Breakfast & Brunch'}],
 'rating': 4.8,
 'coordinates': {'latitude': 32.503674173777426,
  'longitude': -94.76887542754412},
 'transactions': ['pickup', 'delivery'],
 'location': {'address1': '1402 W Marshall Ave',
  'address2': '',
  'address3': None,
  'city': 'Longview',
  'zip_code': '75604',
  'country': 'US',
  'state': 'TX',
  'display_address': ['1402 W Marshall Ave', 'Longview, TX 75604']},
 'phone': '+19034700342',
 'display_phone': '(903) 470-0342

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,attributes,price
0,Hb_D_5nKTUkZdEfJnSxlDQ,evergreen-food-factory-longview,Evergreen Food Factory,https://s3-media2.fl.yelpcdn.com/bphoto/5v6wWa...,False,https://www.yelp.com/biz/evergreen-food-factor...,47,"[{'alias': 'korean', 'title': 'Korean'}, {'ali...",4.8,"{'latitude': 32.503674173777426, 'longitude': ...","[pickup, delivery]","{'address1': '1402 W Marshall Ave', 'address2'...",19034700342,(903) 470-0342,5080.546244,"{'business_temp_closed': None, 'menu_url': Non...",
1,wNfdtxP0rHof7xCyTq6Xkw,the-catch-longview,The Catch,https://s3-media2.fl.yelpcdn.com/bphoto/5MRx-O...,False,https://www.yelp.com/biz/the-catch-longview?ad...,234,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",4.2,"{'latitude': 32.5436941058377, 'longitude': -9...",[delivery],"{'address1': '3312 N Fourth St', 'address2': '...",19036632940,(903) 663-2940,1272.717812,"{'business_temp_closed': None, 'menu_url': 'ht...",$$
2,S2SOE4-QsjcyjQYMoPqJxA,scotties-bistro-longview,Scotties Bistro,https://s3-media2.fl.yelpcdn.com/bphoto/WqTzAf...,False,https://www.yelp.com/biz/scotties-bistro-longv...,51,"[{'alias': 'tradamerican', 'title': 'American'...",4.5,"{'latitude': 32.54532877788282, 'longitude': -...","[pickup, delivery]","{'address1': '1188 E Hawkins Pkwy', 'address2'...",14306257310,(430) 625-7310,2676.395007,"{'business_temp_closed': None, 'menu_url': Non...",
3,XkGy3rzvjLlQoCsqVSXElA,cafe-barrons-longview-4,Cafe Barron's,https://s3-media2.fl.yelpcdn.com/bphoto/jLMjkF...,False,https://www.yelp.com/biz/cafe-barrons-longview...,77,"[{'alias': 'cafes', 'title': 'Cafes'}, {'alias...",3.9,"{'latitude': 32.542082, 'longitude': -94.756265}",[delivery],"{'address1': '405 Loop 281 W', 'address2': '',...",19036634737,(903) 663-4737,1186.334123,"{'business_temp_closed': None, 'menu_url': Non...",$$
4,U5NUHLeNI8z_fk-QM4OzUA,bubbas-33-longview,Bubba's 33,https://s3-media1.fl.yelpcdn.com/bphoto/W2Zmos...,False,https://www.yelp.com/biz/bubbas-33-longview?ad...,162,"[{'alias': 'burgers', 'title': 'Burgers'}, {'a...",3.9,"{'latitude': 32.541333466370475, 'longitude': ...",[delivery],"{'address1': '200 E Loop 281', 'address2': '',...",19032321606,(903) 232-1606,389.871743,"{'business_temp_closed': None, 'menu_url': 'ht...",$$


In [61]:
## more data-specific way of summarizing
## we're doing a simple approach and just retaining
## cols that have a simple str structure
## if doing for real, would want to extract things
def clean_yelp_json(one_biz):

    ## restrict to str cols
    d_str = {key:value for key, value in one_biz.items()
             if type(value) == str}
    
    df_str = pd.DataFrame(d_str, index = [d_str['id']])
    return(df_str)

yelp_stronly = [clean_yelp_json(one_b) for one_b in yelp_genjson['businesses']]
yelp_stronly_df = pd.concat(yelp_stronly)

yelp_stronly_df.head(15)

Unnamed: 0,id,alias,name,image_url,url,phone,display_phone,price
Hb_D_5nKTUkZdEfJnSxlDQ,Hb_D_5nKTUkZdEfJnSxlDQ,evergreen-food-factory-longview,Evergreen Food Factory,https://s3-media2.fl.yelpcdn.com/bphoto/5v6wWa...,https://www.yelp.com/biz/evergreen-food-factor...,19034700342.0,(903) 470-0342,
wNfdtxP0rHof7xCyTq6Xkw,wNfdtxP0rHof7xCyTq6Xkw,the-catch-longview,The Catch,https://s3-media2.fl.yelpcdn.com/bphoto/5MRx-O...,https://www.yelp.com/biz/the-catch-longview?ad...,19036632940.0,(903) 663-2940,$$
S2SOE4-QsjcyjQYMoPqJxA,S2SOE4-QsjcyjQYMoPqJxA,scotties-bistro-longview,Scotties Bistro,https://s3-media2.fl.yelpcdn.com/bphoto/WqTzAf...,https://www.yelp.com/biz/scotties-bistro-longv...,14306257310.0,(430) 625-7310,
XkGy3rzvjLlQoCsqVSXElA,XkGy3rzvjLlQoCsqVSXElA,cafe-barrons-longview-4,Cafe Barron's,https://s3-media2.fl.yelpcdn.com/bphoto/jLMjkF...,https://www.yelp.com/biz/cafe-barrons-longview...,19036634737.0,(903) 663-4737,$$
U5NUHLeNI8z_fk-QM4OzUA,U5NUHLeNI8z_fk-QM4OzUA,bubbas-33-longview,Bubba's 33,https://s3-media1.fl.yelpcdn.com/bphoto/W2Zmos...,https://www.yelp.com/biz/bubbas-33-longview?ad...,19032321606.0,(903) 232-1606,$$
GkH1EOkMRzmqWPNl4Z3UiA,GkH1EOkMRzmqWPNl4Z3UiA,jaggers-longview,Jaggers,https://s3-media1.fl.yelpcdn.com/bphoto/QOa2P8...,https://www.yelp.com/biz/jaggers-longview?adju...,19035002774.0,(903) 500-2774,
p5JddvSKOUm5N_RAnCSyOA,p5JddvSKOUm5N_RAnCSyOA,nanny-goats-cafe-and-feed-bin-longview,Nanny Goats Cafe & Feed Bin,https://s3-media1.fl.yelpcdn.com/bphoto/gTbA1A...,https://www.yelp.com/biz/nanny-goats-cafe-and-...,19036534801.0,(903) 653-4801,$$
xBOyUPWvPbE5Lhcdkymj8Q,xBOyUPWvPbE5Lhcdkymj8Q,mi-casita-longview,Mi Casita,https://s3-media2.fl.yelpcdn.com/bphoto/dbMsrA...,https://www.yelp.com/biz/mi-casita-longview?ad...,19037588226.0,(903) 758-8226,$$
kTpmIYwolUve8hVCeYzO_A,kTpmIYwolUve8hVCeYzO_A,chiangmai-thai-kitchen-longview-3,Chiangmai Thai Kitchen,https://s3-media1.fl.yelpcdn.com/bphoto/TDA-uk...,https://www.yelp.com/biz/chiangmai-thai-kitche...,19036636622.0,(903) 663-6622,$$
Cci6YGrnTHyUv6uhMRBlDQ,Cci6YGrnTHyUv6uhMRBlDQ,roost-chicken-salad-and-deli-longview,Roost Chicken Salad & Deli,https://s3-media1.fl.yelpcdn.com/bphoto/FFpLU4...,https://www.yelp.com/biz/roost-chicken-salad-a...,14306257373.0,(430) 625-7373,


In [62]:
def get_business_reviews(business_ids):
    base_url = "https://api.yelp.com/v3/businesses/{}/reviews"
    header = {'Authorization': f"Bearer {API_KEY}"}
    total_reviews = []
    for bus_id in business_ids:
        total_query = base_url.format(bus_id)
        yelp_genresp = requests.get(total_query, headers = header)
        yelp_genresp
        yelp_genjson = yelp_genresp.json()['reviews']
        yelp_genjson
        for rev_dict in yelp_genjson:
            rev_dict["business_id"] = bus_id
        total_reviews += yelp_genjson
    final_df = pd.DataFrame(total_reviews)
    return final_df
get_business_reviews(["MRTdhLfHe-Uw-qQnQXxgVg", "PqyJx0ndtwBT9wLeFYnhoQ", "zeFASIGTjBwwPNf0ab86Jw"])

KeyError: 'reviews'

In [63]:
yelp_strongly_df

NameError: name 'yelp_strongly_df' is not defined