# Yelp API Restaurant Calls By City

For each city listed in cities.csv, retrieve up to the first 1,000 restaurants returned by the Yelp API and
extract the following data for each one:

- name
- address & zip
- coordinates
- rating
- review count
- price level
- category
- yelp id

Write data to restaurant_data.csv

In [47]:
# Dependencies
from __future__ import print_function
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
import csv
import urllib

import argparse

import pprint
import sys

#Import API key
from api_keys import api_key

### Perform API calls

- Dataframe from cities.csv
- Build function that calls and writes restaurant data to new csv, given a city
- Run list of cities through function

In [37]:
# Load the Bay Area city list; the labels below are passed to read_csv as the column names.
city_columns = ["City", "County", "Population", "Area (sq. mi)"]
cities_df = pd.read_csv("Resources/cities.csv", names=city_columns)
# Preview the first rows as the cell output.
cities_df.head()

Unnamed: 0,City,County,Population,Area (sq. mi)
0,Alameda,Alameda,73812,10.61
1,Albany,Alameda,18539,1.79
2,American Canyon,Napa,19454,4.84
3,Antioch,Contra Costa,102372,28.35
4,Atherton,San Mateo,6914,5.02


In [119]:
# Build the "<City>, CA" strings that the request cells pass as the Yelp `location` parameter.
cities = cities_df["City"]
cities_ca = [name + ', CA' for name in cities]
print(cities_ca)

['Alameda, CA', 'Albany, CA', 'American Canyon, CA', 'Antioch, CA', 'Atherton, CA', 'Belmont, CA', 'Belvedere, CA', 'Benicia, CA', 'Berkeley, CA', 'Brentwood, CA', 'Brisbane, CA', 'Burlingame, CA', 'Calistoga, CA', 'Campbell, CA', 'Clayton, CA', 'Cloverdale, CA', 'Colma, CA', 'Concord, CA', 'Corte Madera, CA', 'Cotati, CA', 'Cupertino, CA', 'Daly City, CA', 'Danville, CA', 'Dixon, CA', 'Dublin, CA', 'East Palo Alto, CA', 'El Cerrito, CA', 'Emeryville, CA', 'Fairfax, CA', 'Fairfield, CA', 'Foster City, CA', 'Fremont, CA', 'Gilroy, CA', 'Half Moon Bay, CA', 'Hayward, CA', 'Healdsburg, CA', 'Hercules, CA', 'Hillsborough, CA', 'Lafayette, CA', 'Larkspur, CA', 'Livermore, CA', 'Los Altos, CA', 'Los Altos Hills, CA', 'Los Gatos, CA', 'Martinez, CA', 'Menlo Park, CA', 'Mill Valley, CA', 'Millbrae, CA', 'Milpitas, CA', 'Monte Sereno, CA', 'Moraga, CA', 'Morgan Hill, CA', 'Mountain View, CA', 'Napa, CA', 'Newark, CA', 'Novato, CA', 'Oakland, CA', 'Oakley, CA', 'Orinda, CA', 'Pacifica, CA', 'Pal

In [120]:
# Endpoint and auth header shared by every Yelp search request in this notebook.
url = "https://api.yelp.com/v3/businesses/search"
headers = {"Authorization": "Bearer {}".format(api_key)}

# One small request (5 results starting at offset 5) for the first city --
# presumably a quick check that the key and endpoint work before the full crawl.
params = dict(term="restaurants", location=cities_ca[0], limit=5, offset=5)
response = requests.get(url, headers=headers, params=params)
req = response.json()

In [111]:
# Paging configuration: request PAGE_SIZE businesses at a time and stop once
# MAX_RESULTS have been requested for a city (the notebook collects at most the
# first 1,000 results per city).
PAGE_SIZE = 50
MAX_RESULTS = 1000
# Seconds to wait for Yelp before giving up, so a stalled connection cannot hang the cell.
REQUEST_TIMEOUT = 30


def _parse_business(business):
    """Flatten one business record from the search response into a row dict.

    Every returned row carries the same ten keys, in the same order, so the
    resulting DataFrame has consistent columns. A field that is absent from
    the payload is recorded as the string "NAN" (the sentinel this notebook
    uses for missing values).

    Parameters
    ----------
    business : dict
        One element of the ``businesses`` list in the search response.

    Returns
    -------
    dict
        Keys: name, categories, lat, lon, city, zip_code, state, price,
        rating, review_count.
    """
    # A key that is present but null is treated like a missing one, so the
    # nested lookups below cannot fail on None.
    coordinates = business.get('coordinates') or {}
    location = business.get('location') or {}
    categories = business.get('categories') or []

    return {
        # NOTE: the 'name' column stores the business *alias* (URL slug), as the
        # original cell did. The guard now tests the key that is actually read.
        'name': business.get('alias', "NAN"),
        'categories': [category['title'] for category in categories if 'title' in category],
        'lat': coordinates.get('latitude', "NAN"),
        'lon': coordinates.get('longitude', 'NAN'),
        'city': location.get('city', "NAN"),
        'zip_code': location.get('zip_code', "NAN"),
        'state': location.get('state', "NAN"),
        'price': business.get('price', "NAN"),
        'rating': business.get('rating', "NAN"),
        'review_count': business.get('review_count', "NAN"),
    }


yelp_data = []
counter = 0  # running count of businesses collected across all cities

for city in cities_ca:
    for offset in range(0, MAX_RESULTS, PAGE_SIZE):
        params = {"term": "restaurants", "location": str(city), "limit": PAGE_SIZE, 'offset': offset}
        req = requests.get(url, params=params, headers=headers, timeout=REQUEST_TIMEOUT).json()

        # An error payload has no "businesses" key; report it and move on to the
        # next city instead of aborting the whole crawl with a KeyError.
        if "businesses" not in req:
            print("Request failed " + str(city) + " " + str(offset) + ": " + str(req.get("error", req)))
            break

        if len(req["businesses"]) > 0:
            for business in req['businesses']:
                counter = counter + 1
                yelp_data.append(_parse_business(business))
        else:
            # An empty page means this city's results are exhausted.
            print("Data not found " + str(city) + " " + str(offset))
            break

# NOTE(review): the captured zip codes and prices include non-US values, so the
# location search appears to return some businesses outside California;
# consider filtering on `state` downstream.
print(len(yelp_data))

Data not found Albany, CA 250
Data not found American Canyon, CA 300
Data not found Antioch, CA 600
Data not found Atherton, CA 600
Data not found Belmont, CA 450
Data not found Belvedere, CA 100
Data not found Benicia, CA 400
Data not found Brentwood, CA 300
Data not found Brisbane, CA 350
Data not found Calistoga, CA 50
Data not found Clayton, CA 300
Data not found Cloverdale, CA 50
Data not found Colma, CA 200
Data not found Corte Madera, CA 100
Data not found Cotati, CA 200
Data not found Danville, CA 300
Data not found Dixon, CA 500
Data not found Dublin, CA 600
Data not found East Palo Alto, CA 400
Data not found El Cerrito, CA 500
Data not found Fairfax, CA 50
Data not found Fairfield, CA 400
Data not found Foster City, CA 350
Data not found Gilroy, CA 500
Data not found Half Moon Bay, CA 100
Data not found Healdsburg, CA 150
Data not found Hercules, CA 150
Data not found Larkspur, CA 300
Data not found Livermore, CA 800
Data not found Martinez, CA 150
Data not found Mill Valley

In [112]:
# Convert the collected per-business dicts into a DataFrame, one row per business.
# NOTE: this rebinds `yelp_data` from a list to a DataFrame.
yelp_data = pd.DataFrame(data=yelp_data)
# Bare expression so the notebook renders the frame as the cell output.
yelp_data

Unnamed: 0,categories,city,lat,lon,name,price,rating,review_count,state,zip_code
0,"[Barbeque, Korean]",Alameda,37.757548,-122.252010,ohgane-korean-bbq-alameda,$$,4.5,51,CA,94501
1,[Vietnamese],Alameda,37.762117,-122.244805,made-pho-you-alameda,NAN,4.5,22,CA,94501
2,"[Mediterranean, Afghan]",Alameda,37.767478,-122.239856,angelas-kitchen-alameda,$$,4.5,203,CA,94501
3,[German],Alameda,37.766060,-122.239970,speisekammer-alameda,$$,4.0,1312,CA,94501
4,"[Italian, Cocktail Bars]",Alameda,37.757312,-122.251694,trabocco-kitchen-and-cocktails-alameda,$$,4.0,1157,CA,94501
5,"[Cajun/Creole, Noodles, Chicken Wings]",Alameda,37.768258,-122.239336,monkey-king-at-the-brewery-alameda,$$,4.0,291,CA,94501
6,"[Comfort Food, Asian Fusion, Ice Cream & Froze...",Alameda,37.762848,-122.245206,roll-and-bowl-alameda,$,4.5,58,CA,94501
7,[Burmese],Alameda,37.763690,-122.243440,burma-superstar-alameda-2,$$,4.0,1827,CA,94501
8,"[Chinese, Noodles, Burgers]",Alameda,37.762010,-122.244910,wild-ginger-alameda,$$,4.0,250,CA,94501
9,"[Cajun/Creole, Hawaiian, Seafood]",Alameda,37.765950,-122.242500,hang-ten-boiler-alameda,$$,4.0,1076,CA,94501


In [113]:
# Non-null entries per column (axis=0 is the default, spelled out for clarity).
yelp_data.count(axis=0)

categories      57438
city            57438
lat             57431
lon             57431
name            57438
price           57438
rating          57438
review_count    57438
state           57438
zip_code        57438
dtype: int64

In [114]:
# Number of collected businesses per city as reported in the Yelp location data.
city_column = yelp_data['city']
city_column.value_counts()

Oakland                    4553
San Jose                   3856
San Francisco              3840
Palo Alto                  1982
San Mateo                  1688
Redwood City               1630
Sunnyvale                  1605
Berkeley                   1523
Santa Clara                1446
Mountain View              1430
Fremont                    1417
Concord                    1392
Walnut Creek               1247
South San Francisco        1070
Santa Rosa                  915
Cupertino                   898
Hayward                     874
Pleasanton                  866
San Rafael                  826
San Bruno                   809
Vallejo                     792
Burlingame                  732
Menlo Park                  715
Vacaville                   651
San Carlos                  626
Richmond                    618
Dublin                      613
Alameda                     574
Antioch                     563
Campbell                    542
                           ... 
Point Ri

In [115]:
# Frequency of each distinct category list (a business can carry several titles).
category_column = yelp_data['categories']
category_column.value_counts()

[Mexican]                                                    3269
[Chinese]                                                    1842
[Pizza]                                                      1426
[Thai]                                                        913
[Vietnamese]                                                  762
[Japanese, Sushi Bars]                                        732
[Indian]                                                      649
[Italian]                                                     640
[Sandwiches, Fast Food]                                       596
[American (New)]                                              576
[Sushi Bars, Japanese]                                        552
[Cafes]                                                       473
[Japanese]                                                    448
[Food Trucks, Mexican]                                        416
[Korean]                                                      403
[Burgers, 

In [116]:
# Distribution of price levels, including the "NAN" placeholder for missing prices.
price_column = yelp_data['price']
price_column.value_counts()

$$      29750
$       19130
NAN      6294
$$$      1888
$$$$      374
££          2
Name: price, dtype: int64

In [117]:
# Total number of collected rows, measured on the 'name' column.
name_column = yelp_data['name']
len(name_column)

57438

In [118]:
# Distinct zip codes seen in the results, in order of first appearance.
zip_column = yelp_data["zip_code"]
zip_column.unique()

array(['94501', '94607', '94612', '94619', '94606', '94609', '94610',
       '94601', '94611', '94602', '94577', '94605', '94608', '94618',
       '94621', '94603', '94502', '94541', '', '94703', '94620', '94105',
       '94103', '94660', '95616', '94110', 'LE7 1GZ', '94706', '94530',
       '94707', '94702', '94710', '94704', '94070', '94804', '94701',
       '94538', '94709', '94558', '94565', '94560', '95209', '94066',
       '94808', '95811', '95841', '94503', '94589', '94590', '94591',
       '94559', '94592', '94806', '94581', '94534', '95204', '95661',
       '94509', '94531', '94513', '94561', '94521', '94571', '94511',
       '94517', '94505', '94518', '94514', '94519', '94548', '95641',
       'BS4 4WW', '94526', '94520', '94025', '94301', '94063', '94061',
       '94306', '94304', '94305', '94062', '94303', '94040', '94309',
       '94002', '95112', '95127', '95051', '94043', '94089', '76520',
       '94085', '94404', '95054', '94539', '94080', '95050', '94544',
       '9403