In [1]:
import pandas as pd
from yelpapi import YelpAPI
import os, json, math, time
from tqdm.notebook import tqdm_notebook

In [4]:
# Load credentials
with open('/Users/caell/.secret/yelp_api.json') as f:
    login = json.load(f)
    
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

In [5]:
LOCATION = 'San Diego, CA'
TERM = 'Sushi'

In [6]:
JSON_FILE = 'Data/in_progress_yelp_sd_sushi.json'

In [17]:
def create_json_file(JSON_FILE, delete_if_exists=True):
    file_exists = os.path.isfile(JSON_FILE)
    
    if file_exists:
        
        if delete_if_exists:
            print(f'{JSON_FILE} exists, deleting file')
            os.remove(JSON_FILE)
            create_json_file(JSON_FILE)
        else:
            print(f'{JSON_FILE} already exists')
    
    else:
        print(f'{JSON_FILE} does not exists, creating new file as empty list')
        folder = os.path.dirname(JSON_FILE)
        if len(folder) > 0:
            os.makedirs(folder, exist_ok=True)
        with open(JSON_FILE, 'w') as f:
            json.dump([],f)

In [18]:
create_json_file(JSON_FILE)

Data/in_progress_yelp_sd_sushi.json exists, deleting file
Data/in_progress_yelp_sd_sushi.json does not exists, creating new file as empty list


In [19]:
with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)

n_results = len(previous_results)
print(f'{n_results} previous results found')
results = yelp_api.search_query(location=LOCATION,
                          term=TERM,
                         offset=n_results)
total_results = results['total']
results_per_page = len(results['businesses'])

print(f'downloading {results_per_page} results per page')

n_pages = math.ceil((total_results - n_results) / results_per_page)

print(f'{n_pages} pages remaining to download')

0 previous results found
downloading 20 results per page
65 pages remaining to download


In [20]:
for i in tqdm_notebook(range(1,n_pages+1)):
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    
    n_results = len(previous_results)
    
    if n_results + results_per_page > 1000:
        print('Exceeded 1000 call limit, stopping loop')
        break
    
    results = yelp_api.search_query(location=LOCATION,
                                   term=TERM,
                                   offset=n_results)
    
    previous_results.extend(results['businesses'])
    
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)
        
    time.sleep(.2)
print(f'{len(previous_results)} results successfully retrieved')

  0%|          | 0/65 [00:00<?, ?it/s]

Exceeded 1000 call limit, stopping loop
1000 results successfully retrieved


In [22]:
final_df = pd.read_json(JSON_FILE)
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,O1NFJA0OI0-N77JzP7qlfQ,azuki-sushi-san-diego-2,Azuki Sushi,https://s3-media4.fl.yelpcdn.com/bphoto/OhZXVg...,False,https://www.yelp.com/biz/azuki-sushi-san-diego...,2644,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 32.7296447753906, 'longitude': -1...","[delivery, restaurant_reservation, pickup]",$$,"{'address1': '2321 5th Ave', 'address2': '', '...",16192384760,(619) 238-4760,6796.380089
1,iFrXIJmSiEjlEHuakTaKnA,kumi-cafe-san-diego,Kumi Cafe,https://s3-media4.fl.yelpcdn.com/bphoto/xfe9-W...,False,https://www.yelp.com/biz/kumi-cafe-san-diego?a...,135,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 32.81971, 'longitude': -117.1494}","[delivery, restaurant_reservation, pickup]",$$,"{'address1': '4380 Kearny Mesa Rd', 'address2'...",18585651288,(858) 565-1288,3269.091429
2,3S6VtYgcXL8lRic0iV5GYQ,sushi-ota-san-diego,Sushi Ota,https://s3-media2.fl.yelpcdn.com/bphoto/MtEcy_...,False,https://www.yelp.com/biz/sushi-ota-san-diego?a...,4234,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 32.8036636, 'longitude': -117.217...",[delivery],$$$,"{'address1': '4529 Mission Bay Dr', 'address2'...",18588808778,(858) 880-8778,6032.314235
3,werz_RjIeFEFNflwl6kdXA,sushi-yorimichi-san-diego,Sushi Yorimichi,https://s3-media2.fl.yelpcdn.com/bphoto/zFNNEO...,False,https://www.yelp.com/biz/sushi-yorimichi-san-d...,123,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 32.78695, 'longitude': -117.17053}","[delivery, restaurant_reservation, pickup]",$$,"{'address1': '2405 Ulric St', 'address2': None...",18582796868,(858) 279-6868,1599.327274
4,TSYhYwOPeaZmRNuiJoZaAw,soichi-sushi-san-diego-2,Soichi Sushi,https://s3-media3.fl.yelpcdn.com/bphoto/dDCfEd...,False,https://www.yelp.com/biz/soichi-sushi-san-dieg...,364,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",5.0,"{'latitude': 32.76267, 'longitude': -117.1418}","[delivery, pickup]",$$$$,"{'address1': '2121 Adams Ave', 'address2': '',...",16196772220,(619) 677-2220,3309.247571


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,l8M3vNqlAKN_s8NiSVBZ2w,board-and-brew-scripps-ranch-san-diego,Board & Brew - Scripps Ranch,https://s3-media2.fl.yelpcdn.com/bphoto/oKIL30...,False,https://www.yelp.com/biz/board-and-brew-scripp...,950,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",4.5,"{'latitude': 32.91559, 'longitude': -117.11459}",[delivery],$$,"{'address1': '9880 Hibert St', 'address2': 'St...",18586892739.0,(858) 689-2739,14381.278559
996,3jYKnm8jHcjaVkt6mRSoIQ,francos-flapjack-family-restaurant-bonita-2,Franco's Flapjack Family Restaurant,https://s3-media4.fl.yelpcdn.com/bphoto/lFbxHT...,False,https://www.yelp.com/biz/francos-flapjack-fami...,779,"[{'alias': 'breakfast_brunch', 'title': 'Break...",4.0,"{'latitude': 32.6589957193909, 'longitude': -1...","[pickup, delivery]",$$,"{'address1': '4164 Bonita Rd', 'address2': '',...",16195121673.0,(619) 512-1673,18250.672462
997,mtWpuuIdsHzKYMjUcxSPyw,jalisco-cafe-bonita-2,Jalisco Cafe,https://s3-media3.fl.yelpcdn.com/bphoto/2oBhF4...,False,https://www.yelp.com/biz/jalisco-cafe-bonita-2...,261,"[{'alias': 'mexican', 'title': 'Mexican'}]",3.0,"{'latitude': 32.658179195321324, 'longitude': ...","[pickup, delivery]",$,"{'address1': '4026 Bonita Rd', 'address2': '',...",16192677855.0,(619) 267-7855,18187.817299
998,IjBYMYnzrs6IdwRRHkKZGA,berts-bistro-san-diego,Bert's Bistro,https://s3-media4.fl.yelpcdn.com/bphoto/hCdjR_...,False,https://www.yelp.com/biz/berts-bistro-san-dieg...,8,"[{'alias': 'cafes', 'title': 'Cafes'}]",3.0,"{'latitude': 32.771885, 'longitude': -117.194577}",[],$$,"{'address1': 'University Of San Diego', 'addre...",,,4318.254525
999,j0cLQdianbAcvZeM97uWyA,home-brew-mart-ballast-point-san-diego,Home Brew Mart - Ballast Point,https://s3-media2.fl.yelpcdn.com/bphoto/nyz7xi...,False,https://www.yelp.com/biz/home-brew-mart-ballas...,468,"[{'alias': 'breweries', 'title': 'Breweries'},...",4.0,"{'latitude': 32.7667841646776, 'longitude': -1...",[delivery],$,"{'address1': '5401 Linda Vista Rd', 'address2'...",16192952337.0,(619) 295-2337,4669.792839


In [24]:
final_df.duplicated(subset='id').sum()

7

In [25]:
final_df.drop_duplicates(subset='id', inplace=True)
final_df.duplicated(subset='id').sum()

0

In [26]:
final_df.to_csv('Data/final_SD_sushi_spots.csv.gz',
               compression='gzip', index=False)

In [27]:
test_df = pd.read_csv('Data/final_SD_sushi_spots.csv.gz')
test_df.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,O1NFJA0OI0-N77JzP7qlfQ,azuki-sushi-san-diego-2,Azuki Sushi,https://s3-media4.fl.yelpcdn.com/bphoto/OhZXVg...,False,https://www.yelp.com/biz/azuki-sushi-san-diego...,2644,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 32.7296447753906, 'longitude': -1...","['delivery', 'restaurant_reservation', 'pickup']",$$,"{'address1': '2321 5th Ave', 'address2': '', '...",16192380000.0,(619) 238-4760,6796.380089
1,iFrXIJmSiEjlEHuakTaKnA,kumi-cafe-san-diego,Kumi Cafe,https://s3-media4.fl.yelpcdn.com/bphoto/xfe9-W...,False,https://www.yelp.com/biz/kumi-cafe-san-diego?a...,135,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 32.81971, 'longitude': -117.1494}","['delivery', 'restaurant_reservation', 'pickup']",$$,"{'address1': '4380 Kearny Mesa Rd', 'address2'...",18585650000.0,(858) 565-1288,3269.091429
2,3S6VtYgcXL8lRic0iV5GYQ,sushi-ota-san-diego,Sushi Ota,https://s3-media2.fl.yelpcdn.com/bphoto/MtEcy_...,False,https://www.yelp.com/biz/sushi-ota-san-diego?a...,4234,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 32.8036636, 'longitude': -117.217...",['delivery'],$$$,"{'address1': '4529 Mission Bay Dr', 'address2'...",18588810000.0,(858) 880-8778,6032.314235
3,werz_RjIeFEFNflwl6kdXA,sushi-yorimichi-san-diego,Sushi Yorimichi,https://s3-media2.fl.yelpcdn.com/bphoto/zFNNEO...,False,https://www.yelp.com/biz/sushi-yorimichi-san-d...,123,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 32.78695, 'longitude': -117.17053}","['delivery', 'restaurant_reservation', 'pickup']",$$,"{'address1': '2405 Ulric St', 'address2': None...",18582800000.0,(858) 279-6868,1599.327274
4,TSYhYwOPeaZmRNuiJoZaAw,soichi-sushi-san-diego-2,Soichi Sushi,https://s3-media3.fl.yelpcdn.com/bphoto/dDCfEd...,False,https://www.yelp.com/biz/soichi-sushi-san-dieg...,364,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",5.0,"{'latitude': 32.76267, 'longitude': -117.1418}","['delivery', 'pickup']",$$$$,"{'address1': '2121 Adams Ave', 'address2': '',...",16196770000.0,(619) 677-2220,3309.247571
