In [1]:
# imports
import requests
import pandas as pd
import os

In [2]:
yelp_apikey = os.environ["yelp_api"]

In [3]:
def get_yelp_ids():
    """
    Retrieves and returns the list of Yelp IDs and names.
    """
    request_url = 'https://api.yelp.com/v3/businesses/search?location=Austin%2C%20TX%2078701&radius=1000&categories=bar&categories=entertainment&attributes=&sort_by=best_match&limit=25'
    headers = {
        "accept": "application/json",
        "Authorization": os.environ["yelp_api"]
    }
    response = requests.get(request_url, headers=headers)
    return response.json()


In [4]:
def process_yelp_data(yelp_data):
    """
    Processes the Yelp data and returns the final DataFrame.
    """
    df = pd.DataFrame(yelp_data['businesses'])
    
    id_df = df[['id', 'name','categories', 'review_count', 'rating', 'coordinates', 'location']]
    id_df = id_df.rename(columns={'id':'yelp_id'})
    
    category_df = pd.json_normalize(id_df['categories'])
    category_df = category_df.rename(columns={0:'cat_types'})
    category_df = category_df.drop(columns=[1, 2])
    
    cat_type = pd.json_normalize(category_df['cat_types'])
    cat_type = cat_type.rename(columns={'title':'category'})
    cat_type = cat_type.drop(columns=['alias'])
    
    merge_cat = pd.concat([category_df, cat_type], axis=1)
    merge_cat = merge_cat.drop(columns=['cat_types'])
    
    additional_cols = df[['review_count', 'rating', 'coordinates', 'location']]
    add_cat = pd.concat([id_df[['yelp_id', 'name']], merge_cat, additional_cols], axis=1)
    
    coords = pd.json_normalize(add_cat['coordinates'])
    add_coords = pd.concat([add_cat[['yelp_id', 'name', 'category', 'review_count', 'rating']], coords, additional_cols], axis=1)
    
    zip_code = pd.json_normalize(add_coords['location'])
    zip_code = zip_code.drop(columns=['address1', 'address2', 'address3', 'city', 'country', 'state', 'display_address'])
    
    add_loc = pd.concat([add_coords[['yelp_id', 'name', 'category', 'review_count', 'rating', 'latitude', 'longitude']], zip_code], axis=1)
    
    return add_loc

In [5]:
# Call the function to retrieve the Yelp IDs and names
yelp_data = get_yelp_ids()

# Process the Yelp data and get the final DataFrame
final_df = process_yelp_data(yelp_data)

final_df.head()

Unnamed: 0,yelp_id,name,category,review_count,review_count.1,rating,rating.1,latitude,longitude,zip_code
0,cs6HfZNykLVitm09jWFqWg,Moonshine Patio Bar & Grill,Southern,5778,5778,4.5,4.5,30.263754,-97.738077,78701
1,XOo0oa5sXCZGjKXapIN95w,Red Ash,Italian,1209,1209,4.5,4.5,30.26588,-97.744672,78701
2,Tta8AYBZq3fwr1n2up75vQ,Eureka!,American (Traditional),1065,1065,4.0,4.0,30.2678,-97.7412,78701
3,CqOkl94q9ttvYNWIITA8ow,Swift's Attic,American (New),1166,1166,4.0,4.0,30.26563,-97.74337,78701
4,v1UzkU8lEWdjxq8byWFOKg,Gus's World Famous Fried Chicken - Austin,Southern,3101,3101,4.5,4.5,30.263486,-97.741731,78701


In [7]:
final_df.to_csv('additional_poi.csv', index=False)