In [1]:
# imports
import requests
import pandas as pd
import os
import json

In [2]:
# Read the Yelp credential from the "yelp_api" environment variable.
# Indexing os.environ raises KeyError right here if the variable is unset.
yelp_apikey = os.environ["yelp_api"]

In [3]:
# Module-level accumulator filled by get_yelp_location_ids(); one dict per
# (station, business) pair with keys 'station_id', 'id' and 'name'.
yelp_location_ids = []

def get_yelp_location_ids(coords, station_id):
    """
    Search Yelp for businesses around one station and record them.

    Every business in the response is appended to the module-level
    ``yelp_location_ids`` list as a dict with the keys 'station_id', 'id'
    and 'name'. Nothing is returned.

    Parameters
    ----------
    coords : sequence
        ``coords[0]`` is sent as the latitude and ``coords[1]`` as the
        longitude of the search centre.
    station_id :
        Identifier stored with each business found for this station.

    Raises
    ------
    KeyError
        If the 'yelp_api' environment variable is not set.
    requests.HTTPError
        If Yelp answers with a 4xx/5xx status.
    """
    url = "https://api.yelp.com/v3/businesses/search"

    # Let requests build and escape the query string. A list value is sent
    # as a repeated key (categories=bar&categories=entertainment).
    params = {
        "latitude": str(coords[0]),
        "longitude": str(coords[1]),
        "radius": 50,
        "categories": ["bar", "entertainment"],
        "attributes": "parking_garage",
        "sort_by": "best_match",
        "limit": 5,
    }

    headers = {
        "accept": "application/json",
        # The environment value is passed through unchanged as the header.
        "Authorization": os.environ["yelp_api"]
    }

    # A timeout keeps one stalled connection from hanging the whole notebook.
    response = requests.get(url, headers=headers, params=params, timeout=10)

    # Surface HTTP errors (bad key, rate limit, ...) as such instead of as a
    # KeyError on the missing 'businesses' field further down.
    response.raise_for_status()

    res = response.json()

    for business in res.get('businesses', []):
        yelp_location_ids.append({
            'station_id': station_id,
            'id': business['id'],
            'name': business['name']
        })


In [4]:
# Data file
data_file = "austin_stations.csv"

# Read the data file
df = pd.read_csv(data_file)

# Empty the accumulator first so that re-running this cell does not append
# every station's businesses a second time.
yelp_location_ids.clear()

# Query Yelp once per station; get_yelp_location_ids() appends its findings
# to yelp_location_ids.
for latitude, longitude, station_id in zip(df['Latitude'], df['Longitude'], df['station_id']):
    get_yelp_location_ids([latitude, longitude], station_id)

# Keep only the station id and business id of each hit (the name is dropped).
location_dicts = [
    {'station_id': location['station_id'], 'id': location['id']}
    for location in yelp_location_ids
]


In [5]:
# Turn the collected (station, business) records into a table and expose the
# business identifier under the name 'yelp_id'.
yelp_locations_df = pd.DataFrame(location_dicts).rename(columns={'id': 'yelp_id'})

# Preview the first rows
yelp_locations_df.head()

Unnamed: 0,station_id,yelp_id
0,8723bfa08ec83b133f6a9aeecd075f51,1ynQ7GbkGJonlfqcAlcw1Q
1,a22287cc0ed65baf5d88b04916ad3f8e,10dR2YozAl0Tiwt0FnBMng
2,0071a519298ba811d05c62fcf03c39c6,jQ9a_tgvzmQjQRiF32rD2A
3,0071a519298ba811d05c62fcf03c39c6,Glcqpb74FN2ve2hgf5ImYw
4,127d6968e5b3b59b49005c0694d78815,Ovir9G_KtMdk0mzCvtdpdQ


In [6]:
# create a list of point of interest information
yelp_info_list = []

# The credential is read from the environment and is never written into the
# notebook. The value is used as the Authorization header unchanged, the same
# way get_yelp_location_ids() uses it.
headers = {
    "accept": "application/json",
    "Authorization": os.environ["yelp_api"]
}

# get yelp information, one request per distinct business: an id found near
# several stations would otherwise be fetched repeatedly and its rows would
# multiply when the details are merged back on 'yelp_id'.
for y_id in yelp_locations_df['yelp_id'].unique():
    url = 'https://api.yelp.com/v3/businesses/' + str(y_id)

    # A timeout keeps one stalled connection from hanging the whole loop.
    response = requests.get(url, headers=headers, timeout=10)

    # Stop on an HTTP error instead of storing the error payload as if it
    # were a business record.
    response.raise_for_status()

    yelp_info_list.append(response.json())

In [7]:
def create_yelp_df(yelp_info_list):
    """
    Create the Yelp DataFrame from the given Yelp info list.

    Parameters
    ----------
    yelp_info_list : list of dict
        Decoded JSON payloads, one per business. Nested objects are
        flattened into dotted column names such as 'location.zip_code'.

    Returns
    -------
    pandas.DataFrame
        One row per payload, with 'id' renamed to 'yelp_id' and exactly the
        columns listed below, in that order. A field that no payload
        contains becomes an all-NaN column.
    """
    columns = [
        'yelp_id', 'name', 'categories', 'review_count', 'rating', 'price',
        'hours', 'location.zip_code', 'location.display_address',
        'coordinates.latitude', 'coordinates.longitude',
    ]
    yelp_df = pd.json_normalize(yelp_info_list)
    yelp_df = yelp_df.rename(columns={'id': 'yelp_id'})
    # reindex rather than yelp_df[columns]: plain selection raises KeyError
    # when a field (e.g. 'price' or 'hours') is absent from every payload
    # or when the input list is empty.
    return yelp_df.reindex(columns=columns)

def normalize_categories(yelp_df):
    """
    Expand the 'categories' column into category-title columns.

    Each cell of ``yelp_df['categories']`` is spread across columns, every
    entry is reduced to its 'title' value, and column 0 is renamed to
    'Type'. Any further columns keep their original labels.
    """
    expanded = pd.json_normalize(yelp_df['categories'])
    titles = expanded.apply(lambda column: column.str.get('title'))
    return titles.rename(columns={0: 'Type'})

def merge_dataframes(yelp_df, categories_df):
    """
    Place the categories DataFrame to the right of the Yelp DataFrame.

    The two frames are concatenated column-wise, so rows are matched by
    index label.
    """
    return pd.concat((yelp_df, categories_df), axis="columns")


In [8]:
# Create DataFrame
yelp_df = create_yelp_df(yelp_info_list)

# Normalize the 'categories' column
categories_df = normalize_categories(yelp_df)

# Merge all DataFrames
merged_df = merge_dataframes(yelp_df, categories_df)

# Drop unnecessary columns. errors='ignore' because the extra category
# columns 1 and 2 only exist when some business lists that many categories;
# without it the cell fails with KeyError on such data.
merged_df = merged_df.drop(columns=['categories', 'hours', 1, 2], errors='ignore')

# Rename columns
merged_df = merged_df.rename(columns={'location.zip_code': 'zipcode', 'location.display_address': 'address', 'coordinates.latitude': 'yelp_latitude', 'coordinates.longitude': 'yelp_longitude'})

# Convert price levels to numeric values
price_mapping = {
    '$': 1,
    '$$': 2,
    '$$$': 3,
    '$$$$': 4
}
# map() is a plain lookup: missing prices stay NaN (zero-filled below) and
# the column comes out numeric without relying on replace()'s implicit
# downcasting.
merged_df['price'] = merged_df['price'].map(price_mapping)

# Convert null values to 0
merged_df = merged_df.fillna(0)

# Display result
merged_df.head()

Unnamed: 0,yelp_id,name,review_count,rating,price,zipcode,address,yelp_latitude,yelp_longitude,Type
0,1ynQ7GbkGJonlfqcAlcw1Q,CoffeePeople,3,5.0,0.0,78703,"[603 N Lamar Blvd, Austin, TX 78703]",30.271802,-97.752901,Cafes
1,10dR2YozAl0Tiwt0FnBMng,Torchy's Tacos,983,4.0,2.0,78704,"[1822 S Congress, Austin, TX 78704]",30.245406,-97.751624,Tacos
2,jQ9a_tgvzmQjQRiF32rD2A,Republic Square,16,4.0,0.0,78701,"[422 Guadalupe St, Austin, TX 78701]",30.26775,-97.74694,Parks
3,Glcqpb74FN2ve2hgf5ImYw,Dolce Terra,1,5.0,0.0,78701,"[422 Guadalupe st, Austin, TX 78701]",30.267659,-97.74704,Desserts
4,Ovir9G_KtMdk0mzCvtdpdQ,Pinch,37,4.0,1.0,78705,"[2100 University Ave, Austin, TX 78705]",30.2835,-97.739886,Food Trucks


In [9]:
# Attach the business details to every (station, business) pair; rows are
# matched on 'yelp_id' with the default inner join.
yelp_merged = yelp_locations_df.merge(merged_df, on='yelp_id')
yelp_merged.head()

Unnamed: 0,station_id,yelp_id,name,review_count,rating,price,zipcode,address,yelp_latitude,yelp_longitude,Type
0,8723bfa08ec83b133f6a9aeecd075f51,1ynQ7GbkGJonlfqcAlcw1Q,CoffeePeople,3,5.0,0.0,78703,"[603 N Lamar Blvd, Austin, TX 78703]",30.271802,-97.752901,Cafes
1,a22287cc0ed65baf5d88b04916ad3f8e,10dR2YozAl0Tiwt0FnBMng,Torchy's Tacos,983,4.0,2.0,78704,"[1822 S Congress, Austin, TX 78704]",30.245406,-97.751624,Tacos
2,0071a519298ba811d05c62fcf03c39c6,jQ9a_tgvzmQjQRiF32rD2A,Republic Square,16,4.0,0.0,78701,"[422 Guadalupe St, Austin, TX 78701]",30.26775,-97.74694,Parks
3,0071a519298ba811d05c62fcf03c39c6,Glcqpb74FN2ve2hgf5ImYw,Dolce Terra,1,5.0,0.0,78701,"[422 Guadalupe st, Austin, TX 78701]",30.267659,-97.74704,Desserts
4,127d6968e5b3b59b49005c0694d78815,Ovir9G_KtMdk0mzCvtdpdQ,Pinch,37,4.0,1.0,78705,"[2100 University Ave, Austin, TX 78705]",30.2835,-97.739886,Food Trucks


In [10]:
# Write the merged station/business table to 'yelp_info.csv' in the working
# directory; index=False leaves the row index out of the file.
yelp_merged.to_csv('yelp_info.csv', index=False)