# Yelp analysis of sleepy through lively towns in Massachusetts, based on last call in relation to population size

In [None]:
import pandas as pd
import numpy as np
from yelpapi import YelpAPI
import json
import time
import math

Set up the connection to the Yelp API

In [None]:
# Credentials for the Yelp API client.
# Prefer supplying them through the YELP_CLIENT_ID / YELP_API_KEY environment
# variables so real secrets are never saved with the notebook; the '...'
# placeholders remain as the fallback when the variables are not set.
import os

client_id = os.environ.get('YELP_CLIENT_ID', '...')
api_key = os.environ.get('YELP_API_KEY', '...')
# Shared client used by every query below (timeout_s is presumably the
# per-request timeout in seconds -- confirm against the yelpapi docs).
yelp_api = YelpAPI(api_key, timeout_s=3.0)

Search for bars and lounges in each city/town.

In [None]:
# Query Yelp for bars and lounges in every town listed in the CSV and gather
# the raw business records into a single DataFrame (`data`).
term = 'bars and lounges'
search_limit = 10  # number of businesses requested per town
handled = []       # towns queried so far, in request order

towns = pd.read_csv(r'C:\Users\super\Documents\Learning\Yelp\town_list.csv')
town_list = towns['Town2']

# Accumulate plain dicts and build the frame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call. Treating every town the same way also means a first town with
# no results no longer aborts the run.
records = []
for position, t in enumerate(town_list):
    if position:
        time.sleep(1)  # pause between consecutive requests
    response = yelp_api.search_query(term=term,
                                     location=t,
                                     limit=search_limit)
    handled.append(t)
    records.extend(response['businesses'])

# One row per returned business; columns are the union of the record keys.
data = pd.DataFrame(records)

Deduplicate the businesses and make one API call per business (N calls in total) to retrieve its hours of operation

In [None]:
# Drop businesses that were returned for more than one town, then look up
# each remaining business once to read its closing time.
# .copy() gives `master` its own data so that adding columns to it later does
# not trigger pandas' SettingWithCopyWarning.
master = data.drop_duplicates(subset=['id']).copy()
id_list = master['id'].to_list()
closing_times = []
handled = 0  # number of businesses looked up so far


def _closing_time(details, day=6):
    """Return the 'end' time (e.g. '2300') of the first opening period on `day`.

    `details` is the business-details response. It is read as
    ``details['hours'][i]['open'][j]`` with ``'day'`` and ``'end'`` keys
    (assumed to follow Yelp's documented hours layout -- confirm against the
    API reference). Returns 'NA' when the business has no hours or no period
    for `day`, instead of slicing the serialized JSON, which produced garbage
    text whenever the day was missing.
    """
    for schedule in details.get('hours') or []:
        for period in schedule.get('open', []):
            if period.get('day') == day:
                return period.get('end', 'NA')
    return 'NA'


for bid in id_list:
    time.sleep(0.5)  # pause between consecutive requests
    bar = yelp_api.business_query(bid)
    closing_times.append(_closing_time(bar))
    handled += 1

Create a class for how late each business stays open, where 2 AM = 0, 1 AM = 1, ..., 8 PM = 6

In [None]:
# Turn each closing time into a "lateness class": the number of hours the
# business closes before 2 AM (2 AM = 0, 1 AM = 1, ..., 8 PM = 6).
dic = {'id': id_list, 'close': closing_times}
close_times = pd.DataFrame(dic)

# Closing time (HHMM string) -> hours before 2 AM, in half-hour steps.
# An explicit table replaces the parallel condition/value lists, where
# '2200' was listed twice and '2230' was therefore never matched.
close_class_by_time = {
    '2000': 6, '2030': 5.5,
    '2100': 5, '2130': 4.5,
    '2200': 4, '2230': 3.5,
    '2300': 3, '2330': 2.5,
    '0000': 2, '0030': 1.5,
    '0100': 1, '0130': 0.5,
    '0200': 0,
}

# Any closing value outside the table (including 'NA') maps to NaN and is
# dropped, so only businesses with a recognized closing time remain.
close_times['close_class'] = close_times['close'].map(close_class_by_time)
close_times = close_times.dropna(subset=['close_class'])

Retrieve business city from address string

In [None]:
def extract_city(s):
    """Return the city name from a business ``location`` value.

    Parameters
    ----------
    s : dict or str
        Either the location mapping itself (read through its ``'city'`` key)
        or its text form, e.g. ``"{'address1': ..., 'city': 'Boston', ...}"``.
        Both single- and double-quoted values are recognized, so a name that
        contains an apostrophe is returned whole.

    Returns
    -------
    str
        The city name, or ``''`` when no city can be found.
    """
    import re  # local import: `re` is not among the file's top-level imports

    if isinstance(s, dict):
        city = s.get('city')
        return '' if city is None else str(city)

    # Match the quoted 'city' key followed by a quoted value; group 1 is a
    # single-quoted value, group 2 a double-quoted one.
    found = re.search(r"""['"]city['"]:\s*(?:'([^']*)'|"([^"]*)")""", str(s))
    if found is None:
        return ''
    single_quoted, double_quoted = found.groups()
    return single_quoted if single_quoted is not None else double_quoted

# Add each business's town, parsed from its location field.
# assign() builds a new frame instead of writing a column into `master` in
# place; `master` is derived from another frame, and in-place assignment on
# such a frame can raise pandas' SettingWithCopyWarning.
master = master.assign(town=master['location'].apply(extract_city))

Merge business data with population data

In [None]:
# Attach town population and closing-time class to every business, keeping
# only the columns needed for scoring.
# A list (not a set) keeps the column order fixed; pandas 2.x also rejects a
# set as a column indexer.
cols = ['id', 'name', 'town', 'rating', 'population', 'close_class']
towns = pd.read_csv(r'C:\Users\super\Documents\Learning\Yelp\town_list.csv')
towns = towns.drop_duplicates(subset=['town'])  # one population row per town
final2 = master.merge(towns, how='left', on='town')
final = final2.merge(close_times, how='left', on='id')
# .copy() so the clean-up below writes to an independent frame rather than a
# selection of the merge result.
final = final[cols].copy()
# Population may arrive as text with thousands separators ("12,345").
# Converting to str first also works when the column was already read as
# numbers, where the .str accessor alone would fail.
final['population'] = (final['population']
                       .astype(str)
                       .str.replace(',', '', regex=False))
final['population'] = pd.to_numeric(final['population'], errors='coerce')
# Businesses whose town has no usable population cannot be scored.
final = final.dropna(subset=['population'])

Create a score using a parameterized formula. Here, a town of 100 people with a bar open until 2 AM scores 1.0, with penalties for each tenfold increase in population and for each hour that last call falls before 2 AM.

In [None]:
# Score each town by its latest-closing business (lowest close_class).
# The left merge leaves close_class empty for businesses without a recognized
# closing time; drop those rows first so a town that has only such rows does
# not make idxmin produce a missing label for the .loc lookup.
scored = final.dropna(subset=['close_class'])
latest_by_town = scored.loc[
    scored.groupby('town').close_class.idxmin()
].reset_index(drop=True)

hour_lambda = 0.9   # multiplier applied per hour of closing before 2 AM
pop_lambda = 0.92   # multiplier applied per tenfold population above 100

# late_score = hour_lambda ** close_class * pop_lambda ** (log10(population) - 2)
# so a town of 100 people with a 2 AM close (class 0) scores exactly 1.0.
hour_factor = hour_lambda ** latest_by_town['close_class']
pop_factor = pop_lambda ** (np.log10(latest_by_town['population']) - 2)
latest_by_town['late_score'] = hour_factor * pop_factor

# Shift the scores so that the best-scoring town ends up at exactly 1.
max_score = latest_by_town['late_score'].max()
latest_by_town['late_final_score'] = 1 - (max_score - latest_by_town['late_score'])
latest_by_town

Results:

The four sleepiest towns under the current parameters are: Wilmington, Auburn, Bedford, and Rehoboth.
All of these towns shutter around 8 PM.

Four liveliest towns are: Templeton, Lee, Mendon, Becket

Becket, MA is the liveliest town, with pours until 2 AM in a town with a population of 1,779!

Play around with the visual dashboard here: https://public.tableau.com/app/profile/gabor.asztalos1520/viz/LastCallvsPopulation/Dashboard1?publish=yes