In [1]:
from collections import defaultdict
import json
import math
import numpy as np
import pandas as pd
import requests

In [2]:
covid_features = pd.read_json("yelp_academic_dataset_covid_features.json", lines=True)
d = {'TRUE': True, 'FALSE': False}
bool_cols = ['Call To Action enabled', 'Grubhub enabled', 'Request a Quote Enabled', 'delivery or takeout']
for col in bool_cols:
    covid_features[col] = covid_features[col].map(d)

In [7]:
businessDF = pd.read_json("yelp_academic_dataset_business.json", lines=True)

In [8]:
def get_restaurants(df):
    category_list = businessDF['categories'].tolist()
    restaurant_indices = list()
    for i in range(len(category_list)):
        if category_list[i] is None:
            continue
        category_string = category_list[i].lower()
        if 'restaurants' in category_string:
            restaurant_indices.append(i)
        
    return businessDF.ix[restaurant_indices]

def get_non_chains(restaurants):
    num_locations_dict = restaurants['name'].value_counts().to_dict()
    valid_locations = set([name for name in num_locations_dict.keys() if num_locations_dict[name] < 4])
    return restaurants[restaurants['name'].isin(valid_locations)]

def get_open_restaurants(restaurants):
    return restaurants[restaurants['is_open'] == 1]

def get_valid_restaurants(covid_features, businessDF):
    restaurants = get_restaurants(businessDF)
    non_chains = get_non_chains(restaurants)
    open_restaurants = get_open_restaurants(non_chains)
    
    covid_restaurants = covid_features[covid_features['business_id'].isin(set(open_restaurants['business_id']))]
    return covid_restaurants[(covid_restaurants['highlights'] != 'FALSE') | (covid_restaurants['Covid Banner'] != 'FALSE') | (covid_restaurants['Temporary Closed Until'] != 'FALSE')]

In [9]:
df = get_valid_restaurants(covid_features, businessDF)

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  # This is added back by InteractiveShellApp.init_path()


In [10]:
response_json_by_id = dict()

In [11]:
def decorate_with_yelp_rest_api(df, response_json_by_id):
    headers = {
    'Authorization': 'Bearer X-uVlFVkQhgqPFA959OUiteVdFxQcvT_A9dRJek3UbluvLw0qgg6RCBaCeKdWwZ1CO-IOn15TGC8hOP-adJZghiMm4qyQrqB81xxbLF09TxY-yjWcttmj_xbNBq8X3Yx'
    }
    
    for business_id in df['business_id'].tolist():
        if business_id in response_json_by_id:
            continue
        print(business_id)
        url = 'https://api.yelp.com/v3/businesses/%s' % business_id
        response = requests.get(url, headers=headers)
        try:
            response.raise_for_status()
            if response.status_code == 200:
                response_json_by_id[business_id] = response.json()
            else:
                print("invalid request for %s" % business_id)
        except Exception as e:
            print("invalid request for %s" % business_id)        
    
    return response_json_by_id 

In [12]:
response_json_by_id = decorate_with_yelp_rest_api(df, response_json_by_id)
responseDF = pd.DataFrame.from_dict(response_json_by_id, orient='index')
joined = df.join(responseDF, on='business_id', how='inner')

NwWYSYGHFyHQMmsXu0SiYA
liPSeXaAfv8jxRGJ717VmQ
yipHhPQlCBmQnmoe-J3rlQ
2c9Vptks_vowLgVUMnCgjw
QABMhiuARzojyPYCATc2Pw
WnLhd38sH80ViWwzyF7yoA
4FEb2SzmU_l7SCQAbvW5Hg
CiNZmBtZRaRSXKjwun4R4A
wKlH90YB5RYFvJ8N3pstVw
tZpxk604kWWHsJKyirraZg
GlwylMeApNHgrlfmYvw6ZA
0EgYXYjt2XJL4hlsKnzrcw
KoAxhiUwigvfe3U7vn7tCA
VhJ0uenGgU1hIOJT2BnARw
qWMGMCPQoyRq7NpQ78ntmg
3CBLxydJU6CFRMGhu9b-sw
xGv3zpEr_yH7H_sCfEyzaQ
SGZAsdQAp0SjtMDjXWBPYw
4qTeGqt6YpufGxZXJ_FldQ
gNER9lE1Ma9FLm9MBsvYgg
JtvdUCawl-oIKXU9G06Log
yCK1Ok69D7TSWJESZQZHiQ
n2kgJSiB7Q4u7AkDvYfzlg
KQ6rLQA5ZZBe2KeJ9ur3Sg
_ER9MVaI63k222OeE8EH4A
CCoe81nfcF8JxuINPZhtVQ
EdPpsG0GujcVefs3ANGEGA
invalid request for EdPpsG0GujcVefs3ANGEGA
Rnck2nc0j-FbolNiae2grg
invalid request for Rnck2nc0j-FbolNiae2grg
KVaR0vF-7q36VvdU7-9clg
xnVkYE3iMp_aZniiCIuD0g
hxUxeY8DG9lhyjCVo_s-pw
lEgKurgXzZaMpExXtvBY7w
7rLoG6F6kJYE_gPppl8_5A
EXW-sodgyshlS9E_d325Pw
Z5TMiCHOGC7HtS-3gO-DOA
rD1QC53qgF9sbzNUaBEpNA
XJu14V0UvBP0YYGwzw7DGg
sUAJOwb1yFaSDreFADEO5Q
TqZgEObAI-LM9MqDTJf2Pw
YSYDJTCt5kzY2kKnL

P_86Zmln12-ltTAt_2gKNQ
a3gVxyKKpYHPuUaKEDNjjg
eEfDS8JjpUOoslhJDIbltA
mzREMIknfmagJugibXrCsQ
C3rH8u6P3wFH95AH8SeNmw
BsvCTCVG7lrzXZ68VyyIcg
vsroij-o3RU6QJ2yofOv2w
JYzzraHxJZy5atvx2hW0Uw
GJ9CgvTBKaDb9qLFEBbzHg
J5ebC7fKqrSF8KuS8d6hgw
_5ymRgVmdoewtUWLnHBMrw
uLUl_dMl6a9m774LGReDVA
_6UaHEnDDbUla3Pa3VVu3Q
7HbKKqXtZUjf6uVSHZ8wyw
FZXdI7r9PJVWjzS0S64kog
jqBUVAWJRJ6QIGXVuhdVvw
LiBnff-ZuzW16zIvZgFOfQ
Hdnx6cZBo0JfZopnQDWVYg
zb6PLyf8zHffr-rj3ez1lg
utoTa3Ngv_oSfB5TIMgN_Q
TXV5mRFOGjwY7igrPjkKVA
CjpHNIcWqOhCCyT1XbMbOA
9i7Gz3xaa-gwh1Fi3q1_kA
S3soHYEYKtBBsh2T4kcv6w
YzTafk0tQ-nAu1oICuuNFw
Sr-DO1Nt8X4I4Pjxapwz-g
K71ypipT7dPGK4rYGVNFkw
BEplfZTUzYwApjMUaOA12w
NVvjplHvDAoMOEGbYws4Sw
QFqtVX-lyZQgVHQrG-dhlg
0xso8uriixDURNmHy3x42A
1hUvpPDLiVS7Ht2LPROFTA
XeDtnkWNTd3oo75Vij96bQ
CXsnrd0x-cobsI2KpeW4HA
Ec9CBmL3285XkeHaNp-bSQ
0ZWfvVTbEudUc_yaQEppbQ
ZVtjS8Hvq2a6SxHxlehPGg
72hQ8xlNgNkhw2zi_qMFog
8yZ_nBgRC3CT1RFJC6KhXA
TgTiw8lMYoRNreSeYN9qWg
e1W5fwngUYf3kdsgxo4spg
Ug3Vx26eIrgUS5FJIPj-ng
Gr-2oBg4XyduSKbvnE-i9g
eJ5ruUOrXYl

naO2DJaInOVGG1ZRgA3N6Q
ffXOeotInfX8G_dK5_s6Ag
9JdhMYy-VhxoqbAyz3Dm6g
sh4u0HAjCZMzgN9_VVn7LA
mKT30qU_MlLj4Ps82JAZ8w
6x4BDO6DrX9QXecdPCpVwA
ZVMoTe5ZQ99gYVUydKbjqg
2-su3xdxOvyF0_2GZN4FLw
qCiUfpgds55QqYduavptJA
GHNxtMVFHLZdtyGI84ax9Q
NTfoeDZFyorjhuUXIYkYQw
jwuR0AGCw_BaM8uEkKyVEw
PsdWWQE_9GrfmCNfz2yW4g
nu5qpsddW1MIINqNPpKeKQ
invalid request for nu5qpsddW1MIINqNPpKeKQ
nZDIrGshkfLZf6ImQtAasQ
X6chSU0KBGHGNxZ8UH_qag
4LoiyV3UHf-cFlqGVNBY4Q
invalid request for 4LoiyV3UHf-cFlqGVNBY4Q
cIPGRCdJfP1TYFcagoDMkw
QWRieSmElMDJ9ENGG0JDuA
lvFmpUptBSGxVQBig0RdpA
sJP1QOC9foqcu2_udjoyIw
Da0c8BkSx0gxk5HwoZJiYQ
RMEzxhYfpnEsWlfozit_xw
XyRhCc2Rga93tmr0K7yIBw
l-TaawqI-mlN4z_N9WdW9Q
EM4Qo0Tm40H8Nfk3s8Dy0g
utIA0LyQmwP-9DRyxUe6qQ
MFOkTx2WUFgWbAxQ86dEoQ
_R-rjgz4Kwi4f3u0clFeKg
BCga5CbMqcZLCpxAuln57w
OdF7pXmoLgSJh7fL-vIXlA
DW7ttoWMaI2jWN41gekbiA
H-taqYgQEDEGModP6mxBcQ
I6oxEn1HT41S2GPrDKPPvA
hPXR-Bi8U-uz6TUViqmGpg
0RELQcu2_pIQH-nkPICOSw
invalid request for 0RELQcu2_pIQH-nkPICOSw
cUpdeZJawIwOl6G21z7CzA
AaLvYFsLuyO1uGk0QLxN

6rYnfSWSkyXnlD3Fjep3PQ
invalid request for 6rYnfSWSkyXnlD3Fjep3PQ
lWrq9cL4o2j6isKPK1c9Wg
u0F9WJbECpg5fEesAx94xA
J07TDs6qnTIaxm48EL1cKQ
AY3Cxky3L3aNbRo_o4YRgg
Jl2MIN70I2DGw8KhVp6g7Q
2yIYlVu8KjD7GQTWWpO0SQ
kEXyCpzZYrPu4DtlMNfyMA
J4Pm8fpKDXgSLudBKL1FZg
WtO8eoYmrc2_8kuXanug2Q
uuSviSjm_dR2K30C_2_STQ
Oz1Ci9y2MnwYyAjVsoZg5Q
Fozo0B-y42EhRMomR0K5vQ
y2N-7XV-C3NUtJX8O-uUlw
lLU4ZeYi0DQACTQQcYn71w
LlSehKBi-vx0TE5x2Q4R_w
owwKUc2VFUMgRPZFEgrtKA
rTicAE1mbLnyMuUrUFD6WA
YuhAHVsstmDENWuZM30tMA
fmv6aWk3IJE-gyEr6ZfpTg
9PIC380tmNYpIC3z5cvcNA
h3pMMRwqBjMeMtk0_qQzRQ
U0UnBjz9DugW2qRZputKBg
oMt3FrXtmYiP2ayn46PHtQ
IWAQV7gISpJe8UH_q7cxHQ
8Kj8djbAZHEDImwjlmvOJQ
FogTa-wmjhVnJCoTiaxvZA
Zw9wEAk9L6oTZi63f4zVvA
kwi2TiPbCAVHsBNkEkZjVA
3YKV1eralyrKQqoWOTJMDA
5A_sjC9ULLt4UkDNtr9dkw
auTNigMttjBBTj1MNxOeFQ
vK1_qKEG7zHvjiTOsN0CRg
Y4aFR2VSgSz07idEJEX3yg
lZaSwRBRgg0ulXR8MKzL-w
A8IouaHOlMHoVIRuAKbKzA
_XN-GwzZwAyIqLKJsl2htg
XrcB7Of1KfHlkeXYpHLIrw
-Bdw-5H5C4AYSMGnAvmnzw
8zO1gDlfujftc6ZJr43nHA
fxw_kCJJDSQDtY7QJ66Fog
2iLeiQ4OCJVwfo

CCVh8YKdN3c-WgqEbzya3A
gOOfBSBZlffCkQ7dr7cpdw
6ppfHX6akf11e4XExIhiGA
e1jI2-vU1fd4UKpyogtxuA
kF6uM_KKWvBMGFdxMe9k2A
_SQvOdb33mhlNJZYEYUKxA
rioQ_p2pILNbJ4Xp5jW6-Q
AN0bWhisCf6LN9eHZ7DQ3w
invalid request for AN0bWhisCf6LN9eHZ7DQ3w
MjLRxNBuFR9DvA2XzQAXIA
kNlna23mYfRqGv7VJByKkA
C1jQ5CbszJkS_cyWgaRVGQ
Ijje9MLW_lv9BWNu-us6rw
u9R0_pvEkl7QGPJYH3j1sg
v5q-q9-Ra2c72DJEEgzs6g
9fH0ow907crGLmPnHFeI0A
_-farJKsDz5NO-sXidiPhw
_P7OjSDvuywYpcp8m_7O9w
SORm6UKRlwHC6SmJZnbFEw
4nbBl7xV_O3KfozqUuosQw
UjQwHkVAGFJYz5nSISEJ3w
XL33UIY33sl3eyr1hyDxMQ
rEg65FRo6ZPMlh5LAdMrZg
1s8Zu9irOTZtW88ca9P41Q
FQzSV_tUK7XG-bAoJ9sRSA
6_kSCuQP5U4W8SfD8VVNZA
2DmTw0k3bcXuHdAyxO_bkw
mYlNFkIiFY3SyDl-nPWR5g
w2D-bXyIaKwh3AlEb-LygQ
S4oe6TlWNqfMSBEL30Ej3Q
0CB7YB1qRSWLQvMbHw3Fmw
invalid request for 0CB7YB1qRSWLQvMbHw3Fmw
GuGQzX2d_Y01sv1RVrZ9yA
S3gVMHlrQoJPzM984vCeBg
O694V2x8GL6Ff-BMezRb8A
MmBiJCKvaJkC86ovcoEmFg
Ipkl7KtlqxCe8bMMkwHwPQ
RwlumSqNBunw1a1LXccN8A
NIOwzgujIXKVBEVNTQBXpg
rqAZ3EyUVqxG0x6eD1Zw3Q
CopqthwWKu7-TrpxGa70ug
CxzUdREgfS4ymtXFS

eYJcI9cI6-kJ2NQpYdHcLw
eUuKXhloFVHdQsLdodcLbw
RMyK2LpfFDty75HE94XN0Q
UhyDVz7npxKUl9BfMpfmBw
2nFwsGBpikkDh74biLfeaA
K6fYrrTorlpXmqutRcrHzg
Z5L-jOexcqCXfXWTIWnYlw
AKBSPjk_H_w8RCqCE_vUuA
H-WYHSzXPWS9Avj0W6SRpg
0q_BHpxbikVtPRRLRu-U0g
QxKTm1nGBZhCxCmOGCZdkA
Tw3miGKZHtmxmaQZIYFRrA
fXzMs__3HjTYwKPMAb4jYA
ASvQjxsHVd8J8o_kHJUQiQ
8b5ll2kjXfjgFIqWsjkr8Q
_4Gpn_NIM94D9OFucgfaJQ
mPfdYJj4d5KBPINDkaZY-Q
Mpy2zw01M1IcEP4svSPOIg
yvwK4MO2qnSZE5ggYGyk1A
ii8sAGBexBOJoYRFafF9XQ
BjbokieOGk2CCq2t8wV6MA
Q7OsHUJgkPHuHNTDBJqPag
xpJEBXTCQh5Ib8BJrjt9Bg
xRsm1kRIGbL_nGCtxd551g
rb4d1_OtpVD91J5NDp4Y3w
J4CATH00YZrq8Bne2S4_cw
-95mbLJsa0CxXhpaNL4LvA
eoyvbnRYQe-z85e8Rc6vAg
I9nziYEw4YIFjr3ZDBpFog
rYBQpEiJck-Q_ClE9vH2og
X67a8NpGhqQWdzZt0zuCQg
axX1hO_lEn9C-na3mMN4Aw
8GBmys1QbunrRRnXOhlLeg
ARfXk9Sa2vrK36fAGvdmWQ
5PP6BwtNYlPARHApB9lzcQ
PDIh1lVtKlUqPJROXYJQvQ
w2_nR3p2XMWTP0J8GA_v_A
ewAmzOqnSAfLBdt4Stc8bA
tMlVZWa4SsI01qYMRQerKg
YhTwTm31WOl8ug7TCW8ubA
zU63bs3Ofe8y1peOgOFJaA
S0UZ-8cnerfOU8snZABzTA
4zNvBpxABIbXR4GUbwO3Ow
D_pwairtGGR

Zkn1y0l3eDjzIONKMNl-ew
HLtqx6CTxdu9ApzTBnSiww
PGHRgFMWpN9Ko-jmU7jRGQ
3dXx7CWqjYPX16WOlj1wEw
XbkpdDXhunFhAWLNRAiVIw
qWSwDbtJRBvKTiMpH3rNLQ
oRKyGtUsN5IcRReN7ScUwQ
hZsoH0AHkN2OyXjOAmVQNw
oc8204pyvf9ixdA4JzzaLA
yeZr0NBthDQuQ-JjPDkxyw
7k5Oxf3D4Y0vuD1X6SmK-g
F06m2yQSPHIrb1IT7heYeQ
ajoqEHnCZTD8-8GqGLq9-Q
p4YvOzp42g3JzlXJny8F0g
R_wW7aDtoRjGw8lgM2dB3w
QZz6EwylI-Tn7dcxP-1uvA
QkkZW4axID_hBqdu-a_aWQ
SQZ7pssVk8esHqAHgAIAnA
K7lWdNUhCbcnEvI0NhGewg
K-uQkfSUTwu5LIwPB4b_vg
qPc7nlbV-YdyHVCrEJHj5g
rh6yL-dxvCEo-bJk_UTE7w
NSEEnYde9aC_HYva0Yinmw
hhhHgSaSl6GLtuDoDrMUZQ
U4iqELk1IiqARpk5uJJOZg
grlL6zmHBOer9rPycsRhBA
llCxryWr8j1S39tusYCWxg
Wag4bnx38zJ4ihDn8-7--A
Iq7NqQD-sESu3vr9iEGuTA
lvinwVzENRij1G3_9J9dZQ
eZqoDfBg-xOS8WdcZCYM2A
a1v8QIdbA_HTg_YyNlBY_w
DA-ddRqcReCe_DcXKicvsQ
q5I1eO_K6AHC-3B8DB9Caw
UFthIIorkchiGj_5ThZ1Uw
HhcBx0LSDkxRBKMkvkwSoA
l1yQDLp2ZErFhvDiJCjlKA
XMMLRvV4IMxIGyc4H37LxA
pwablDuLsQOdpHAEcNFtww
OEMTOWaemuov7TFD1sW3lw
TFKB3U0th0ryO5Nb9Qghtg
lfvuSEh8qCLi9gGc1l1GnQ
sF0Y740tAkmG9EeXF90b9Q
xp_3WHJDdWV

In [453]:
print('Total number of restaurants: %s'% len(joined))
print('Total number closed: %s' % joined['is_closed'].sum())

Total number of restaurants: 2301
Total number closed: 88


In [454]:
def generate_price_df(df):
    filtered = df[~df['price'].isna()]
    filtered['price'] = filtered['price'].str.len()
    indicator_cols = pd.get_dummies(filtered['price'])
    for col in indicator_cols.columns:
        filtered[col] = indicator_cols[col]
    return filtered

In [455]:
def generate_hours_df(df):
    breakfast = np.zeros(len(df))
    lunch = np.zeros(len(df))
    dinner = np.zeros(len(df))

    hours_list = df['hours'].tolist()
    for i in range(len(hours_list)):
        hours = hours_list[i]

        if isinstance(hours, float):
            continue

        open_hours = hours[0]['open'][0]
        start = open_hours['start']
        end = open_hours['end']

        if start <= '0900':
            breakfast[i] = 1

        if start < '1300' and end > '1300':
            lunch [i] = 1

        if end >= '2000' or open_hours['is_overnight']:
            dinner[i] = 1

        df['breakfast'] = breakfast
        df['lunch'] = lunch
        df['dinner'] = dinner

    return df[~df['hours'].isna()]

In [5]:
flattened_categories = {
    'breakfast_brunch': 'breakfast_brunch',
    'waffles': 'breakfast_brunch',
    'bagels': 'breakfast_brunch',
    'pizza': 'pizza',
    'newamerican': 'american',
    'tradamerican': 'american',
    'burgers': 'american',
    'italian': 'italian',
    'cafes': 'cafes',
    'coffee': 'cafes',
    'coffeeroasteries': 'cafes',
    'creperies': 'cafes',
    'wraps': 'cafes',
    'salad': 'cafes',
    'soups': 'cafes',
    'sandwiches': 'sandwiches',
    'delis': 'sandwiches',
    'chinese': 'asian',
    'thai': 'asian',
    'korean': 'asian',
    'vietnamese': 'asian',
    'indpak': 'asian',
    'filipino': 'asian',
    'asianfusion': 'asian',
    'japanese': 'asian',
    'sushi': 'asian',
    'ramen': 'asian',
    'poke': 'asian',
    'hawaiian': 'asian',
    'desserts': 'desserts',
    'icecream': 'desserts',
    'cupcakes': 'desserts',
    'bakeries': 'desserts',
    'bars': 'bars',
    'cocktailbars': 'bars',
    'breweries': 'bars',
    'sportsbars': 'bars',
    'wine_bars': 'bars',
    'gastropubs': 'bars',
    'beerbar': 'bars',
    'beer_and_wine': 'bars',
    'mediterranean': 'mid_east',
    'greek': 'mid_east',
    'mideastern': 'mid_east',
    'turkish': 'mid_east',
    'latin': 'latin',
    'mexican': 'latin',
    'tacos': 'latin',
    'tex-mex': 'latin',
    'newmexican': 'latin',
    'brazilian': 'latin',
    'peruvian': 'latin'
}

def generate_categories_df(df):
    category_dict = defaultdict(int)
    for categories in df['categories'].tolist():
        for category in categories:
            name = category['alias']
            category_dict[name] = category_dict[name] + 1
    category_list = sorted([(k, v) for k, v in category_dict.items()], key=lambda x: -1 * x[1])
    
    values_for_category = dict()
    for category in set(flattened_categories.values()):
        values_for_category[category] = np.zeros((len(df)))
    
    df_cat_list = df['categories'].tolist()
    for i in range(len(df_cat_list)):
        categories = df_cat_list[i]  
        for category in categories:
            name = category['alias']
            if name in flattened_categories:
                values_for_category[flattened_categories[name]][i] = 1  
            
    for category in set(flattened_categories.values()):
        df[category] = values_for_category[category]
        
    return df

In [457]:
def generate_location_df(df):
    df['city'] = df['location'].map(lambda x: x['city'])
    df['state'] = df['location'].map(lambda x: x['state'])
    df['zip_code'] = df['location'].map(lambda x: x['zip_code'])
    return df

In [470]:
def generate_covid_highlights_df(covid_restaurants):
    highlight_lists = list(covid_restaurants['highlights'])
    highlights = set()
    for hl in highlight_lists:
        if hl == 'FALSE':
            continue
        for highlight in json.loads(hl):
            highlights.add(highlight['identifier'])

    values_for_highlight = dict()
    for highlight in highlights:
        values_for_highlight[highlight] = np.zeros((len(covid_restaurants)))

    for i in range(len(highlight_lists)):
        if highlight_lists[i] == 'FALSE':
            continue
        for highlight in json.loads(highlight_lists[i]):
            values_for_highlight[highlight['identifier']][i] = 1    

    for highlight in highlights:
        covid_restaurants[highlight] = values_for_highlight[highlight]  
    return covid_restaurants, highlights   

In [459]:
# For each value of the column, calculate the rate at which restaurants with this column have closed
def calculate_closing_rate(df, column):
    with_col = df[df[column] == 1]
    mean = with_col['is_closed'].mean()
    std_err = math.sqrt(mean * (1 - mean) / len(with_col))
    return (mean, std_err, len(with_col))

In [468]:
hoursDF = generate_hours_df(joined)
for meal in ['breakfast', 'lunch', 'dinner']:
    closing_rate = calculate_closing_rate(hoursDF, meal)
    print('%s, %s, %s, %s' % (meal, closing_rate[0], closing_rate[1], closing_rate[2]))   

breakfast, 0.02414486921529175, 0.00688536407479, 497
lunch, 0.0327132777421424, 0.00450522604396, 1559
dinner, 0.02599009900990099, 0.0039579014344, 1616


In [6]:
categoriesDF = generate_categories_df(joined)
for category in set(flattened_categories.values()):
    closing_rate = calculate_closing_rate(categoriesDF, category)    
    print('%s, %s, %s, %s' % (category, closing_rate[0], closing_rate[1], closing_rate[2]))   

NameError: name 'joined' is not defined

In [464]:
priceDF = generate_price_df(joined)
for price in priceDF['price'].unique():
    closing_rate = calculate_closing_rate(priceDF, price)
    print('%s, %s, %s, %s' % (price, closing_rate[0], closing_rate[1], closing_rate[2]))    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


2, 0.04041811846689895, 0.00519880006033, 1435
1, 0.034722222222222224, 0.00880821763644, 432
3, 0.029411764705882353, 0.0129584659592, 170
4, 0.0, 0.0, 44


In [477]:
covidHighlightsDF, highlights = generate_covid_highlights_df(joined)
for highlight in highlights:
    closing_rate = calculate_closing_rate(covidHighlightsDF, highlight)
    if closing_rate[2] < 100:
        continue
    print('%s, %s, %s, %s' % (highlight, closing_rate[0], closing_rate[1], closing_rate[2]))  

curbside_pickup_during_covid_19, 0.030534351145038167, 0.008678892025, 393
takeout_during_covid_19, 0.022146507666098807, 0.00607393962175, 587
family_owned_operated, 0.007874015748031496, 0.00784295442085, 127
delivery_during_covid_19, 0.026530612244897958, 0.00726000208844, 490
gluten_free_friendly, 0.025, 0.0142521928137, 120
vegetarian_friendly, 0.017241379310344827, 0.00986812881849, 174
locally_owned_operated, 0.013513513513513514, 0.00949071326529, 148
gift_cards_during_covid_19, 0.026923076923076925, 0.0100380477822, 260
