In [38]:
import json
import pandas as pd
import numpy as np

In [39]:
file_path = "/Users/alyssanguyen/Desktop/IRLE_scraping/csv_files/raw_prices_ubereats_ca_ff_03222024.csv"
ca_ff = pd.read_csv(file_path)

In [40]:
#Drop all the columns we don't need 
ca_ff_ = ca_ff.drop(columns=['Unnamed: 0', 'inputted_location', 'number_of_ratings', 'restaurant_distance'])

In [41]:
ca_ff_['restaurant_name'].unique()

array(['McDonald', 'Jack in the Box', 'Wendy', 'Burger King',
       'Shake Shack', 'Sonic', 'Five Guys', 'The Habit', 'Carls Jr'],
      dtype=object)

Exploring missing values 

In [42]:
#Nan values 
nan_count = ca_ff.isnull().sum()
print(nan_count)

Unnamed: 0               0
restaurant_name          0
menu_item                0
menu_item_price          0
restaurant_location    250
inputted_location        0
restaurant_rating        0
number_of_ratings        0
restaurant_distance    235
dtype: int64


In [43]:
#There are two inputted locations that have no restaurant address: 

#Mcdonald's at 1950 Parkside Dr, Concord, CA 94519 

# Wendy's at 1600 Truxtun Ave, Bakersfield, CA 93306

#Let's drop those restaurants

ca_ff_ = ca_ff_.dropna(subset=['restaurant_location'])

In [44]:
#restaurant_rating cleaning 

rows_with_mi = ca_ff_['restaurant_rating'].str.contains('mi').sum()

print("Number of rows with 'mi' in restaurant rating:", rows_with_mi)

#Remove restaurants with no valid rating 
ca_ff_['restaurant_rating'] = ca_ff_['restaurant_rating'].str.replace(r'.*mi$', '0', regex=True)

Number of rows with 'mi' in restaurant rating: 3742


In [45]:
#converting data types 
ca_ff_['restaurant_name'] = ca_ff_['restaurant_name'].astype('string')
ca_ff_['menu_item'] = ca_ff_['menu_item'].astype('string')
ca_ff_['menu_item'] = ca_ff_['menu_item'].str.replace(r'\s+', ' ', regex=True)
ca_ff_['restaurant_location'] = ca_ff_['restaurant_location'].astype('string')
ca_ff_['restaurant_rating'] = ca_ff_['restaurant_rating'].str.strip().astype(float)

In [46]:
#cleaning up string columns 

ca_ff_['menu_item'] = ca_ff_['menu_item'].str.lower()
ca_ff_['restaurant_location'] = ca_ff_['restaurant_location'].str.lower()

#remove special characters
ca_ff_['menu_item'] = ca_ff_['menu_item'].apply(lambda x: ''.join(ch for ch in x if ch.isalnum() or ch.isspace()))
ca_ff_

Unnamed: 0,restaurant_name,menu_item,menu_item_price,restaurant_location,restaurant_rating
0,McDonald,medium french fries,4.69,"1330 jackson st, oakland, ca, 94612, us",4.6
1,McDonald,10 pc chicken mcnuggets,6.69,"1330 jackson st, oakland, ca, 94612, us",4.6
2,McDonald,big mac,6.79,"1330 jackson st, oakland, ca, 94612, us",4.6
3,McDonald,2 cheeseburger meal,10.99,"1330 jackson st, oakland, ca, 94612, us",4.6
4,McDonald,cheeseburger,3.49,"1330 jackson st, oakland, ca, 94612, us",4.6
...,...,...,...,...,...
148708,The Habit,ranch,0.63,"1855 e daily dr, camarillo, ca, 93010, us",4.8
148709,The Habit,bbq sauce,0.63,"1855 e daily dr, camarillo, ca, 93010, us",4.8
148710,The Habit,teriyaki sauce,0.63,"1855 e daily dr, camarillo, ca, 93010, us",4.8
148711,The Habit,spicy red pepper sauce,0.63,"1855 e daily dr, camarillo, ca, 93010, us",4.8


In [47]:
def price_list(x):
    return list(x)

McDonald's 

In [48]:
#Filter to just McDonalds
ca_ff_mcd = ca_ff_[ca_ff_['restaurant_name'] == 'McDonald']

#First part of grouping 

agg_funcs = {
    'menu_item_price': ['mean', 'median', 'std'],  # calculate the average, median, and standard dev PRICE
    'restaurant_rating': 'mean', # calculate the average RATING 
    'menu_item' : 'count'
}

grouped_mcd = ca_ff_mcd.groupby(['restaurant_name','restaurant_location']).agg(agg_funcs).reset_index()
grouped_mcd.columns = [' '.join(col).strip() for col in grouped_mcd.columns.values]


#Second part of grouping 
mcd_lst = ['big mac', 'big mac meal', 'cheeseburger', 'hamburger', 'medium french fries']

# Filter rows where 'menu_item' contains any item in mcd_lst
menu_items_mcd = ca_ff_mcd[ca_ff_mcd['menu_item'].isin(mcd_lst)].sort_values('menu_item')
menu_items_mcd = menu_items_mcd.drop_duplicates(subset=['restaurant_name', 'restaurant_location', 'menu_item'])

grouped_mcd_2 = menu_items_mcd.groupby(['restaurant_name', 'restaurant_location'])['menu_item_price'].agg(price_list).reset_index()

grouped_mcd_2[['specialty_item', 'combo', 'cheeseburger', 'hamburger', 'fries']] = grouped_mcd_2['menu_item_price'].apply(pd.Series)
grouped_mcd_2.drop(columns=['menu_item_price'], inplace=True)

#Merging the grouped dfs together 
merged_mcd = pd.merge(grouped_mcd, grouped_mcd_2, on=['restaurant_name', 'restaurant_location'], how='inner')
merged_mcd

Unnamed: 0,restaurant_name,restaurant_location,menu_item_price mean,menu_item_price median,menu_item_price std,restaurant_rating mean,menu_item count,specialty_item,combo,cheeseburger,hamburger,fries
0,McDonald,"100 e redlands blvd, san bernardino, ca, 92408...",5.752083,4.79,4.912890,4.6,144,7.89,13.99,4.39,3.49,5.89
1,McDonald,"1000 e 4th st, long beach, ca, 90802, us",6.172979,4.89,5.901370,4.7,141,8.29,14.39,4.39,3.89,5.29
2,McDonald,"1000 real rd, bakersfield, ca, 93309, us",5.209320,3.69,6.140426,4.9,147,6.19,11.39,3.09,2.79,4.59
3,McDonald,"10051 bruceville road, elk grove, ca, 95757, us",6.915203,4.74,9.726763,4.4,148,7.69,11.79,4.09,3.69,4.99
4,McDonald,"1009 n wilson way, stockton, ca, 95205-4217, us",6.868792,4.69,9.710387,4.5,149,7.69,11.79,4.09,3.69,4.99
...,...,...,...,...,...,...,...,...,...,...,...,...
223,McDonald,"981 lakeville hwy, petaluma, ca, 94952, us",5.105510,3.99,4.207664,4.7,294,8.69,14.79,4.19,3.79,5.69
224,McDonald,"990 alamo dr, vacaville, ca, 95687, us",7.186987,4.89,9.495503,4.7,156,8.49,12.99,3.59,3.49,5.29
225,McDonald,"chester &amp; brundage, bakersfield, ca, 93304...",4.840504,4.09,3.933102,4.5,139,7.49,11.89,3.99,3.09,5.19
226,McDonald,"mission blvd, santa rosa, ca, 95409, us",4.947208,3.59,4.495181,4.6,154,7.99,12.69,3.89,3.39,4.89


Jack in the Box 

In [49]:
#Filter to just Jack in the Box
ca_ff_jack = ca_ff_[ca_ff_['restaurant_name'] == 'Jack in the Box']

#First part of grouping 

agg_funcs = {
    'menu_item_price': ['mean', 'median', 'std'],  # calculate the average, median, and standard dev PRICE
    'restaurant_rating': 'mean', # calculate the average RATING 
    'menu_item' : 'count'
}

grouped_jack = ca_ff_jack.groupby(['restaurant_name','restaurant_location']).agg(agg_funcs).reset_index()
grouped_jack.columns = [' '.join(col).strip() for col in grouped_jack.columns.values]

In [50]:
# #Second part of grouping 
jack_lst = ['jr jumbo jack', 'jr jumbo jack cheeseburger', 'jumbo jack', 'large french fry', 'large jumbo jack combo']

# Filter rows where 'menu_item' contains any item in mcd_lst
menu_items_jack = ca_ff_jack[ca_ff_jack['menu_item'].isin(jack_lst)].sort_values('menu_item')
menu_items_jack = menu_items_jack.drop_duplicates(subset=['restaurant_name', 'restaurant_location', 'menu_item'])

grouped_jack_2 = menu_items_jack.groupby(['restaurant_name', 'restaurant_location'])['menu_item_price'].agg(price_list).reset_index()

grouped_jack_2[['hamburger', 'cheeseburger', 'specialty_item', 'fries', 'combo']] = grouped_jack_2['menu_item_price'].apply(pd.Series)
grouped_jack_2.drop(columns=['menu_item_price'], inplace=True)

In [51]:
# #Merging the grouped dfs together 
merged_jack = pd.merge(grouped_jack, grouped_jack_2, on=['restaurant_name', 'restaurant_location'], how='inner')
merged_jack

Unnamed: 0,restaurant_name,restaurant_location,menu_item_price mean,menu_item_price median,menu_item_price std,restaurant_rating mean,menu_item count,hamburger,cheeseburger,specialty_item,fries,combo
0,Jack in the Box,"10 union ave, bakersfield, ca, 93307, us",8.563382,7.49,4.300687,4.4,136,3.74,4.11,7.49,5.36,13.73
1,Jack in the Box,"100 s. state college blvd., anaheim, ca, 92806...",7.567080,6.49,3.884794,4.6,137,3.24,3.49,6.11,4.74,11.98
2,Jack in the Box,"1003 east california avenue, bakersfield, ca, ...",10.281912,8.55,5.771447,4.3,136,4.74,4.99,9.11,5.99,18.35
3,Jack in the Box,"1004 steele ln, santa rosa, ca, 95403, us",7.798346,6.61,4.140169,4.1,133,3.61,3.99,6.24,4.86,12.60
4,Jack in the Box,"10110 sierra ave, fontana, ca, 92335, us",6.957868,5.99,3.623544,4.5,136,3.11,3.47,5.39,4.31,10.90
...,...,...,...,...,...,...,...,...,...,...,...,...
200,Jack in the Box,"9179 elk grove florin road, elk grove, ca, 956...",8.526483,8.11,4.331201,4.3,145,3.74,4.11,6.99,4.99,13.85
201,Jack in the Box,"960 n ventura rd, oxnard, ca, 93030, us",7.543577,6.25,3.871830,4.7,137,3.49,3.74,6.86,4.49,12.23
202,Jack in the Box,"9620 e stockton blvd, elk grove, ca, 95624, us",8.278085,7.11,4.262878,4.6,141,3.36,3.61,6.61,4.99,13.48
203,Jack in the Box,"986 woodside rd, redwood city, ca, 94061, us",7.623066,6.29,4.135448,5.0,137,2.27,2.53,5.99,4.74,12.35


Wendy's

In [52]:
#Filter to just Wendy's
ca_ff_wendy = ca_ff_[ca_ff_['restaurant_name'] == 'Wendy']

#First part of grouping 

agg_funcs = {
    'menu_item_price': ['mean', 'median', 'std'],  # calculate the average, median, and standard dev PRICE
    'restaurant_rating': 'mean', # calculate the average RATING 
    'menu_item' : 'count'
}

grouped_wendy = ca_ff_wendy.groupby(['restaurant_name','restaurant_location']).agg(agg_funcs).reset_index()
grouped_wendy.columns = [' '.join(col).strip() for col in grouped_wendy.columns.values]

In [53]:
# #Second part of grouping 
wendy_lst = ['daves combo', 'daves single', 'french fries', 'jr cheeseburger', 'jr hamburger']

# Filter rows where 'menu_item' contains any item in mcd_lst
menu_items_wendy = ca_ff_wendy[ca_ff_wendy['menu_item'].isin(wendy_lst)].sort_values('menu_item')
menu_items_wendy = menu_items_wendy.drop_duplicates(subset=['restaurant_name', 'restaurant_location', 'menu_item'])

grouped_wendy_2 = menu_items_wendy.groupby(['restaurant_name', 'restaurant_location'])['menu_item_price'].agg(price_list).reset_index()

grouped_wendy_2[['combo', 'specialty_item', 'fries', 'cheeseburger', 'hamburger']] = grouped_wendy_2['menu_item_price'].apply(pd.Series)
grouped_wendy_2.drop(columns=['menu_item_price'], inplace=True)

In [54]:
#FLAG: prices are $0 recheck
merged_wendy = pd.merge(grouped_wendy, grouped_wendy_2, on=['restaurant_name', 'restaurant_location'], how='inner')
merged_wendy[merged_wendy['combo'] == 0]

Unnamed: 0,restaurant_name,restaurant_location,menu_item_price mean,menu_item_price median,menu_item_price std,restaurant_rating mean,menu_item count,combo,specialty_item,fries,cheeseburger,hamburger
0,Wendy,"1001 redwood, vallejo, ca, 94590, us",3.074574,2.410,3.318024,4.5,376,0.0,6.29,0.0,2.39,2.43
1,Wendy,"10020 arlington avenue, riverside, ca, 92503, us",3.961158,3.400,4.243538,4.3,95,0.0,8.16,0.0,3.65,3.40
2,Wendy,"10040 chapman ave., garden grove, ca, 92840, us",3.876737,3.400,4.154732,4.5,190,0.0,8.41,0.0,3.65,3.65
3,Wendy,"1012 n. state college blvd., anaheim, ca, 9280...",3.864947,3.400,4.156502,4.7,95,0.0,8.16,0.0,3.65,3.65
4,Wendy,"102 encinites blvd, encinitas, ca, 92024, us",4.010105,3.400,4.189891,4.8,95,0.0,8.28,0.0,3.89,3.40
...,...,...,...,...,...,...,...,...,...,...,...,...
130,Wendy,"9180 rosedale hwy, bakersfield, ca, 93312, us",4.103579,3.400,4.413871,4.2,95,0.0,8.77,0.0,3.40,2.92
131,Wendy,"924 pleasant grove blvd, roseville, ca, 95678, us",4.025000,3.175,4.714360,4.4,194,0.0,7.35,0.0,3.25,3.60
132,Wendy,"960 eastlake parkway, chula vista, ca, 91914, us",3.911158,3.280,4.108367,4.7,95,0.0,8.04,0.0,3.65,3.28
133,Wendy,"960 w. el norte, escondido, ca, 92026, us",3.205789,2.690,3.367382,4.9,95,0.0,6.59,0.0,2.99,2.69


Burger King

In [55]:
#Filter to just Burger King 
ca_ff_bk = ca_ff_[ca_ff_['restaurant_name'] == 'Burger King']

#First part of grouping 

agg_funcs = {
    'menu_item_price': ['mean', 'median', 'std'],  # calculate the average, median, and standard dev PRICE
    'restaurant_rating': 'mean', # calculate the average RATING 
    'menu_item' : 'count'
}

grouped_bk = ca_ff_bk.groupby(['restaurant_name','restaurant_location']).agg(agg_funcs).reset_index()
grouped_bk.columns = [' '.join(col).strip() for col in grouped_bk.columns.values]

In [56]:
# #Second part of grouping 
#they don't have a plain hamburger FOR NOW using whopper jr 
bk_lst = ['cheeseburger', 'french fries', 'whopper', 'whopper jr', 'whopper meal']

# Filter rows where 'menu_item' contains any item in mcd_lst
menu_items_bk = ca_ff_bk[ca_ff_bk['menu_item'].isin(bk_lst)].sort_values('menu_item')
menu_items_bk = menu_items_bk.drop_duplicates(subset=['restaurant_name', 'restaurant_location', 'menu_item'])

grouped_bk_2 = menu_items_bk.groupby(['restaurant_name', 'restaurant_location'])['menu_item_price'].agg(price_list).reset_index()

grouped_bk_2[['cheeseburger', 'fries', 'specialty_item', 'hamburger', 'combo']] = grouped_bk_2['menu_item_price'].apply(pd.Series)
grouped_bk_2.drop(columns=['menu_item_price'], inplace=True)

In [57]:
merged_bk = pd.merge(grouped_bk, grouped_bk_2, on=['restaurant_name', 'restaurant_location'], how='inner')
merged_bk

Unnamed: 0,restaurant_name,restaurant_location,menu_item_price mean,menu_item_price median,menu_item_price std,restaurant_rating mean,menu_item count,cheeseburger,fries,specialty_item,hamburger,combo
0,Burger King,"10055 cedar avenue, bloomington, ca, 92316, us",8.970256,6.69,5.832077,4.6,156,2.79,4.89,10.29,5.39,15.29
1,Burger King,"1009 n ben maddox way, visalia, ca, 93292, us",9.749524,7.74,6.267647,4.8,84,3.49,5.29,9.89,3.79,16.69
2,Burger King,"1030 mclaughlin ave., san jose, ca, 95122, us",8.656750,7.34,6.086458,4.3,80,2.63,4.31,9.23,4.67,15.58
3,Burger King,"1042 north carpenter road, modesto, ca, 95351, us",9.540633,7.79,5.886226,3.9,79,3.89,5.59,9.79,5.39,16.39
4,Burger King,"111, colma, ca, 94014, us",8.927654,6.69,6.040351,4.4,162,2.89,5.69,9.89,4.89,16.69
...,...,...,...,...,...,...,...,...,...,...,...,...
162,Burger King,"950 west a street, hayward, ca, 94541, us",8.522716,6.79,5.850353,4.7,81,2.73,4.61,9.86,4.86,14.98
163,Burger King,"969 east francisco blvd., san rafael, ca, 9490...",8.897927,6.69,6.009174,4.4,164,2.89,5.69,9.89,4.89,16.69
164,Burger King,"97 bonita road, chula vista, ca, 91910, us",9.098250,6.99,5.912452,4.5,80,2.79,4.89,10.29,5.39,15.29
165,Burger King,"972 el camino real, south san francisco, ca, 9...",9.273889,8.04,6.144790,4.8,144,4.09,4.19,8.29,5.29,15.29


Shake Shack

In [58]:
#Filter to just Shake Shack
ca_ff_shake = ca_ff_[ca_ff_['restaurant_name'] == 'Shake Shack']

#First part of grouping 

agg_funcs = {
    'menu_item_price': ['mean', 'median', 'std'],  # calculate the average, median, and standard dev PRICE
    'restaurant_rating': 'mean', # calculate the average RATING 
    'menu_item' : 'count'
}

grouped_shake = ca_ff_shake.groupby(['restaurant_name','restaurant_location']).agg(agg_funcs).reset_index()
grouped_shake.columns = [' '.join(col).strip() for col in grouped_shake.columns.values]


#Second part of grouping 
shake_lst = ['cheeseburger', 'fries', 'hamburger', 'shackburger']

# Filter rows where 'menu_item' contains any item in mcd_lst
menu_items_shake = ca_ff_shake[ca_ff_shake['menu_item'].isin(shake_lst)].sort_values('menu_item')
menu_items_shake = menu_items_shake.drop_duplicates(subset=['restaurant_name', 'restaurant_location', 'menu_item'])

grouped_shake_2 = menu_items_shake.groupby(['restaurant_name', 'restaurant_location'])['menu_item_price'].agg(price_list).reset_index()

grouped_shake_2[['cheeseburger', 'fries', 'hamburger', 'specialty_item']] = grouped_shake_2['menu_item_price'].apply(pd.Series)
grouped_shake_2.drop(columns=['menu_item_price'], inplace=True)

#Merging the grouped dfs together 
merged_shake = pd.merge(grouped_shake, grouped_shake_2, on=['restaurant_name', 'restaurant_location'], how='inner')
merged_shake['combo'] = np.nan

Sonic

In [59]:
#FLAG many missing prices for fries and 
#Filter to just Sonic 
ca_ff_sonic = ca_ff_[ca_ff_['restaurant_name'] == 'Sonic']

#First part of grouping 

agg_funcs = {
    'menu_item_price': ['mean', 'median', 'std'],  # calculate the average, median, and standard dev PRICE
    'restaurant_rating': 'mean', # calculate the average RATING 
    'menu_item' : 'count'
}

grouped_sonic = ca_ff_sonic.groupby(['restaurant_name','restaurant_location']).agg(agg_funcs).reset_index()
grouped_sonic.columns = [' '.join(col).strip() for col in grouped_sonic.columns.values]


#Second part of grouping 
sonic_lst = ['fries', 'quarter pound double cheeseburger', 'supersonic double cheeseburger', 'supersonic double cheeseburger combo']

# Filter rows where 'menu_item' contains any item in mcd_lst
menu_items_sonic = ca_ff_sonic[ca_ff_sonic['menu_item'].isin(sonic_lst)].sort_values('menu_item')
menu_items_sonic = menu_items_sonic.drop_duplicates(subset=['restaurant_name', 'restaurant_location', 'menu_item'])

grouped_sonic_2 = menu_items_sonic.groupby(['restaurant_name', 'restaurant_location'])['menu_item_price'].agg(price_list).reset_index()

grouped_sonic_2[['fries', 'cheeseburger', 'specialty_item', 'combo']] = grouped_sonic_2['menu_item_price'].apply(pd.Series)
grouped_sonic_2.drop(columns=['menu_item_price'], inplace=True)

#Merging the grouped dfs together 
merged_sonic = pd.merge(grouped_sonic, grouped_sonic_2, on=['restaurant_name', 'restaurant_location'], how='inner')
merged_sonic['hamburger'] = np.nan

In [60]:
merged_sonic

Unnamed: 0,restaurant_name,restaurant_location,menu_item_price mean,menu_item_price median,menu_item_price std,restaurant_rating mean,menu_item count,fries,cheeseburger,specialty_item,combo,hamburger
0,Sonic,"101 main st, vista, ca, 92084, us",2.258415,0.0,3.26482,4.2,164,0.0,4.87,10.36,0.0,
1,Sonic,"10354 trinity parkway, stockton, ca, 95219, us",1.909405,0.0,2.699659,4.9,84,0.0,4.87,9.02,0.0,
2,Sonic,"10515 mission gorge rd, santee, ca, 92071, us",2.258415,0.0,3.26482,3.9,164,0.0,4.87,10.36,0.0,
3,Sonic,"1060 west renaissance parkway, rialto, ca, 923...",2.204767,0.0,3.002143,4.4,344,0.0,4.75,8.89,0.0,
4,Sonic,"1070 e huntington dr, duarte, ca, 91010, us",2.190698,0.0,2.981735,4.6,86,0.0,4.87,8.53,0.0,
5,Sonic,"11370 4th street, rancho cucamonga, ca, 91730, us",1.843488,0.0,2.57698,4.4,258,0.0,4.75,8.28,0.0,
6,Sonic,"11780 amargosa road, victorville, ca, 92392, us",1.84625,0.0,2.552971,4.8,160,0.0,,,,
7,Sonic,"1202 magnolia ave, corona, ca, 92881, us",2.116951,0.0,3.042066,4.7,246,0.0,4.5,9.75,0.0,
8,Sonic,"1215 grand ave, san marcos, ca, 92078, us",2.108171,0.0,3.042437,4.3,82,0.0,4.5,9.75,0.0,
9,Sonic,"1400 north park boulevard, pittsburg, ca, 9456...",2.469186,0.0,3.326491,4.3,86,0.0,4.87,10.24,0.0,


Five Guys 

In [64]:
#FLAG many missing prices for fries and 
#Filter to just Sonic 
ca_ff_five = ca_ff_[ca_ff_['restaurant_name'] == 'Five Guys']

#First part of grouping 

agg_funcs = {
    'menu_item_price': ['mean', 'median', 'std'],  # calculate the average, median, and standard dev PRICE
    'restaurant_rating': 'mean', # calculate the average RATING 
    'menu_item' : 'count'
}

grouped_five = ca_ff_five.groupby(['restaurant_name','restaurant_location']).agg(agg_funcs).reset_index()
grouped_five.columns = [' '.join(col).strip() for col in grouped_five.columns.values]


#Second part of grouping 
five_lst = ['cheeseburger', 'little cheeseburger', 'little hamburger', 'regular fries']

# Filter rows where 'menu_item' contains any item in mcd_lst
menu_items_five = ca_ff_five[ca_ff_five['menu_item'].isin(five_lst)].sort_values('menu_item')
menu_items_five = menu_items_five.drop_duplicates(subset=['restaurant_name', 'restaurant_location', 'menu_item'])

grouped_five_2 = menu_items_five.groupby(['restaurant_name', 'restaurant_location'])['menu_item_price'].agg(price_list).reset_index()

grouped_five_2[['specialty_item', 'cheeseburger', 'hamburger', 'fries']] = grouped_five_2['menu_item_price'].apply(pd.Series)
grouped_five_2.drop(columns=['menu_item_price'], inplace=True)

#Merging the grouped dfs together 
merged_five = pd.merge(grouped_five, grouped_five_2, on=['restaurant_name', 'restaurant_location'], how='inner')
merged_five['combo'] = np.nan

In [73]:
#FLAG many missing prices for fries and 
#Filter to just Sonic 
ca_ff_habit = ca_ff_[ca_ff_['restaurant_name'] == 'The Habit']

#First part of grouping 

agg_funcs = {
    'menu_item_price': ['mean', 'median', 'std'],  # calculate the average, median, and standard dev PRICE
    'restaurant_rating': 'mean', # calculate the average RATING 
    'menu_item' : 'count'
}

grouped_habit = ca_ff_habit.groupby(['restaurant_name','restaurant_location']).agg(agg_funcs).reset_index()
grouped_habit.columns = [' '.join(col).strip() for col in grouped_habit.columns.values]


#Second part of grouping 
habit_lst = ['2 original double char meal', 'charburger', 'charburger with cheese', 'double char', 'french fries']

# Filter rows where 'menu_item' contains any item in mcd_lst
menu_items_habit = ca_ff_habit[ca_ff_habit['menu_item'].isin(habit_lst)].sort_values('menu_item')
menu_items_habit = menu_items_habit.drop_duplicates(subset=['restaurant_name', 'restaurant_location', 'menu_item'])

grouped_habit_2 = menu_items_habit.groupby(['restaurant_name', 'restaurant_location'])['menu_item_price'].agg(price_list).reset_index()

grouped_habit_2[['combo', 'hamburger', 'cheeseburger', 'specialty_item','fries']] = grouped_habit_2['menu_item_price'].apply(pd.Series)
grouped_habit_2.drop(columns=['menu_item_price'], inplace=True)

#Merging the grouped dfs together 
merged_habit = pd.merge(grouped_habit, grouped_habit_2, on=['restaurant_name', 'restaurant_location'], how='inner')
#merged_five['combo'] = np.nan

Carl's Jr.

In [76]:
#FLAG many missing prices for fries and 
#Filter to just Sonic 
ca_ff_carls = ca_ff_[ca_ff_['restaurant_name'] == 'Carls Jr']

#First part of grouping 

agg_funcs = {
    'menu_item_price': ['mean', 'median', 'std'],  # calculate the average, median, and standard dev PRICE
    'restaurant_rating': 'mean', # calculate the average RATING 
    'menu_item' : 'count'
}

grouped_carls = ca_ff_carls.groupby(['restaurant_name','restaurant_location']).agg(agg_funcs).reset_index()
grouped_carls.columns = [' '.join(col).strip() for col in grouped_carls.columns.values]


#Second part of grouping 
carls_lst = ['big hamburger', 'california classic double cheeseburger', 'naturalcut french fries', 'single big carl', 'single big carl combo']

# Filter rows where 'menu_item' contains any item in mcd_lst
menu_items_carls = ca_ff_carls[ca_ff_carls['menu_item'].isin(carls_lst)].sort_values('menu_item')
menu_items_carls = menu_items_carls.drop_duplicates(subset=['restaurant_name', 'restaurant_location', 'menu_item'])

grouped_carls_2 = menu_items_carls.groupby(['restaurant_name', 'restaurant_location'])['menu_item_price'].agg(price_list).reset_index()

grouped_carls_2[['hamburger', 'cheeseburger', 'fries', 'specialty_item','combo']] = grouped_carls_2['menu_item_price'].apply(pd.Series)
grouped_carls_2.drop(columns=['menu_item_price'], inplace=True)

#Merging the grouped dfs together 
merged_carls = pd.merge(grouped_carls, grouped_carls_2, on=['restaurant_name', 'restaurant_location'], how='inner')
#merged_five['combo'] = np.nan

In [77]:
merged_carls

Unnamed: 0,restaurant_name,restaurant_location,menu_item_price mean,menu_item_price median,menu_item_price std,restaurant_rating mean,menu_item count,hamburger,cheeseburger,fries,specialty_item,combo
0,Carls Jr,"1 hallidie plaza, san francisco, ca, 94102, us",5.611529,4.82,5.834491,4.5,255,4.51,7.02,5.70,,
1,Carls Jr,"1000 farmers ln, santa rosa, ca, 95405, us",9.303038,9.34,5.093219,4.2,79,4.05,5.15,4.66,,
2,Carls Jr,"101 ferrari ranch rd, lincoln, ca, 95648, us",10.281327,10.69,5.678946,4.8,98,6.82,6.18,4.50,8.37,14.18
3,Carls Jr,"101 s union ave, bakersfield, ca, 93307, us",10.481429,10.69,5.856942,4.5,98,6.82,6.18,4.50,8.37,14.18
4,Carls Jr,"10770 macarthur blvd, oakland, ca, 94605, us",4.654235,3.93,4.895414,4.8,170,3.69,5.75,4.65,,
...,...,...,...,...,...,...,...,...,...,...,...,...
93,Carls Jr,"871 marina village pkwy, alameda, ca, 94501, us",5.239533,4.48,4.932509,4.8,535,3.94,6.16,4.99,,
94,Carls Jr,"8919 limonite ave, riverside, ca, 92509, us",3.806897,2.85,4.150414,5.0,87,5.15,5.15,4.55,,
95,Carls Jr,"899 cherry ave, san bruno, ca, 94066, us",4.621882,3.93,4.818046,4.7,340,3.69,5.75,4.65,,
96,Carls Jr,"915 main st, oakley, ca, 94561, us",9.658214,9.92,5.757643,4.7,84,6.82,6.18,4.50,,


In [78]:
uber_eats_ff_rnd1_prices = pd.concat([merged_mcd, merged_jack, merged_wendy, merged_shake, merged_bk, merged_sonic, merged_carls, merged_habit, merged_five])

In [79]:
uber_eats_ff_rnd1_prices

Unnamed: 0,restaurant_name,restaurant_location,menu_item_price mean,menu_item_price median,menu_item_price std,restaurant_rating mean,menu_item count,specialty_item,combo,cheeseburger,hamburger,fries
0,McDonald,"100 e redlands blvd, san bernardino, ca, 92408...",5.752083,4.79,4.912890,4.6,144,7.89,13.99,4.39,3.49,5.89
1,McDonald,"1000 e 4th st, long beach, ca, 90802, us",6.172979,4.89,5.901370,4.7,141,8.29,14.39,4.39,3.89,5.29
2,McDonald,"1000 real rd, bakersfield, ca, 93309, us",5.209320,3.69,6.140426,4.9,147,6.19,11.39,3.09,2.79,4.59
3,McDonald,"10051 bruceville road, elk grove, ca, 95757, us",6.915203,4.74,9.726763,4.4,148,7.69,11.79,4.09,3.69,4.99
4,McDonald,"1009 n wilson way, stockton, ca, 95205-4217, us",6.868792,4.69,9.710387,4.5,149,7.69,11.79,4.09,3.69,4.99
...,...,...,...,...,...,...,...,...,...,...,...,...
95,Five Guys,"915 playa ave, sand city, ca, 93955, us",8.420000,8.87,3.004996,4.7,28,12.83,,10.43,9.59,7.31
96,Five Guys,"9257 laguna springs dr, elk grove, ca, 95758, us",9.908621,10.07,3.739646,4.7,58,15.47,,12.47,11.15,8.75
97,Five Guys,"929 n. milliken avenue suite 103, ontario, ca,...",8.484286,8.75,3.117877,4.6,56,13.19,,10.43,9.23,7.19
98,Five Guys,"933 e. hillsdale, foster city, ca, 94404, us",8.737143,9.11,3.329333,4.6,112,13.79,,10.31,9.83,7.19
