In [1]:
import json
import pandas as pd
import numpy as np

file_path = "raw_final_prices.jsonl"

# Load the JSON data from the selected file
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Initialize an empty list to store the rows of our DataFrame
df_rows = []

# Process the JSON data to populate the df_rows list
for inputted_location, restaurants in data.items():
    for restaurant_name, details in restaurants.items():
        restaurant_location = details.get('location', '')
        restaurant_rating = details.get('rating', 'Not Rated')# Default to 'Not Rated' if there's no rating
        restaurant_no_rating = details.get('number of ratings')
        restaurant_distance = details.get('distance')
        for item, price in details.get('menu', {}).items():
            df_rows.append({
                'restaurant_name': restaurant_name,
                'menu_item': item,
                'menu_item_price': price,
                'restaurant_location': restaurant_location,
                'inputted_location': inputted_location,
                'restaurant_rating': restaurant_rating,  # Add the rating to each row
                'number_of_ratings': restaurant_no_rating,
                'restaurant_distance': restaurant_distance
            })


# Convert the list of rows into a DataFrame
df = pd.DataFrame(df_rows)


In [2]:
df['restaurant_name'] = df['restaurant_name'].str.replace(r'_[0-9]+$', '', regex=True)

In [3]:
df

Unnamed: 0,restaurant_name,menu_item,menu_item_price,restaurant_location,inputted_location,restaurant_rating,number_of_ratings,restaurant_distance
0,McDonald,Medium French Fries,4.69,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi
1,McDonald,Big Mac®,6.79,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi
2,McDonald,10 pc. Chicken McNuggets®,6.69,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi
3,McDonald,Filet-O-Fish®,6.49,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi
4,McDonald,2 Cheeseburger Meal,10.99,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi
...,...,...,...,...,...,...,...,...
121824,The Habit,Ranch,0.63,"1025 Broadbeck Dr, Thousand Oaks, CA, 91320, US","300 W 3rd St, Oxnard, CA 93030",4.3,11,13.9 mi
121825,The Habit,BBQ Sauce,0.63,"1025 Broadbeck Dr, Thousand Oaks, CA, 91320, US","300 W 3rd St, Oxnard, CA 93030",4.3,11,13.9 mi
121826,The Habit,Spicy Red Pepper Sauce,0.63,"1025 Broadbeck Dr, Thousand Oaks, CA, 91320, US","300 W 3rd St, Oxnard, CA 93030",4.3,11,13.9 mi
121827,The Habit,Teriyaki Sauce,0.63,"1025 Broadbeck Dr, Thousand Oaks, CA, 91320, US","300 W 3rd St, Oxnard, CA 93030",4.3,11,13.9 mi


In [4]:
zips_df = pd.read_csv('uszips.csv')[['zip', 'county_name']]

In [5]:
df_copy = df.copy()
df_copy['restaurant_zipcode'] = df_copy['restaurant_location'].str.extract("\s(\d{5})")

In [6]:
# Encoding county in dataframe
df_copy = df_copy.dropna()
df_copy['restaurant_zipcode'] = df_copy['restaurant_zipcode'].astype(int)
merged_df = df_copy.merge(zips_df, left_on = 'restaurant_zipcode',  right_on = 'zip')
df_county = merged_df.drop("zip", axis = 1)
df_county.head(5)

Unnamed: 0,restaurant_name,menu_item,menu_item_price,restaurant_location,inputted_location,restaurant_rating,number_of_ratings,restaurant_distance,restaurant_zipcode,county_name
0,McDonald,Medium French Fries,4.69,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi,94612,Alameda
1,McDonald,Big Mac®,6.79,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi,94612,Alameda
2,McDonald,10 pc. Chicken McNuggets®,6.69,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi,94612,Alameda
3,McDonald,Filet-O-Fish®,6.49,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi,94612,Alameda
4,McDonald,2 Cheeseburger Meal,10.99,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi,94612,Alameda


In [7]:
# Add the scraping date
df_county['date_of_scraping'] = "03/15/2024"

In [8]:
# Number of restaurant locations per county
county_rest_loc = df_county.groupby(['county_name', 'restaurant_location']).count().reset_index()[["county_name", "restaurant_location"]]
no_loc_per_county = county_rest_loc.groupby('county_name').count().reset_index()
print(sum(no_loc_per_county['restaurant_location']))
no_loc_per_county.head(10)

982


Unnamed: 0,county_name,restaurant_location
0,Alameda,68
1,Contra Costa,45
2,Fresno,32
3,Kern,33
4,Kings,2
5,Los Angeles,117
6,Marin,12
7,Monterey,17
8,Napa,5
9,Orange,74


In [9]:
# Number of restaurant names per county
county_rest_name = df_county.groupby(['county_name', 'restaurant_name']).count().reset_index()[["county_name", "restaurant_name"]]
no_name_per_county = county_rest_name.groupby('county_name').count().reset_index()
no_name_per_county.head(10)

Unnamed: 0,county_name,restaurant_name
0,Alameda,9
1,Contra Costa,9
2,Fresno,7
3,Kern,8
4,Kings,2
5,Los Angeles,9
6,Marin,7
7,Monterey,7
8,Napa,4
9,Orange,9


In [10]:
df['restaurant_name'].value_counts()

restaurant_name
McDonald           28394
Jack in the Box    25709
Wendy              16333
The Habit          14040
Burger King        13538
Carls Jr            9413
Sonic               5704
Five Guys           4447
Shake Shack         4251
Name: count, dtype: int64

In [11]:
# Number of restaurant locations per inputted_location 
count_loc = df.groupby(['inputted_location', 'restaurant_location']).count().reset_index()[['inputted_location', 'restaurant_location']]
no_per_county = count_loc.groupby('inputted_location').count().reset_index()
no_per_county.head()

Unnamed: 0,inputted_location,restaurant_location
0,"1 Dr Carlton B Goodlett Pl, San Francisco, CA ...",46
1,"1 Frank H. Ogawa Plaza, Oakland, CA 94612",47
2,"100 Santa Rosa Ave, Santa Rosa, CA 95404",29
3,"1010 10th St, Modesto, CA 95354",19
4,"110 E Cook St, Santa Maria, CA 93454",17


In [12]:
# Number of restaurant names per inputted location. 9 restaurant names means 
# that inputted location has atleast one location per each restaurant. 
res_count_loc = df.groupby(['inputted_location', 'restaurant_name']).count().reset_index()[['inputted_location', 'restaurant_name']]
res_per_county = res_count_loc.groupby('inputted_location').count().reset_index()
res_per_county.head()

Unnamed: 0,inputted_location,restaurant_name
0,"1 Dr Carlton B Goodlett Pl, San Francisco, CA ...",9
1,"1 Frank H. Ogawa Plaza, Oakland, CA 94612",9
2,"100 Santa Rosa Ave, Santa Rosa, CA 95404",8
3,"1010 10th St, Modesto, CA 95354",8
4,"110 E Cook St, Santa Maria, CA 93454",7


In [13]:
# Number of unique locations
len(df['restaurant_location'].unique())

986

In [14]:
# Number of unique counties
len(df['inputted_location'].unique())

36

In [15]:
# Number of locations per franchise
grp_df = df.groupby(['restaurant_name', 'restaurant_location']).count().reset_index()
grp_df.groupby('restaurant_name').size()

restaurant_name
Burger King        151
Carls Jr            85
Five Guys           92
Jack in the Box    171
McDonald           179
Shake Shack         35
Sonic               29
The Habit          121
Wendy              122
dtype: int64

In [16]:
# Number of items per location for each franchise
data_process = grp_df[['restaurant_name', 'restaurant_location', 'menu_item']]
data_process

Unnamed: 0,restaurant_name,restaurant_location,menu_item
0,Burger King,"10055 Cedar Avenue, Bloomington, CA, 92316, US",78
1,Burger King,"1030 Mclaughlin Ave., San Jose, CA, 95122, US",80
2,Burger King,"1042 North Carpenter Road, Modesto, CA, 95351, US",79
3,Burger King,"111, Colma, CA, 94014, US",81
4,Burger King,"1153 North H Street, Lompoc, CA, 93436, US",78
...,...,...,...
980,Wendy,"8871 Bond Road, Elk Grove, CA, 95624, US",96
981,Wendy,"9180 Rosedale Hwy, Bakersfield, CA, 93312, US",95
982,Wendy,"924 Pleasant Grove Blvd, Roseville, CA, 95678, US",96
983,Wendy,"960 EASTLAKE PARKWAY, CHULA VISTA, CA, 91914, US",95


In [17]:
# Mean, st. dev., min and max number of items across all restaurants
mean_no = np.mean(data_process['menu_item'].astype(float))
std_no = np.std(data_process['menu_item'].astype(float))
min_no = np.min(data_process['menu_item'].astype(float))
max_no = np.max(data_process['menu_item'].astype(float))
mean_no, std_no, min_no, max_no

(123.43045685279188, 67.14440447331073, 24.0, 581.0)

In [18]:
# Mean, st. dev., min and max number of items for McDonalds
df_mcd = data_process[data_process['restaurant_name'] == 'McDonald']
mean_mcd = np.mean(df_mcd['menu_item'].astype(float))
std_mcd = np.std(df_mcd['menu_item'].astype(float))
min_mcd = np.min(df_mcd['menu_item'].astype(float))
max_mcd = np.max(df_mcd['menu_item'].astype(float))
mean_mcd, std_mcd, min_mcd, max_mcd

(157.75977653631284, 39.09326124551902, 121.0, 318.0)

In [19]:
# Mean, st. dev., min and max number of items for Jack in the Box
df_jb = data_process[data_process['restaurant_name'] == 'Jack in the Box']
mean_jb = np.mean(df_jb['menu_item'].astype(float))
std_jb = np.std(df_jb['menu_item'].astype(float))
min_jb = np.min(df_jb['menu_item'].astype(float))
max_jb = np.max(df_jb['menu_item'].astype(float))
mean_jb, std_jb, min_jb, max_jb

(150.3450292397661, 53.54129647765281, 129.0, 411.0)

In [20]:
# Mean, st. dev., min and max number of items for Wendy
df_wd = data_process[data_process['restaurant_name'] == 'Wendy']
mean_wd = np.mean(df_wd['menu_item'].astype(float))
std_wd = np.std(df_wd['menu_item'].astype(float))
min_wd = np.min(df_wd['menu_item'].astype(float))
max_wd = np.max(df_wd['menu_item'].astype(float))
mean_wd, std_wd, min_wd, max_wd

(133.0983606557377, 69.88824918655303, 88.0, 470.0)

In [21]:
# Mean, st. dev., min and max number of items for The Habit
df_hb = data_process[data_process['restaurant_name'] == 'The Habit']
mean_hb = np.mean(df_hb['menu_item'].astype(float))
std_hb = np.std(df_hb['menu_item'].astype(float))
min_hb = np.min(df_hb['menu_item'].astype(float))
max_hb = np.max(df_hb['menu_item'].astype(float))
mean_hb, std_hb, min_hb, max_hb

(116.03305785123968, 72.43805377321507, 64.0, 395.0)

In [22]:
# Mean, st. dev., min and max number of items for Burger King
df_bk = data_process[data_process['restaurant_name'] == 'Burger King']
mean_bk = np.mean(df_bk['menu_item'].astype(float))
std_bk = np.std(df_bk['menu_item'].astype(float))
min_bk = np.min(df_bk['menu_item'].astype(float))
max_bk = np.max(df_bk['menu_item'].astype(float))
mean_bk, std_bk, min_bk, max_bk

(89.65562913907284, 32.00042349835987, 62.0, 237.0)

In [23]:
# Mean, st. dev., min and max number of items for Carls Jr
df_cj = data_process[data_process['restaurant_name'] == 'Carls Jr']
mean_cj = np.mean(df_cj['menu_item'].astype(float))
std_cj = np.std(df_cj['menu_item'].astype(float))
min_cj = np.min(df_cj['menu_item'].astype(float))
max_cj = np.max(df_cj['menu_item'].astype(float))
mean_cj, std_cj, min_cj, max_cj

(110.74117647058823, 62.162434834056086, 48.0, 428.0)

In [24]:
# Mean, st. dev., min and max number of items for Sonic
df_s = data_process[data_process['restaurant_name'] == 'Sonic']
mean_s = np.mean(df_s['menu_item'].astype(float))
std_s = np.std(df_s['menu_item'].astype(float))
min_s = np.min(df_s['menu_item'].astype(float))
max_s = np.max(df_s['menu_item'].astype(float))
mean_s, std_s, min_s, max_s

(196.68965517241378, 112.61196832586633, 80.0, 581.0)

In [25]:
# Mean, st. dev., min and max number of items for Five Guys
df_fg = data_process[data_process['restaurant_name'] == 'Five Guys']
mean_fg = np.mean(df_fg['menu_item'].astype(float))
std_fg = np.std(df_fg['menu_item'].astype(float))
min_fg = np.min(df_fg['menu_item'].astype(float))
max_fg = np.max(df_fg['menu_item'].astype(float))
mean_fg, std_fg, min_fg, max_fg

(48.33695652173913, 27.0331025636256, 24.0, 116.0)

In [26]:
# Mean, st. dev., min and max number of items for Shake Shack
df_ss = data_process[data_process['restaurant_name'] == 'Shake Shack']
mean_ss = np.mean(df_ss['menu_item'].astype(float))
std_ss = np.std(df_ss['menu_item'].astype(float))
min_ss = np.min(df_ss['menu_item'].astype(float))
max_ss = np.max(df_ss['menu_item'].astype(float))
mean_ss, std_ss, min_ss, max_ss

(121.45714285714286, 94.8289717204137, 43.0, 368.0)

In [27]:
# Mean, st. dev., min and max of all prices
mean = np.mean(df['menu_item_price'].astype(float))
std = np.std(df['menu_item_price'].astype(float))
minimum = np.min(df['menu_item_price'].astype(float))
maximum = np.max(df['menu_item_price'].astype(float))
mean, std, minimum, maximum

(6.990624646020243, 6.177492322419106, 0.0, 69.29)

In [28]:
# Mean, st. dev., min and a max of prices for the items called "Hamburger"
only_hb = df[(df['menu_item'].str.contains('Hamburger')) & (~df['menu_item'].str.contains('Meal|Combo'))]
hb_avg = np.mean(only_hb['menu_item_price'].astype(float))
hb_std = np.std(only_hb['menu_item_price'].astype(float))
hb_min = np.min(only_hb['menu_item_price'].astype(float))
hb_max = np.max(only_hb['menu_item_price'].astype(float))
hb_avg, hb_std, hb_min, hb_max

(6.636036269430052, 3.6367910026027053, 1.7, 14.39)

In [29]:
# Issue with scraping the restaurant rating -> scraped some distances and names instead of ratings. 
df['restaurant_rating'].unique()
df[df['restaurant_rating'] == '4.2 mi'].head(5)

Unnamed: 0,restaurant_name,menu_item,menu_item_price,restaurant_location,inputted_location,restaurant_rating,number_of_ratings,restaurant_distance
10073,Burger King,16 Pc. Chicken Nuggets Meal,14.99,"5400 Ygnacio Valley Road, Concord, CA, 94521, US","1950 Parkside Dr, Concord, CA 94519",4.2 mi,16,Whopper®
10074,Burger King,Whopper®,10.29,"5400 Ygnacio Valley Road, Concord, CA, 94521, US","1950 Parkside Dr, Concord, CA 94519",4.2 mi,16,Whopper®
10075,Burger King,Bacon King Meal,20.59,"5400 Ygnacio Valley Road, Concord, CA, 94521, US","1950 Parkside Dr, Concord, CA 94519",4.2 mi,16,Whopper®
10076,Burger King,Whopper® Meal,16.89,"5400 Ygnacio Valley Road, Concord, CA, 94521, US","1950 Parkside Dr, Concord, CA 94519",4.2 mi,16,Whopper®
10077,Burger King,4 Pc. Mozzarella Sticks,5.59,"5400 Ygnacio Valley Road, Concord, CA, 94521, US","1950 Parkside Dr, Concord, CA 94519",4.2 mi,16,Whopper®


In [30]:
# Restaurant rating per location with distances/names filtered out
filtered_rating = df[~df['restaurant_rating'].str.contains('mi|Burger King|McDonald\'s®|Jack in the Box|Wendy\'s')]
filtered_rating['restaurant_rating'] = filtered_rating['restaurant_rating'].astype(float)
avg_rating_df = filtered_rating.groupby(['restaurant_name', 'restaurant_location'])[['restaurant_rating']].mean().reset_index()
avg_rating_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_rating['restaurant_rating'] = filtered_rating['restaurant_rating'].astype(float)


Unnamed: 0,restaurant_name,restaurant_location,restaurant_rating
0,Burger King,"10055 Cedar Avenue, Bloomington, CA, 92316, US",4.5
1,Burger King,"1030 Mclaughlin Ave., San Jose, CA, 95122, US",4.4
2,Burger King,"1042 North Carpenter Road, Modesto, CA, 95351, US",4.0
3,Burger King,"111, Colma, CA, 94014, US",4.5
4,Burger King,"1181 Old Oakland Road, San Jose, CA, 95112, US",4.5
...,...,...,...
935,Wendy,"8871 Bond Road, Elk Grove, CA, 95624, US",4.2
936,Wendy,"9180 Rosedale Hwy, Bakersfield, CA, 93312, US",4.4
937,Wendy,"924 Pleasant Grove Blvd, Roseville, CA, 95678, US",4.4
938,Wendy,"960 EASTLAKE PARKWAY, CHULA VISTA, CA, 91914, US",4.7


In [31]:
# Mean, st. dev., min and max rating across all restaurants
mean_rt = np.mean(avg_rating_df['restaurant_rating'].astype(float))
std_rt = np.std(avg_rating_df['restaurant_rating'].astype(float))
min_rt = np.min(avg_rating_df['restaurant_rating'].astype(float))
max_rt = np.max(avg_rating_df['restaurant_rating'].astype(float))
mean_rt, std_rt, min_rt, max_rt

(4.52561170212766, 0.2656365763860919, 2.6, 5.0)

In [32]:
# Mean, st. dev., min and max rating for McDonalds
data_mcd = avg_rating_df[avg_rating_df['restaurant_name'] == 'McDonald']
mean_mcd_rating = np.mean(data_mcd['restaurant_rating'].astype(float))
std_mcd_rating = np.std(data_mcd['restaurant_rating'].astype(float))
min_mcd_rating = np.min(data_mcd['restaurant_rating'].astype(float))
max_mcd_rating = np.max(data_mcd['restaurant_rating'].astype(float))
mean_mcd_rating, std_mcd_rating, min_mcd_rating, max_mcd_rating

(4.467159763313609, 0.2981226931250907, 2.6, 5.0)

In [33]:
# Mean, st. dev., min and max rating for Jack in the Box
data_filter = avg_rating_df[avg_rating_df['restaurant_name'] == 'Jack in the Box']
mean_rating = np.mean(data_filter['restaurant_rating'].astype(float))
std_rating = np.std(data_filter['restaurant_rating'].astype(float))
min_rating = np.min(data_filter['restaurant_rating'].astype(float))
max_rating = np.max(data_filter['restaurant_rating'].astype(float))
mean_rating, std_rating, min_rating, max_rating

(4.4225609756097555, 0.2523852599751157, 3.5, 5.0)

In [34]:
# Mean, st. dev., min and max rating for Wendy
data_filter = avg_rating_df[avg_rating_df['restaurant_name'] == 'Wendy']
mean_rating = np.mean(data_filter['restaurant_rating'].astype(float))
std_rating = np.std(data_filter['restaurant_rating'].astype(float))
min_rating = np.min(data_filter['restaurant_rating'].astype(float))
max_rating = np.max(data_filter['restaurant_rating'].astype(float))
mean_rating, std_rating, min_rating, max_rating

(4.532773109243697, 0.27412480862822075, 3.3, 5.0)

In [35]:
# Mean, st. dev., min and max rating for The Habit
data_filter = avg_rating_df[avg_rating_df['restaurant_name'] == 'The Habit']
mean_rating = np.mean(data_filter['restaurant_rating'].astype(float))
std_rating = np.std(data_filter['restaurant_rating'].astype(float))
min_rating = np.min(data_filter['restaurant_rating'].astype(float))
max_rating = np.max(data_filter['restaurant_rating'].astype(float))
mean_rating, std_rating, min_rating, max_rating

(4.578205128205128, 0.22469206044046944, 3.8999999999999995, 5.0)

In [36]:
# Mean, st. dev., min and max rating for Burger King
data_filter = avg_rating_df[avg_rating_df['restaurant_name'] == 'Burger King']
mean_rating = np.mean(data_filter['restaurant_rating'].astype(float))
std_rating = np.std(data_filter['restaurant_rating'].astype(float))
min_rating = np.min(data_filter['restaurant_rating'].astype(float))
max_rating = np.max(data_filter['restaurant_rating'].astype(float))
mean_rating, std_rating, min_rating, max_rating

(4.498571428571428, 0.28585353749421955, 3.5, 5.0)

In [37]:
# Mean, st. dev., min and max rating for Carls Jr
data_filter = avg_rating_df[avg_rating_df['restaurant_name'] == 'Carls Jr']
mean_rating = np.mean(data_filter['restaurant_rating'].astype(float))
std_rating = np.std(data_filter['restaurant_rating'].astype(float))
min_rating = np.min(data_filter['restaurant_rating'].astype(float))
max_rating = np.max(data_filter['restaurant_rating'].astype(float))
mean_rating, std_rating, min_rating, max_rating

(4.6118421052631575, 0.20772394716822323, 4.0, 5.0)

In [38]:
# Mean, st. dev., min and max rating for Sonic
data_filter = avg_rating_df[avg_rating_df['restaurant_name'] == 'Sonic']
mean_rating = np.mean(data_filter['restaurant_rating'].astype(float))
std_rating = np.std(data_filter['restaurant_rating'].astype(float))
min_rating = np.min(data_filter['restaurant_rating'].astype(float))
max_rating = np.max(data_filter['restaurant_rating'].astype(float))
mean_rating, std_rating, min_rating, max_rating

(4.458620689655173, 0.27230464858482256, 3.8, 5.0)

In [39]:
# Mean, st. dev., min and max rating for Five Guys
data_filter = avg_rating_df[avg_rating_df['restaurant_name'] == 'Five Guys']
mean_rating = np.mean(data_filter['restaurant_rating'].astype(float))
std_rating = np.std(data_filter['restaurant_rating'].astype(float))
min_rating = np.min(data_filter['restaurant_rating'].astype(float))
max_rating = np.max(data_filter['restaurant_rating'].astype(float))
mean_rating, std_rating, min_rating, max_rating

(4.685597826086957, 0.161902484054755, 4.2, 5.0)

In [40]:
# Mean, st. dev., min and max rating for Shake Shack
data_filter = avg_rating_df[avg_rating_df['restaurant_name'] == 'Shake Shack']
mean_rating = np.mean(data_filter['restaurant_rating'].astype(float))
std_rating = np.std(data_filter['restaurant_rating'].astype(float))
min_rating = np.min(data_filter['restaurant_rating'].astype(float))
max_rating = np.max(data_filter['restaurant_rating'].astype(float))
mean_rating, std_rating, min_rating, max_rating

(4.6499999999999995, 0.10914103126634989, 4.3, 4.8)

In [41]:
mcd_items = ['Hamburger', 'Cheeseburger', 'Big Mac®', 'Medium French Fries', 'Big Mac® Meal']
mcd = df[df['restaurant_name'] == 'McDonald']
mcd_data = mcd[mcd['menu_item'].isin(mcd_items)]
mcd_data.head(5)

Unnamed: 0,restaurant_name,menu_item,menu_item_price,restaurant_location,inputted_location,restaurant_rating,number_of_ratings,restaurant_distance
0,McDonald,Medium French Fries,4.69,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi
1,McDonald,Big Mac®,6.79,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi
7,McDonald,Cheeseburger,3.49,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi
10,McDonald,Big Mac® Meal,12.59,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi
155,McDonald,Hamburger,2.89,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi


In [42]:
jb_items = ['Jr. Jumbo Jack®', 'Jr. Jumbo Jack® Cheeseburger', 'Jumbo Jack®', 'Large French Fry', 'Large Jumbo Jack®  Combo']
jb = df[df['restaurant_name'] == 'Jack in the Box']
jb_data = jb[jb['menu_item'].isin(jb_items)]
jb_data.head(5)

Unnamed: 0,restaurant_name,menu_item,menu_item_price,restaurant_location,inputted_location,restaurant_rating,number_of_ratings,restaurant_distance
166,Jack in the Box,Large French Fry,4.74,"4425 Telegraph Ave, Oakland, CA, 94609, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.3,100+,1.9 mi
197,Jack in the Box,Large Jumbo Jack® Combo,11.6,"4425 Telegraph Ave, Oakland, CA, 94609, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.3,100+,1.9 mi
263,Jack in the Box,Jr. Jumbo Jack® Cheeseburger,3.74,"4425 Telegraph Ave, Oakland, CA, 94609, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.3,100+,1.9 mi
266,Jack in the Box,Jr. Jumbo Jack®,3.36,"4425 Telegraph Ave, Oakland, CA, 94609, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.3,100+,1.9 mi
268,Jack in the Box,Jumbo Jack®,5.86,"4425 Telegraph Ave, Oakland, CA, 94609, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.3,100+,1.9 mi


In [43]:
# Dave's Combo and French Fries -> Priced with add-ons. No alternative French Fries. 
wd_items = ['Jr. Hamburger', 'Jr. Cheeseburger', 'Dave\'s Single®', 'French Fries', 'Dave\'s Combo']
wd = df[df['restaurant_name'] == 'Wendy']
wd_data = wd[wd['menu_item'].isin(wd_items)]
wd_data.head(5)

Unnamed: 0,restaurant_name,menu_item,menu_item_price,restaurant_location,inputted_location,restaurant_rating,number_of_ratings,restaurant_distance
298,Wendy,Dave's Combo,0.0,"5211 Broadway, Oakland, CA, 94618, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,2.4 mi
299,Wendy,French Fries,0.0,"5211 Broadway, Oakland, CA, 94618, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,2.4 mi
321,Wendy,Jr. Cheeseburger,2.92,"5211 Broadway, Oakland, CA, 94618, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,2.4 mi
322,Wendy,Jr. Hamburger,2.43,"5211 Broadway, Oakland, CA, 94618, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,2.4 mi
323,Wendy,Dave's Single®,7.67,"5211 Broadway, Oakland, CA, 94618, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,2.4 mi


In [44]:
hab_items = ['Charburger', 'Charburger with Cheese', '#2 Original Double Char', 'French Fries', '#2 Original Double Char Meal']
habit = df[df['restaurant_name'] == 'The Habit']
habit_data = habit[habit['menu_item'].isin(hab_items)]
habit_data.head(5)

Unnamed: 0,restaurant_name,menu_item,menu_item_price,restaurant_location,inputted_location,restaurant_rating,number_of_ratings,restaurant_distance
625,The Habit,Charburger with Cheese,8.36,"2640 5th St, Alameda, CA, 94501, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,26,1.3 mi
634,The Habit,#2 Original Double Char Meal,17.49,"2640 5th St, Alameda, CA, 94501, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,26,1.3 mi
644,The Habit,Charburger,7.24,"2640 5th St, Alameda, CA, 94501, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,26,1.3 mi
648,The Habit,#2 Original Double Char,17.49,"2640 5th St, Alameda, CA, 94501, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,26,1.3 mi
676,The Habit,French Fries,4.36,"2640 5th St, Alameda, CA, 94501, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,26,1.3 mi


In [45]:
bk_items = ['Whopper® Jr.', "Cheeseburger", "Whopper®", "French Fries", 'Whopper® Meal']
bk = df[df['restaurant_name'] == "Burger King"]
bk_data = bk[bk['menu_item'].isin(bk_items)]
bk_data.head(5)

Unnamed: 0,restaurant_name,menu_item,menu_item_price,restaurant_location,inputted_location,restaurant_rating,number_of_ratings,restaurant_distance
390,Burger King,Whopper® Meal,13.99,"1541 East 12th Street, Oakland, CA, 94606, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,39,1.9 mi
394,Burger King,French Fries,3.99,"1541 East 12th Street, Oakland, CA, 94606, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,39,1.9 mi
398,Burger King,Whopper®,8.59,"1541 East 12th Street, Oakland, CA, 94606, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,39,1.9 mi
418,Burger King,Whopper® Jr.,4.99,"1541 East 12th Street, Oakland, CA, 94606, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,39,1.9 mi
422,Burger King,Cheeseburger,3.49,"1541 East 12th Street, Oakland, CA, 94606, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,39,1.9 mi


In [46]:
# The Big Carl Combo -> Priced with add-ons.
cj_items = ["Big Hamburger", "California Classic Double Cheeseburger", "The Big Carl®", "Natural-Cut French Fries", "The Big Carl® Combo"]
cj = df[df['restaurant_name'] == 'Carls Jr']
cj_data = cj[cj['menu_item'].isin(cj_items)]
cj_data.head(5)

Unnamed: 0,restaurant_name,menu_item,menu_item_price,restaurant_location,inputted_location,restaurant_rating,number_of_ratings,restaurant_distance
3359,Carls Jr,Natural-Cut French Fries,4.99,"871 Marina Village Pkwy, Alameda, CA, 94501, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.8,25,1.5 mi
3374,Carls Jr,The Big Carl® Combo,0.0,"871 Marina Village Pkwy, Alameda, CA, 94501, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.8,25,1.5 mi
3399,Carls Jr,California Classic Double Cheeseburger,6.16,"871 Marina Village Pkwy, Alameda, CA, 94501, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.8,25,1.5 mi
3400,Carls Jr,The Big Carl®,9.52,"871 Marina Village Pkwy, Alameda, CA, 94501, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.8,25,1.5 mi
3401,Carls Jr,Big Hamburger,3.94,"871 Marina Village Pkwy, Alameda, CA, 94501, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.8,25,1.5 mi


In [47]:
# SuperSonic Double Cheeseburger Combo and Fries -> Priced with add-ons. No alternative Fries.
sonic_items = ['Quarter Pound Double Cheeseburger', 'SuperSONIC® Double Cheeseburger', 'Fries', 'SuperSONIC® Double Cheeseburger Combo']
sonic = df[df['restaurant_name'] == "Sonic"]
sonic_data = sonic[sonic['menu_item'].isin(sonic_items)]
sonic_data.head(5)

Unnamed: 0,restaurant_name,menu_item,menu_item_price,restaurant_location,inputted_location,restaurant_rating,number_of_ratings,restaurant_distance
515,Sonic,Quarter Pound Double Cheeseburger,3.56,"31187 Mission Boulevard, Hayward, CA, 94544, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.1,41,18.3 mi
526,Sonic,Fries,0.0,"31187 Mission Boulevard, Hayward, CA, 94544, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.1,41,18.3 mi
539,Sonic,SuperSONIC® Double Cheeseburger Combo,0.0,"31187 Mission Boulevard, Hayward, CA, 94544, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.1,41,18.3 mi
571,Sonic,SuperSONIC® Double Cheeseburger,8.56,"31187 Mission Boulevard, Hayward, CA, 94544, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.1,41,18.3 mi
4664,Sonic,Quarter Pound Double Cheeseburger,3.56,"31187 Mission Boulevard, Hayward, CA, 94544, US","3300 Capitol Ave, Fremont, CA 94538",4.1,41,5.5 mi


In [48]:
fg_items = ['Little Hamburger', 'Little Cheeseburger', 'Cheeseburger', "Regular Fries"]
five_g = df[df['restaurant_name'] == "Five Guys"]
fg_data = five_g[five_g['menu_item'].isin(fg_items)]
fg_data.head(5)

Unnamed: 0,restaurant_name,menu_item,menu_item_price,restaurant_location,inputted_location,restaurant_rating,number_of_ratings,restaurant_distance
595,Five Guys,Cheeseburger,13.79,"2254 South Shore Center, Alameda, CA, 94501, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.7,100+,3.6 mi
600,Five Guys,Regular Fries,7.19,"2254 South Shore Center, Alameda, CA, 94501, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.7,100+,3.6 mi
601,Five Guys,Little Cheeseburger,10.31,"2254 South Shore Center, Alameda, CA, 94501, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.7,100+,3.6 mi
609,Five Guys,Little Hamburger,9.83,"2254 South Shore Center, Alameda, CA, 94501, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.7,100+,3.6 mi
3240,Five Guys,Cheeseburger,13.79,"1201 Marina Blvd., San Leandro, CA, 94577, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.7,32,8.8 mi


In [49]:
ss_items = ['Hamburger', 'Cheeseburger', 'ShackBurger', 'Fries']
shake_s = df[df['restaurant_name'] == "Shake Shack"]
ss_data = shake_s[shake_s['menu_item'].isin(ss_items)]
ss_data.head(5)

Unnamed: 0,restaurant_name,menu_item,menu_item_price,restaurant_location,inputted_location,restaurant_rating,number_of_ratings,restaurant_distance
466,Shake Shack,ShackBurger,9.59,"1954 Telegraph Avenue, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.6,100+,0.3 mi
467,Shake Shack,Fries,5.49,"1954 Telegraph Avenue, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.6,100+,0.3 mi
469,Shake Shack,Cheeseburger,9.39,"1954 Telegraph Avenue, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.6,100+,0.3 mi
484,Shake Shack,Hamburger,8.99,"1954 Telegraph Avenue, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.6,100+,0.3 mi
3019,Shake Shack,ShackBurger,9.59,"5614 Bay Street Suite 240, Emeryville, CA, 946...","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.8,100+,2.2 mi


In [50]:
data = pd.concat([mcd_data, jb_data, wd_data, habit_data, bk_data, cj_data, sonic_data, fg_data, ss_data])
data

Unnamed: 0,restaurant_name,menu_item,menu_item_price,restaurant_location,inputted_location,restaurant_rating,number_of_ratings,restaurant_distance
0,McDonald,Medium French Fries,4.69,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi
1,McDonald,Big Mac®,6.79,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi
7,McDonald,Cheeseburger,3.49,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi
10,McDonald,Big Mac® Meal,12.59,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi
155,McDonald,Hamburger,2.89,"1330 JACKSON ST, Oakland, CA, 94612, US","1 Frank H. Ogawa Plaza, Oakland, CA 94612",4.5,100+,0.5 mi
...,...,...,...,...,...,...,...,...
111623,Shake Shack,Fries,5.79,"2424 Magowan Drive, Santa Rosa, CA, 95405, US","100 Santa Rosa Ave, Santa Rosa, CA 95404",1.5 mi,11,690 - 980 Cal.
118955,Shake Shack,Fries,5.79,"120 Promenade Way, Thousand Oaks, CA, 91362, US","300 W 3rd St, Oxnard, CA 93030",4.5,50+,20.8 mi
118956,Shake Shack,ShackBurger,9.59,"120 Promenade Way, Thousand Oaks, CA, 91362, US","300 W 3rd St, Oxnard, CA 93030",4.5,50+,20.8 mi
118957,Shake Shack,Cheeseburger,9.39,"120 Promenade Way, Thousand Oaks, CA, 91362, US","300 W 3rd St, Oxnard, CA 93030",4.5,50+,20.8 mi


In [51]:
df.to_csv('fast_food_restaurants_menu.csv', index=False) 