In [96]:
import pandas as pd

In [97]:
df_clean = pd.read_csv("McDonald_s_Reviews.csv", encoding='ISO-8859-1')
df_clean = df_clean[["store_address", "latitude ", "longitude", "rating_count", "review_time", "review", "rating"]]

# Convert the rating column to a numeric one
df_clean.loc[:, "rating (stars)"] = df_clean["rating"].str.split(' ').map(lambda x: x[0] if len(x) > 0 else None)
df_clean = df_clean.drop(["rating"], axis=1)
df_clean["rating (stars)"] = df_clean["rating (stars)"].astype(int)

df_clean.head()

Unnamed: 0,store_address,latitude,longitude,rating_count,review_time,review,rating (stars)
0,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,3 months ago,Why does it look like someone spit on my food?...,1
1,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,It'd McDonalds. It is what it is as far as the...,4
2,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,Made a mobile order got to the speaker and che...,1
3,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,a month ago,My mc. Crispy chicken sandwich was ï¿½ï¿½ï¿½ï¿...,5
4,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,2 months ago,"I repeat my order 3 times in the drive thru, a...",1


In [98]:
df_clean.describe()

Unnamed: 0,latitude,longitude,rating_count,rating (stars)
count,32736.0,32736.0,33396.0,33396.0
mean,34.442546,-90.647033,2582.135286,3.131363
std,5.344116,16.594844,2507.312674,1.615139
min,25.790295,-121.995421,263.0,1.0
25%,28.65535,-97.792874,1406.0,1.0
50%,33.931261,-81.471414,1795.0,3.0
75%,40.727401,-75.399919,2810.0,5.0
max,44.98141,-73.45982,19682.0,5.0


In [99]:
# check how many restaurants there are
total_restaurants = len(df_clean["store_address"].unique().tolist())

print("Total number of restaurants is: ", total_restaurants)

Total number of restaurants is:  40


Let's break down the location of the stores and begin witht the city and state they are in.

In [100]:
# find all the US states and city which are listed
# we compare if len() if the list with address is at least 3 bc we want to extract the 3rd element (state is at index 2 if exists)
# same holds for the city (city is at index 2 of the list if it exists)
df_clean.loc[:, "state"] = df_clean["store_address"].str.split(", ").map(lambda x: x[2] if len(x) >= 3 else None)
# we need to remove the state code as well and leave only state
df_clean.loc[:, "state"] = df_clean["state"].str.split(" ").map(lambda x: x[0] if x else None)

df_clean.loc[:, "city"] = df_clean["store_address"].str.split(", "). map(lambda x: x[1] if len(x) >= 2 else None)
df_clean.head()

Unnamed: 0,store_address,latitude,longitude,rating_count,review_time,review,rating (stars),state,city
0,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,3 months ago,Why does it look like someone spit on my food?...,1,TX,Austin
1,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,It'd McDonalds. It is what it is as far as the...,4,TX,Austin
2,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,Made a mobile order got to the speaker and che...,1,TX,Austin
3,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,a month ago,My mc. Crispy chicken sandwich was ï¿½ï¿½ï¿½ï¿...,5,TX,Austin
4,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,2 months ago,"I repeat my order 3 times in the drive thru, a...",1,TX,Austin


Find the unique number of states and cities

In [101]:
# unique nr of states
nr_states = len(df_clean["state"].unique().tolist())
nr_cities = len(df_clean["city"].unique().tolist())

print("Total number of states is: ", nr_states)
print("Total number of cities is: ", nr_cities)

Total number of states is:  12
Total number of cities is:  27


Let's see the avg stars for each state/city

In [102]:
state_avg_stars = df_clean.groupby("state")["rating (stars)"].agg("mean").to_frame().reset_index()
state_avg_stars.sort_values(by="rating (stars)", ascending=False, inplace=True)
state_avg_stars

Unnamed: 0,state,rating (stars)
10,VA,3.697674
1,DC,3.594436
7,PA,3.463035
3,IL,3.362184
6,NY,3.299466
5,NJ,3.25571
0,CA,3.191455
8,TX,3.155415
4,Las,2.978641
2,FL,2.867514


In [103]:
# Create a pivot table to calculate average rating for each state and city
pivot_state_city= pd.pivot_table(df_clean, values=['rating (stars)', "rating_count"], index=['state', 'city'], aggfunc='mean')
pivot_state_city

Unnamed: 0_level_0,Unnamed: 1_level_0,rating (stars),rating_count
state,city,Unnamed: 2_level_1,Unnamed: 3_level_1
CA,Los Angeles,2.805609,2663.186654
CA,North Hollywood,3.377193,1794.561404
CA,San Diego,3.166667,886.682927
CA,Santa Monica,3.192683,3380.280488
CA,Sunnyvale,3.33,1562.0
DC,Washington,3.594436,1270.199122
FL,Fern Park,3.470588,1617.670588
FL,Kissimmee,2.810465,5566.552326
FL,Miami,2.557962,2809.452229
FL,Miami Beach,1.97193,4922.0
