# Analysis of Restaurant Information

### Import necessary packages for EDA

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Load the Datafiniti vegetarian-restaurant export from the zipped CSV in the
# working directory and preview the first rows.
df = pd.read_csv("vegetarian_restaurants_US_datafiniti_vegetarian_restaurants_US_datafiniti.csv.zip")
df.head()

Unnamed: 0,address,categories,city,claimed,country,cuisines,dateOpened,dateUpdated,descriptions,facebookPageURL,...,menuURL,name,paymentTypes,phones,postalCode,priceRange,province,sic,twitter,websites
0,2871 Gulf To Bay Blvd,"American, American (new), Clearwater Restauran...",Clearwater,,US,,,2014-02-01T04:41:06Z,"Tags: American (New), American, and Trendy Sta...",,...,,Village Inn Restaurant and Bakery,,7277964988,33759,,FL,8423.0,,
1,605 W Main St,Caf and Sandwich Place,Louisville,,US,"Vegetarian, Delicatessen",,2016-07-23T22:37:24Z,"Atlantic No. 5, Louisville: See 4 unbiased rev...",,...,,Atlantic No. 5,Yes,5028833398,40202,USD 25.00-40.00,KY,,,atlanticno5.com
2,3803 9th St S W,"American, Barbecue, Puyallup Restaurants, Barb...",Puyallup,,US,,,2015-11-19T22:22:31Z,"Description: Famous Dave's is a Barbecue , Am...",,...,,Famous Dave's,,2536040340,98373,USD 10.00 - USD 15.00,WA,5610.0,,http://famousdaves.com
3,30 S Main St,"Coffee & Tea, Restaurant, Restaurants, Bar, Am...",Oberlin,,US,"Coffee & Tea, Restaurants, American, Bar, Pu...",,2016-06-20T17:47:10Z,"The Feve, Oberlin: See 170 unbiased reviews of...",,...,/oberlin-oh/mip/the-feve-8940755/menu,The Feve,"mastercard, amex, discover, Yes, visa","[""4407741978"",""4197741978"",""4407753064""]",44074,USD 25.00-40.00,OH,,,http://www.thefeve.com
4,55 Crescent Dr,Sandwich Place and Fast Food Restaurant,Pleasant Hill,yellowpages.com,US,"Take Out Restaurants, Asian Restaurants, Resta...",,2016-07-23T23:55:05Z,"[""https://www.zagat.com/r/yalla-mediterranean-...",,...,/pleasant-hill-ca/mip/yalla-mediterranean-5061...,Quiznos,"master card, visa",9258258644,94523,$,CA,,quiznos,http://www.yallamedi.com/


In [4]:
# (rows, columns) of the raw frame.
df.shape

(18155, 29)

In [5]:
# Per-column dtype and non-null count; shows how sparsely most fields are populated.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18155 entries, 0 to 18154
Data columns (total 29 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   address          17201 non-null  object 
 1   categories       17993 non-null  object 
 2   city             17534 non-null  object 
 3   claimed          820 non-null    object 
 4   country          18155 non-null  object 
 5   cuisines         10468 non-null  object 
 6   dateOpened       264 non-null    object 
 7   dateUpdated      18155 non-null  object 
 8   descriptions     11224 non-null  object 
 9   facebookPageURL  1182 non-null   object 
 10  features         6881 non-null   object 
 11  hours            7973 non-null   object 
 12  images           6576 non-null   object 
 13  isClosed         605 non-null    object 
 14  key              18155 non-null  object 
 15  lat              13704 non-null  float64
 16  languages        8 non-null      object 
 17  long        

## Field description

(for those that are not self-explanatory)

**claimed** - a list of websites where the business location has been claimed by the business' owner<br>
**domains** - a list of each unique domain found in the sourceURLs field<br>
**hours** - the hours of operation for this business location<br>
**keys** - a list of internal Datafiniti identifiers for this business. The keys field is used to merge raw data from individual sources into the master Datafiniti record <br>
**paymentTypes** - a list of payment options accepted by the business<br>
**province** - the province or state for this business location<br>
**sic** - the SIC code for the business<br>

Fields that are more complicated, as they basically consist of lists of dictionaries:

**descriptions** - a list of descriptions for this business from various sources <br><br>
Each description contains:

dateSeen: the date when this description has been seen for this business<br>
sourceURLs: a list of source URLs where this specific description was seen<br>
value: a unique description for this business

**features** - a list of features associated with this business location. The feature list can be collected from specific feature sections available on listings for this business. It can also serve as a catch-all field for miscellaneous characteristics of this business that don't make sense to include as top-level fields

Each feature contains:

key: a label for this feature<br>
replace: flag indicating value field is replaced versus appended<br>
value: a list of possible values for this feature

**hours** - the hours of operation for this business location<br><br>
Each hour object may contain:

day: the day(s) for this hour of operation<br>
dept: the department or business unit related to this hour of operation<br>
meal: the mealtime associated with these hours. Typically reserved for restaurant locations<br>
hour: the hours of operation

**menus** - a list of menu items offered by the business. This field will only show for restaurants or other eating locations
<br><br>
Each menu item may include:

amountMin: the minimum price value of the item<br>
amountMax: the maximum price value of the item. This is typically the same as amountMin, except in cases where a price range is listed<br>
category: the menu section this item belongs to<br>
currency: the currency listed for amountMin and amountMax<br>
dateSeen: the date when this item was seen<br>
description: a description for the item<br>
name: the item's name on the menu<br>
sourceURLs: a list of URLs where this item was seen<br>

In [6]:
# Frequency of each SIC code. The counts show the same code under two spellings
# (e.g. 5610 and [5610]), so this column would need normalising before it is
# treated as a categorical.
df.sic.value_counts()

5610      5962
[5610]     442
8423       268
4774       118
0          101
          ... 
4759         1
8211         1
4753         1
7310         1
9491         1
Name: sic, Length: 141, dtype: int64

## Explore features we can change to categorical

In [4]:
# List the column names to pick candidates for categorical features.
df.columns

Index(['address', 'categories', 'city', 'claimed', 'country', 'cuisines',
       'dateOpened', 'dateUpdated', 'descriptions', 'facebookPageURL',
       'features', 'hours', 'images', 'isClosed', 'key', 'lat', 'languages',
       'long', 'menus', 'menuURL', 'name', 'paymentTypes', 'phones',
       'postalCode', 'priceRange', 'province', 'sic', 'twitter', 'websites'],
      dtype='object')

- Categories

In [5]:
# Frequency of each distinct `categories` string (most common first).
df.categories.value_counts()

Restaurant                                                                                                                                                                                                                                                                                                                                                                                                                                                       2257
Vegetarian / Vegan Restaurant                                                                                                                                                                                                                                                                                                                                                                                                                                     926
Restaurants                                                                                 

In [6]:
# Number of distinct `categories` values (a missing value counts as one of them).
df["categories"].unique().size

9951

In [7]:
# Row/column count again, for comparison with the number of distinct categories above.
df.shape

(18155, 29)

In [8]:
# Keep the distinct raw category strings and check what container they come back in.
orig_cats = df["categories"].unique()
type(orig_cats)

numpy.ndarray

In [9]:
# Type of the first distinct category value.
type(orig_cats[0])

str

In [10]:
# Count rows whose categories contain the lower-case substring "caterer".
# NOTE(review): the match is case-sensitive, so "Caterer" is not counted here,
# whereas the later cells match on "aterer" to catch both spellings.
df['categories'].str.contains("caterer").sum()

210

In [11]:
# Index labels of rows that mention a caterer ("Caterer"/"caterer") but never a
# restaurant ("Restaurant"/"restaurant"); missing categories count as no match.
caterer_mask = df['categories'].str.contains("aterer", na=False)
restaurant_mask = df['categories'].str.contains("estaurant", na=False)
df.index[caterer_mask & ~restaurant_mask]

Int64Index([   22,   123,   155,   185,   283,   452,   463,   559,   572,
              604,
            ...
            17705, 17765, 17772, 17881, 17984, 18021, 18032, 18045, 18094,
            18149],
           dtype='int64', length=206)

In [12]:
# Build `restaurants` by removing the rows that mention a caterer but never a
# restaurant; rows with missing categories are kept.
is_caterer = df['categories'].str.contains("aterer", na=False)
is_restaurant = df['categories'].str.contains("estaurant", na=False)
restaurants = df.drop(df.index[is_caterer & ~is_restaurant])

In [13]:
# Category frequencies after the caterer-only rows were removed.
restaurants.categories.value_counts()

Restaurant                                                                                                                                                                                                                                                                                                                                                                                                                                                       2257
Vegetarian / Vegan Restaurant                                                                                                                                                                                                                                                                                                                                                                                                                                     926
Restaurants                                                                                 

In [14]:
# Size of the filtered frame, to confirm how many rows the drop removed.
restaurants.shape

(17949, 29)

In [15]:
# Count the remaining rows whose categories mention "Restaurant"/"restaurant".
restaurants['categories'].str.contains("estaurant").sum()

16879

In [16]:
# Rows whose categories never mention a restaurant (including rows with no
# categories at all) are set aside for a manual look.
mentions_restaurant = restaurants['categories'].str.contains("estaurant", na=False)
check_these = restaurants.loc[~mentions_restaurant]
check_these.head()

Unnamed: 0,address,categories,city,claimed,country,cuisines,dateOpened,dateUpdated,descriptions,facebookPageURL,...,menuURL,name,paymentTypes,phones,postalCode,priceRange,province,sic,twitter,websites
1,605 W Main St,Caf and Sandwich Place,Louisville,,US,"Vegetarian, Delicatessen",,2016-07-23T22:37:24Z,"Atlantic No. 5, Louisville: See 4 unbiased rev...",,...,,Atlantic No. 5,Yes,5028833398,40202,USD 25.00-40.00,KY,,,atlanticno5.com
24,222 E Main St,Deli / Bodega,Port Washington,foursquare.com,US,"American, Vegetarian",,2016-07-23T23:16:43Z,"Dockside Deli, Port Washington: See 116 unbias...",,...,,Dockside Deli,Yes,"[""2622849440""]",53074,USD 0.00-25.00,WI,,docksidedeli,docksidedeli.com
33,2908 Fruth St,"[""Smoothies"",""Organic"",""Vegetarian""]",Austin,,US,"[""Bakery, Sandwiches/Subs, Vegetarian""]",,2016-04-23T11:54:05Z,,,...,,Juiceland - University of Texas - Austin,,,"[""78705""]",,TX,[2100],,
131,6117 E Us Highway 10,"Beer & Beverages, Convenience Stores, Liquor S...",Custer,,US,"Pizza By The Slice, French Fries / Onion Rings...",,2013-11-07T06:11:12Z,,,...,,Pioneer Party Store,,2317579508,49405-9761,,MI,4761,,
143,945 Columbus Ave,"Indian, Vegetarian",Manhattan,,US,,,2015-11-19T22:42:23Z,,,...,,Doaba Deli,,2122222636,10025,USD 0.00 - USD 10.00,NY,8423,,


In [17]:
# Look at the category strings of the rows set aside for review.
check_these.categories

1                                   Caf and Sandwich Place
24                                           Deli / Bodega
33                    ["Smoothies","Organic","Vegetarian"]
131      Beer & Beverages, Convenience Stores, Liquor S...
143                                     Indian, Vegetarian
                               ...                        
18088                           Buffet, Indian, Vegetarian
18132                                    ice cream parlors
18133                                    Soups, Vegetarian
18140                American, Vegetarian, Modern American
18148                     Greek, Mediterranean, Vegetarian
Name: categories, Length: 1070, dtype: object

In [18]:
# How many of the rows under review have "eer" in their categories (e.g. "Beer").
check_these.categories.str.contains("eer").sum()

1

In [19]:
# Pull up the full record for the beer/convenience-store listing by its address.
restaurants.loc[restaurants["address"] == "6117 E Us Highway 10"]

Unnamed: 0,address,categories,city,claimed,country,cuisines,dateOpened,dateUpdated,descriptions,facebookPageURL,...,menuURL,name,paymentTypes,phones,postalCode,priceRange,province,sic,twitter,websites
131,6117 E Us Highway 10,"Beer & Beverages, Convenience Stores, Liquor S...",Custer,,US,"Pizza By The Slice, French Fries / Onion Rings...",,2013-11-07T06:11:12Z,,,...,,Pioneer Party Store,,2317579508,49405-9761,,MI,4761,,


In [20]:
# Renumber the rows 0..n-1 (discarding the old labels) now that some rows have
# been dropped, then show the row that sits at position 131.
restaurants = restaurants.reset_index(drop=True)
restaurants.iloc[[131]]

Unnamed: 0,address,categories,city,claimed,country,cuisines,dateOpened,dateUpdated,descriptions,facebookPageURL,...,menuURL,name,paymentTypes,phones,postalCode,priceRange,province,sic,twitter,websites
131,203 Main St W,"Restaurants-breakfast & Diners, Restaurants, A...",Baudette,,US,"Italian Cuisine, Gourmet Deli Cuisine, Sandwic...",,2015-11-19T22:33:53Z,Homemade Old Style Cookin,,...,,Mn Restaurant,,2186341165,56623,,MN,5610,,http://www.alicesfamilyrestaurant.net


In [21]:
# Look the listing up by address again to find its label after the index reset.
restaurants.loc[restaurants["address"] == "6117 E Us Highway 10"]

Unnamed: 0,address,categories,city,claimed,country,cuisines,dateOpened,dateUpdated,descriptions,facebookPageURL,...,menuURL,name,paymentTypes,phones,postalCode,priceRange,province,sic,twitter,websites
129,6117 E Us Highway 10,"Beer & Beverages, Convenience Stores, Liquor S...",Custer,,US,"Pizza By The Slice, French Fries / Onion Rings...",,2013-11-07T06:11:12Z,,,...,,Pioneer Party Store,,2317579508,49405-9761,,MI,4761,,


In [22]:
# Show positions 128-130 to see the listing together with its neighbours.
restaurants.iloc[128:131]

Unnamed: 0,address,categories,city,claimed,country,cuisines,dateOpened,dateUpdated,descriptions,facebookPageURL,...,menuURL,name,paymentTypes,phones,postalCode,priceRange,province,sic,twitter,websites
128,700 River Dr,Restaurant,Fort Bragg,,US,"Mexican, American, Asian, Vegan",,2016-05-16T22:43:43Z,"Tsunami Nacho Truck, Fort Bragg: See 7 unbiase...",,...,,Tsunami Nacho Truck,,14157307646,95437,,CA,,,
129,6117 E Us Highway 10,"Beer & Beverages, Convenience Stores, Liquor S...",Custer,,US,"Pizza By The Slice, French Fries / Onion Rings...",,2013-11-07T06:11:12Z,,,...,,Pioneer Party Store,,2317579508,49405-9761,,MI,4761.0,,
130,703 W Burlington Ave,"Restaurants, Fairfield Restaurants, Mexican / ...",Fairfield,,US,,,2015-11-19T22:34:07Z,"Tags: Mexican, American, and Family Style Stat...",,...,,Taco John's Fairfield Ia - Hours Reviews & More,,6414727833,52556,USD 0.00 - USD 10.00,IA,4774.0,,


In [23]:
# Remove the "Pioneer Party Store" listing (beer / convenience / liquor store
# categories), which is not a restaurant.
# The row is selected by its address rather than by the hard-coded label 129:
# that label only points at this row after the index has been reset, so a
# label-based drop would silently remove a different row if the cells were run
# in another order.
party_store_labels = restaurants.index[restaurants.address == "6117 E Us Highway 10"]
rest_df = restaurants.drop(party_store_labels)
rest_df.iloc[127:133]

Unnamed: 0,address,categories,city,claimed,country,cuisines,dateOpened,dateUpdated,descriptions,facebookPageURL,...,menuURL,name,paymentTypes,phones,postalCode,priceRange,province,sic,twitter,websites
127,2641 S Florida Ave,"Lakeland Restaurants, Southwest Lakeland, Ital...",Lakeland,,US,,,2014-02-02T04:33:43Z,"Italian, Bistro, European, Pasta, Sandwiches, ...",,...,,Fazoli's,,8636872808,33803-3860,,FL,5610.0,,
128,700 River Dr,Restaurant,Fort Bragg,,US,"Mexican, American, Asian, Vegan",,2016-05-16T22:43:43Z,"Tsunami Nacho Truck, Fort Bragg: See 7 unbiase...",,...,,Tsunami Nacho Truck,,14157307646,95437,,CA,,,
130,703 W Burlington Ave,"Restaurants, Fairfield Restaurants, Mexican / ...",Fairfield,,US,,,2015-11-19T22:34:07Z,"Tags: Mexican, American, and Family Style Stat...",,...,,Taco John's Fairfield Ia - Hours Reviews & More,,6414727833,52556,USD 0.00 - USD 10.00,IA,4774.0,,
131,203 Main St W,"Restaurants-breakfast & Diners, Restaurants, A...",Baudette,,US,"Italian Cuisine, Gourmet Deli Cuisine, Sandwic...",,2015-11-19T22:33:53Z,Homemade Old Style Cookin,,...,,Mn Restaurant,,2186341165,56623,,MN,5610.0,,http://www.alicesfamilyrestaurant.net
132,2655 Capital Cir N E,"Tallahassee > Catering, Tallahassee > Restaura...",Tallahassee,,US,,,2013-09-06T04:46:44Z,"Full service catering! Delicious, healthy food!!!",,...,,Tastebudz Catering Takeout & Takehome,,8503097348,32308,,FL,5610.0,,http://www.tastebudz.net
133,41 E 4th St,"[""Take Out Restaurants"",""Fast Food Restaurants...",Emporium,,US,"Take Out Restaurants, Fast Food Restaurants, R...",,2016-05-23T08:17:29Z,"[""Tags: Pizza, Family Style, and Vegetarian St...",,...,/emporium-pa/mip/foxs-pizza-den-4385237/menu,Fox's Pizza Den,"discover, amex, all major credit cards, visa, ...","[""8144863637""]","[""15834""]",$,PA,5610.0,,http://www.foxspizzaden.com


In [24]:
# Confirm that exactly one row was removed.
rest_df.shape

(17948, 29)

Apart from the party store dropped above, we'll keep the remaining rows whose categories don't mention "restaurant". Now select just a few columns for analysis.

In [25]:
# Keep only the columns needed to place each restaurant on a map.
cols = ["city", "province", "lat", "long"]
locations_df = rest_df.loc[:, cols]
locations_df.head()

Unnamed: 0,city,province,lat,long
0,Clearwater,FL,27.971165,-82.696938
1,Louisville,KY,38.25702,-85.760013
2,Puyallup,WA,47.154828,-122.303992
3,Oberlin,OH,41.290823,-82.21764
4,Pleasant Hill,CA,37.945663,-122.061997


### Install more packages I'll need to show the map of the restaurants

In [26]:
! pip install geopandas



In [27]:
# import descartes
import geopandas as gpd
# from shapely.geometry import Point, Polygon
import plotly.express as px

In [28]:
# Turn each (long, lat) pair into a point geometry and attach it to the
# location columns as a GeoDataFrame.
# NOTE(review): some rows have no coordinates (lat/long contain NaN) and no CRS
# is set here - confirm both are acceptable for the map that follows.
restaurant_points = gpd.points_from_xy(x=locations_df["long"], y=locations_df["lat"])
restaurant_map_df = gpd.GeoDataFrame(locations_df, geometry=restaurant_points)

In [29]:
# Inspect the longitude column; the output shows at least one missing value.
locations_df.long

0        -82.696938
1        -85.760013
2       -122.303992
3        -82.217640
4       -122.061997
            ...    
17944    -93.101769
17945    -81.092260
17946    -80.268463
17947    -80.090760
17948           NaN
Name: long, Length: 17948, dtype: float64

In [None]:
# Write the location columns to rest_locations.csv in the working directory,
# leaving out the index.
locations_df.to_csv('rest_locations.csv', index=False)