In [2]:
import pandas as pd
import numpy as np

In [43]:
# Read business.csv file
df_business = pd.read_csv('business.csv')

# Get the following columns: address, attributes, categories, city, name, postal_code, review_count, stars, state
df_business = df_business[['address', 'attributes', 'categories', 'city', 'name', 'postal_code', 
                           'review_count', 'stars', 'state']]

# Get businesses in the 'state' of NV
df_business = df_business.loc[df_business['state'] == ('NV')]

# Create a column to check if the business is a 'Restaurant'
df_business['is a restaurant'] = df_business['categories'].str.contains("Restaurants")

# Filter dataframe for rows that are True for 'is a restaurant'
df_business = df_business.loc[df_business['is a restaurant'] == (True)]

# Get businesses in Las Vegas of NV
df_business = df_business.loc[df_business['city'].str.contains("Vegas")]

# Drop where attributes is NaN
df_business = df_business.dropna(subset=['attributes'])
df_business

Unnamed: 0,address,attributes,categories,city,name,postal_code,review_count,stars,state,is a restaurant
17,"1775 E Tropicana Ave, Ste 29","{'OutdoorSeating': 'False', 'BusinessAcceptsCr...","Restaurants, Italian",Las Vegas,Carluccio's Tivoli Gardens,89119,40,4.0,NV,True
25,6055 E Lake Mead Blvd,"{'BikeParking': 'True', 'BusinessParking': ""{'...","Mexican, Restaurants, Patisserie/Cake Shop, Fo...",Las Vegas,Maria's Mexican Restaurant & Bakery,89156,184,4.5,NV,True
75,6125 Spring Mountain Rd,"{'RestaurantsPriceRange2': '1', 'Ambience': ""{...","Fast Food, Food, Restaurants, Ice Cream & Froz...",Las Vegas,Dairy Queen,89146,33,2.0,NV,True
135,"Artisan Hotel, 1501 W Sahara Ave","{'RestaurantsAttire': ""'dressy'"", 'Corkage': '...","Restaurants, Pizza, Italian, American (New)",Las Vegas,Artisan Fine Dining Room,89102,3,2.0,NV,True
173,241 W Charleston Blvd,"{'BusinessParking': ""{'garage': False, 'street...","Food, Pizza, Wine Bars, Bars, Restaurants, Nig...",Las Vegas,Bistro Divino,89102,3,4.5,NV,True
174,3655 Las Vegas Blvd S,"{'RestaurantsTakeOut': 'True', 'RestaurantsDel...","French, Restaurants, Creperies",Las Vegas,La Creperie,89109,535,3.5,NV,True
176,2411 W Sahara Ave,"{'RestaurantsDelivery': 'False', 'RestaurantsT...","Buffets, Restaurants",Las Vegas,Feast Buffet,89102,287,3.0,NV,True
206,"3500 Las Vegas Blvd S, Ste E11","{'RestaurantsAttire': ""'casual'"", 'Restaurants...","Sandwiches, Restaurants, Delis, Desserts, Food",Las Vegas,Stage Deli Of Las Vegas,89109,28,3.5,NV,True
214,1263 Silverado Ranch Blvd,"{'RestaurantsDelivery': 'True', 'BusinessParki...","Middle Eastern, Restaurants, Vegetarian, Juice...",Las Vegas,Pita Pit,89183,77,4.0,NV,True
216,"3342 E Sandhill Rd, Ste 11","{'GoodForDancing': 'False', 'GoodForKids': 'Fa...","Dive Bars, Food, Bars, Pubs, Restaurants, Nigh...",Las Vegas,Mr G's Pub & Grub,89121,27,4.0,NV,True


In [44]:
# Convert the dictionary objects in 'attributes' to columns in a new df
df_business["attributes"] = df_business["attributes"].apply(lambda x : dict(eval(x)) )
df_attributes = df_business["attributes"].apply(pd.Series)
df_attributes

Unnamed: 0,AcceptsInsurance,AgesAllowed,Alcohol,Ambience,BYOB,BYOBCorkage,BestNights,BikeParking,BusinessAcceptsBitcoin,BusinessAcceptsCreditCards,...,RestaurantsCounterService,RestaurantsDelivery,RestaurantsGoodForGroups,RestaurantsPriceRange2,RestaurantsReservations,RestaurantsTableService,RestaurantsTakeOut,Smoking,WheelchairAccessible,WiFi
17,,,u'full_bar',"{'romantic': True, 'intimate': False, 'tourist...",,'no',,,,True,...,,False,True,2,True,,True,,,u'no'
25,,,u'beer_and_wine',"{'romantic': False, 'intimate': False, 'classy...",,,,True,,True,...,,False,True,1,False,,True,,,u'no'
75,,,u'none',"{'romantic': False, 'intimate': False, 'classy...",,,,True,,True,...,,False,True,1,False,,True,,,'no'
135,,,u'full_bar',,,'yes_corkage',,False,,True,...,,False,True,4,True,,False,,,u'no'
173,,,,,,,,,,True,...,,,,2,,,,,,
174,,,'none',"{'romantic': False, 'intimate': False, 'touris...",,'yes_free',,False,,True,...,,False,True,2,False,,True,,,'no'
176,,,u'beer_and_wine',"{'romantic': False, 'intimate': False, 'touris...",False,'yes_free',,True,,True,...,,False,True,1,False,True,False,,True,'free'
206,,,,,,,,,,True,...,,False,True,2,,,True,,,
214,,,u'none',"{'romantic': False, 'intimate': False, 'classy...",,,,False,False,True,...,,True,True,1,False,,True,,,'no'
216,,,u'full_bar',"{'romantic': False, 'intimate': False, 'classy...",,,"{'monday': False, 'tuesday': False, 'friday': ...",True,,True,...,,False,True,1,False,True,True,,,'no'


In [46]:
# Since 'Ambience' is a subjective measurement, we decided to remove it from the dataframe. 
# The definition for words like "romantic", "hipster", or "touristy" vary between Yelp users 
# and is difficult to account for.

# Drop 'Ambience' column
df_attributes = df_attributes.drop(['Ambience'], axis = 1)
df_attributes

Unnamed: 0,AcceptsInsurance,AgesAllowed,Alcohol,BYOB,BYOBCorkage,BestNights,BikeParking,BusinessAcceptsBitcoin,BusinessAcceptsCreditCards,BusinessParking,...,RestaurantsCounterService,RestaurantsDelivery,RestaurantsGoodForGroups,RestaurantsPriceRange2,RestaurantsReservations,RestaurantsTableService,RestaurantsTakeOut,Smoking,WheelchairAccessible,WiFi
17,,,u'full_bar',,'no',,,,True,"{'garage': False, 'street': False, 'validated'...",...,,False,True,2,True,,True,,,u'no'
25,,,u'beer_and_wine',,,,True,,True,"{'garage': False, 'street': False, 'validated'...",...,,False,True,1,False,,True,,,u'no'
75,,,u'none',,,,True,,True,"{'garage': False, 'street': False, 'validated'...",...,,False,True,1,False,,True,,,'no'
135,,,u'full_bar',,'yes_corkage',,False,,True,"{'garage': False, 'street': False, 'validated'...",...,,False,True,4,True,,False,,,u'no'
173,,,,,,,,,True,"{'garage': False, 'street': False, 'validated'...",...,,,,2,,,,,,
174,,,'none',,'yes_free',,False,,True,"{'garage': True, 'street': False, 'validated':...",...,,False,True,2,False,,True,,,'no'
176,,,u'beer_and_wine',False,'yes_free',,True,,True,"{'garage': True, 'street': False, 'validated':...",...,,False,True,1,False,True,False,,True,'free'
206,,,,,,,,,True,"{'garage': True, 'street': False, 'validated':...",...,,False,True,2,,,True,,,
214,,,u'none',,,,False,False,True,"{'garage': False, 'street': False, 'validated'...",...,,True,True,1,False,,True,,,'no'
216,,,u'full_bar',,,"{'monday': False, 'tuesday': False, 'friday': ...",True,,True,"{'garage': False, 'street': False, 'validated'...",...,,False,True,1,False,True,True,,,'no'


In [49]:
# Ratio of missing NaN attributes
df_missing = pd.DataFrame([{'column':c, 'missing': (df_attributes[c].isnull().sum()/df_attributes.shape[0])} for c in df_attributes.columns])
df_missing = df_missing.sort_values('missing', ascending=False)
df_missing

# lists of columns that have a missing ratio greater than 0.25%
droppable_features = []
droppable_features.extend(df_missing[df_missing.missing > 0.25].column.tolist())
droppable_features

# Drop columns with more than 25% missing data
df_attributes.drop(droppable_features, axis=1, inplace=True)
df_attributes

Unnamed: 0,Alcohol,BusinessAcceptsCreditCards,BusinessParking,GoodForKids,HasTV,NoiseLevel,OutdoorSeating,RestaurantsAttire,RestaurantsDelivery,RestaurantsGoodForGroups,RestaurantsPriceRange2,RestaurantsReservations,RestaurantsTakeOut,WiFi
17,u'full_bar',True,"{'garage': False, 'street': False, 'validated'...",True,False,u'quiet',False,'casual',False,True,2,True,True,u'no'
25,u'beer_and_wine',True,"{'garage': False, 'street': False, 'validated'...",True,True,'average',False,u'casual',False,True,1,False,True,u'no'
75,u'none',True,"{'garage': False, 'street': False, 'validated'...",True,False,u'average',False,u'casual',False,True,1,False,True,'no'
135,u'full_bar',True,"{'garage': False, 'street': False, 'validated'...",False,True,u'quiet',False,'dressy',False,True,4,True,False,u'no'
173,,True,"{'garage': False, 'street': False, 'validated'...",,,,,,,,2,,,
174,'none',True,"{'garage': True, 'street': False, 'validated':...",True,False,u'average',False,'casual',False,True,2,False,True,'no'
176,u'beer_and_wine',True,"{'garage': True, 'street': False, 'validated':...",True,False,u'average',False,'casual',False,True,1,False,False,'free'
206,,True,"{'garage': True, 'street': False, 'validated':...",True,,,False,'casual',False,True,2,,True,
214,u'none',True,"{'garage': False, 'street': False, 'validated'...",True,False,'average',False,u'casual',True,True,1,False,True,'no'
216,u'full_bar',True,"{'garage': False, 'street': False, 'validated'...",False,True,u'quiet',False,'casual',False,True,1,False,True,'no'


TypeError: eval() arg 1 must be a string, bytes or code object

In [5]:
# Concat df_business and df_attributes
df_combined = pd.concat([df_business, df_attributes], axis=1)
df_combined

Unnamed: 0,address,attributes,categories,city,name,postal_code,review_count,stars,state,is a restaurant,...,RestaurantsCounterService,RestaurantsDelivery,RestaurantsGoodForGroups,RestaurantsPriceRange2,RestaurantsReservations,RestaurantsTableService,RestaurantsTakeOut,Smoking,WheelchairAccessible,WiFi
17,"1775 E Tropicana Ave, Ste 29","{'OutdoorSeating': 'False', 'BusinessAcceptsCr...","Restaurants, Italian",Las Vegas,Carluccio's Tivoli Gardens,89119,40,4.0,NV,True,...,,False,True,2,True,,True,,,u'no'
25,6055 E Lake Mead Blvd,"{'BikeParking': 'True', 'BusinessParking': '{'...","Mexican, Restaurants, Patisserie/Cake Shop, Fo...",Las Vegas,Maria's Mexican Restaurant & Bakery,89156,184,4.5,NV,True,...,,False,True,1,False,,True,,,u'no'
75,6125 Spring Mountain Rd,"{'RestaurantsPriceRange2': '1', 'Ambience': '{...","Fast Food, Food, Restaurants, Ice Cream & Froz...",Las Vegas,Dairy Queen,89146,33,2.0,NV,True,...,,False,True,1,False,,True,,,'no'
135,"Artisan Hotel, 1501 W Sahara Ave","{'RestaurantsAttire': ''dressy'', 'Corkage': '...","Restaurants, Pizza, Italian, American (New)",Las Vegas,Artisan Fine Dining Room,89102,3,2.0,NV,True,...,,False,True,4,True,,False,,,u'no'
173,241 W Charleston Blvd,"{'BusinessParking': '{'garage': False, 'street...","Food, Pizza, Wine Bars, Bars, Restaurants, Nig...",Las Vegas,Bistro Divino,89102,3,4.5,NV,True,...,,,,2,,,,,,
174,3655 Las Vegas Blvd S,"{'RestaurantsTakeOut': 'True', 'RestaurantsDel...","French, Restaurants, Creperies",Las Vegas,La Creperie,89109,535,3.5,NV,True,...,,False,True,2,False,,True,,,'no'
176,2411 W Sahara Ave,"{'RestaurantsDelivery': 'False', 'RestaurantsT...","Buffets, Restaurants",Las Vegas,Feast Buffet,89102,287,3.0,NV,True,...,,False,True,1,False,True,False,,True,'free'
206,"3500 Las Vegas Blvd S, Ste E11","{'RestaurantsAttire': ''casual'', 'Restaurants...","Sandwiches, Restaurants, Delis, Desserts, Food",Las Vegas,Stage Deli Of Las Vegas,89109,28,3.5,NV,True,...,,False,True,2,,,True,,,
214,1263 Silverado Ranch Blvd,"{'RestaurantsDelivery': 'True', 'BusinessParki...","Middle Eastern, Restaurants, Vegetarian, Juice...",Las Vegas,Pita Pit,89183,77,4.0,NV,True,...,,True,True,1,False,,True,,,'no'
216,"3342 E Sandhill Rd, Ste 11","{'GoodForDancing': 'False', 'GoodForKids': 'Fa...","Dive Bars, Food, Bars, Pubs, Restaurants, Nigh...",Las Vegas,Mr G's Pub & Grub,89121,27,4.0,NV,True,...,,False,True,1,False,True,True,,,'no'


In [9]:
# Create new df for restaurants between 1 and 2 dollar signs
df_price_1to2 = df_combined.loc[(df_combined['RestaurantsPriceRange2'] == ('1')) 
                                | (df_combined['RestaurantsPriceRange2'] == ('2'))]

# Create new df for restaurants between 3 and 4 dollar signs
df_price_3to4 = df_combined.loc[(df_combined['RestaurantsPriceRange2'] == ('3')) 
                                | (df_combined['RestaurantsPriceRange2'] == ('4'))]

# Create new df for restaurants with 5 dollar signs
df_price_5 = df_combined.loc[df_combined['RestaurantsPriceRange2'] == ('5')]

array(['2', '1'], dtype=object)

Viewing Restaurants with a 1-2 dollar price range according to Yelp