# Attributes Analysis

In [1]:
import json, csv
import pandas as pd
import numpy as np
import nltk
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import re
import researchpy as rp

In [2]:
def read_json(path):
    with open(path, encoding='utf-8') as f:
        iter_f = iter(f)
        line = f.readline()
        text = []
        for line in iter_f: 
            d = json.loads(line)
            text.append(d)
        f.close()
    return(text)

In [3]:
raw_business = read_json("business.json")

seafood_business = []
index = []

for i in range(len(raw_business)):
    if raw_business[i]['categories'] is None:
        continue
    if "Seafood" in raw_business[i]['categories'] and "Restaurants" in raw_business[i]['categories'] \
    and "Steakhouse" not in raw_business[i]['categories'] and len(raw_business[i]['categories']) <= 50:
        if raw_business[i]['review_count'] >= 50:
            seafood_business.append(raw_business[i])
            index.append(i)

In [4]:
seafood_business_df = pd.read_csv("seafood_business.csv")
seafood_business_df.head()

Unnamed: 0.1,Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,postal_code,review_count,stars,state
0,0,550 N Hayden Rd,"{'BusinessAcceptsCreditCards': 'True', 'Restau...",nsNONDHbV7Vudqh21uicqw,"Seafood, Restaurants",Scottsdale,"{'Monday': '16:0-0:0', 'Tuesday': '16:0-0:0', ...",1,33.454286,-111.909405,Salt Cellar,85257,526,3.5,AZ
1,1,7537 S Rainbow Blvd,"{'Caters': 'True', 'RestaurantsReservations': ...",F06m2yQSPHIrb1IT7heYeQ,"Chinese, Seafood, Cantonese, Noodles, Restaurants",Las Vegas,"{'Monday': '11:0-23:0', 'Tuesday': '11:0-23:0'...",1,36.05145,-115.244115,Rainbow Kitchen,89139,101,4.0,NV
2,2,633 College Street W,"{'Ambience': ""{'touristy': False, 'hipster': F...",c_FTil8s5PS2l_YJDQAXhA,"Restaurants, Bars, Seafood, Nightlife, Pubs",Toronto,"{'Monday': '16:0-1:0', 'Tuesday': '16:0-1:0', ...",1,43.654804,-79.414994,Hogtown Pub & Oysters,M6G 3A7,50,4.0,ON
3,3,3700 W Flamingo Rd,"{'BusinessParking': ""{'garage': True, 'street'...",vAbRNF0hJt91oCUpm_sdKw,"Restaurants, Seafood",Las Vegas,"{'Monday': '17:0-22:0', 'Tuesday': '17:0-22:0'...",0,36.117433,-115.187379,Buzios Seafood Restaurant,89103,102,3.5,NV
4,4,53 Scollard Street,"{'Ambience': ""{'romantic': False, 'intimate': ...",z8H4Mdzl4jS9pYswj6Jf9w,"Seafood, Breakfast & Brunch, Italian, Restaurants",Toronto,"{'Monday': '0:0-0:0', 'Tuesday': '11:30-22:0',...",1,43.672142,-79.389392,Buca Yorkville,M5R 0A1,158,3.5,ON


## All attributes

In [5]:
attributes_all = list()

for j in range(len(seafood_business)):
    if seafood_business[j]['attributes'] is None:
        continue
    else:
        attributes = seafood_business[j]['attributes'].keys()
        attributes = list(attributes)
        for i in range(len(attributes)):
            if attributes[i] in attributes_all:
                continue
            else:
                attributes_all.append(attributes[i])
        
sorted(attributes_all, key=str.lower)

['Alcohol',
 'Ambience',
 'BestNights',
 'BikeParking',
 'BusinessAcceptsBitcoin',
 'BusinessAcceptsCreditCards',
 'BusinessParking',
 'BYOB',
 'BYOBCorkage',
 'Caters',
 'CoatCheck',
 'Corkage',
 'DogsAllowed',
 'DriveThru',
 'GoodForDancing',
 'GoodForKids',
 'GoodForMeal',
 'HappyHour',
 'HasTV',
 'Music',
 'NoiseLevel',
 'OutdoorSeating',
 'RestaurantsAttire',
 'RestaurantsDelivery',
 'RestaurantsGoodForGroups',
 'RestaurantsPriceRange2',
 'RestaurantsReservations',
 'RestaurantsTableService',
 'RestaurantsTakeOut',
 'Smoking',
 'WheelchairAccessible',
 'WiFi']

In [6]:
len(attributes_all)

32

### eg. 'Alcohol'

In [7]:
def attributes_items(attribute):
    items_all = list()
    for j in range(len(seafood_business)):
        if seafood_business[j]['attributes'] is None:
            continue
        else:
            attributes = seafood_business[j]['attributes'].keys()
            attributes = list(attributes)
            if attribute in attributes: # if the given attribute exists
                items = seafood_business[j]['attributes'][attribute]
                if items is None:
                    continue
                else:                
                    if items not in items_all:
                        items_all.append(items)                
    return(items_all)

In [8]:
attributes_items('Alcohol')

["'full_bar'",
 "'beer_and_wine'",
 "u'full_bar'",
 "u'none'",
 "u'beer_and_wine'",
 "'none'"]

## Attributes counts

In [9]:
def attributes_counts(attribute):
    count = 0
    for j in range(len(seafood_business)):
        if seafood_business[j]['attributes'] is None:
            continue
        else:
            attributes = seafood_business[j]['attributes'].keys()
            attributes = list(attributes)
            if attribute in attributes: # if the given attribute exists
                count += 1
    return(count)    

In [10]:
attributes_counts('Alcohol')

431

In [11]:
attributes_all = sorted(attributes_all, key=str.lower)
counts = list()

for i in range(len(attributes_all)):
    count = attributes_counts(attributes_all[i])
    counts.append(count)

In [12]:
attributes_counts = pd.concat([pd.Series(attributes_all), pd.Series(counts)], axis=1)
attributes_counts.columns = ["Attributes", "Counts"]
attributes_counts = attributes_counts.sort_values(by='Counts')
attributes_counts

Unnamed: 0,Attributes,Counts
7,BYOB,1
13,DriveThru,5
17,HappyHour,9
29,Smoking,9
14,GoodForDancing,10
10,CoatCheck,10
2,BestNights,10
19,Music,11
11,Corkage,12
8,BYOBCorkage,35


## Attributes selected

In [13]:
attributes_big = attributes_counts.loc[5:,]
attributes_big

Unnamed: 0,Attributes,Counts
5,BusinessAcceptsCreditCards,347
16,GoodForMeal,381
9,Caters,401
3,BikeParking,414
31,WiFi,426
20,NoiseLevel,427
22,RestaurantsAttire,430
0,Alcohol,431
18,HasTV,432
23,RestaurantsDelivery,432


In [14]:
# see attributes contents appeared

for i in attributes_big['Attributes']:
    print(i, ':', attributes_items(i))

BusinessAcceptsCreditCards : ['True', 'False']
GoodForMeal : ["{'dessert': False, 'latenight': False, 'lunch': False, 'dinner': True, 'brunch': False, 'breakfast': False}", "{'dessert': False, 'latenight': True, 'lunch': True, 'dinner': True, 'brunch': False, 'breakfast': False}", "{'dessert': False, 'latenight': False, 'lunch': True, 'dinner': True, 'brunch': False, 'breakfast': False}", "{'dessert': False, 'latenight': False, 'lunch': True, 'dinner': False, 'brunch': False, 'breakfast': False}", "{'dessert': True, 'latenight': False, 'lunch': True, 'dinner': True, 'brunch': False, 'breakfast': False}", "{'dessert': True, 'latenight': False, 'lunch': True, 'dinner': True, 'brunch': True, 'breakfast': False}", "{'dessert': False, 'latenight': True, 'lunch': True, 'dinner': False, 'brunch': True, 'breakfast': False}", "{'dessert': False, 'latenight': False, 'lunch': True, 'dinner': True, 'brunch': True, 'breakfast': False}", "{'dessert': False, 'latenight': False, 'lunch': False, 'dinne

### Attributes that have subdivided levels:
* "GoodForMeal": dessert, latenight, lunch, dinner, brunch, breakfast.
* "Ambience": touristy, hipster, romantic, divey, intimate, trendy, upscale, classy, casual...
* "BusinessParking": garage, street, validated, lot, valet...

In [15]:
attributes_selected = list(attributes_big['Attributes'])
attributes_selected

['BusinessAcceptsCreditCards',
 'GoodForMeal',
 'Caters',
 'BikeParking',
 'WiFi',
 'NoiseLevel',
 'RestaurantsAttire',
 'Alcohol',
 'HasTV',
 'RestaurantsDelivery',
 'RestaurantsPriceRange2',
 'Ambience',
 'OutdoorSeating',
 'RestaurantsGoodForGroups',
 'RestaurantsReservations',
 'BusinessParking',
 'RestaurantsTakeOut',
 'GoodForKids']

In [16]:
business_attributes=[]
for business in seafood_business:
    new_dic={}
    new_dic['business_id']=business['business_id']
    for attribute in attributes_selected:
        if attribute not in business['attributes'].keys():
            new_dic[attribute]=None
        else:
            new_dic[attribute]=business['attributes'][attribute]
    business_attributes.append(new_dic)

In [17]:
def correct_form(attribute):
    for business in business_attributes:
        if business[attribute] != None:
            if re.search('^u(.*)',business[attribute]):
                business[attribute]=re.search('^u(.*)',business[attribute]).group(1)

In [18]:
correct_form('Alcohol')
correct_form('NoiseLevel')
correct_form('RestaurantsAttire')
correct_form('WiFi')

In [19]:
business_attributes_df = pd.DataFrame(business_attributes)
business_attributes_df[0:5]

Unnamed: 0,Alcohol,Ambience,BikeParking,BusinessAcceptsCreditCards,BusinessParking,Caters,GoodForKids,GoodForMeal,HasTV,NoiseLevel,OutdoorSeating,RestaurantsAttire,RestaurantsDelivery,RestaurantsGoodForGroups,RestaurantsPriceRange2,RestaurantsReservations,RestaurantsTakeOut,WiFi,business_id
0,'full_bar',"{'touristy': False, 'hipster': False, 'romanti...",False,True,"{'garage': False, 'street': False, 'validated'...",False,False,"{'dessert': False, 'latenight': False, 'lunch'...",True,'average',False,'casual',False,True,3,True,False,'no',nsNONDHbV7Vudqh21uicqw
1,'beer_and_wine',"{'touristy': False, 'hipster': False, 'romanti...",True,True,"{'garage': False, 'street': False, 'validated'...",True,True,"{'dessert': False, 'latenight': True, 'lunch':...",True,'average',False,'casual',False,True,2,True,True,'free',F06m2yQSPHIrb1IT7heYeQ
2,'full_bar',"{'touristy': False, 'hipster': False, 'romanti...",True,,"{'garage': False, 'street': True, 'validated':...",False,False,"{'dessert': False, 'latenight': True, 'lunch':...",True,'average',False,'casual',False,True,2,True,True,'no',c_FTil8s5PS2l_YJDQAXhA
3,'full_bar',,False,True,"{'garage': True, 'street': False, 'validated':...",,False,,False,'average',False,'casual',False,True,3,True,False,'no',vAbRNF0hJt91oCUpm_sdKw
4,'full_bar',"{'romantic': False, 'intimate': False, 'classy...",False,,"{'garage': True, 'street': False, 'validated':...",False,False,"{'dessert': False, 'latenight': False, 'lunch'...",False,'loud',False,'dressy',False,True,3,True,False,'no',z8H4Mdzl4jS9pYswj6Jf9w


## Make up missing attributes

In [20]:
def find_none(attribute):
    indexs=[]
    for index, row in business_attributes_df.iterrows():
        if row[attribute] is None:
            indexs.append(index)
    return(indexs)

* For **WiFi**:

In [21]:
for i in find_none('WiFi'):
    print('index:', i, ',', seafood_business[i]['name'], ',', seafood_business[i]['city'],  ',', seafood_business[i]['address'])

index: 54 , Cucina Tagliani , Glendale , 17045 N 59th Ave, Ste 101
index: 104 , Ginger Garden , Gilbert , 3310 S Higley Rd, Ste C104
index: 210 , Cucina Tagliani , Peoria , 8349 W Bell Rd
index: 218 , Ahipoki , Phoenix , 1928 E Highland Ave, Ste F104
index: 267 , Island Thyme - A Calypso Grill & Pub , Charlotte , 8129 Ardrey Kell Rd
index: 390 , Master Taco , Surprise , 14291 W Grand Ave, Ste 106
index: 415 , Poke' Fresh , Akron , 46 N Hawkins Ave, Suite 215


In [22]:
#business_attributes_df.loc[54]['WiFi'] doesn't provide 
#business_attributes_df.loc[104]['WiFi'] doesn't provide 
#business_attributes_df.loc[210]['WiFi'] CLOSED
#business_attributes_df.loc[218]['WiFi'] doesn't provide 
#business_attributes_df.loc[267]['WiFi'] doesn't provide 
#business_attributes_df.loc[390]['WiFi'] doesn't provide 
business_attributes_df.loc[415]['WiFi'] = "'no'"

* For **NoiseLevel**:

In [23]:
for i in find_none('NoiseLevel'):
    print('index:', i, ',', seafood_business[i]['name'], ',', seafood_business[i]['city'],  ',', seafood_business[i]['address'])

index: 21 , Cajun Yard Dog , Fort Mill , 901 Crossroads Plz
index: 44 , Ahipoki , Avondale , 10321 W McDowell Rd, Ste A-103
index: 267 , Island Thyme - A Calypso Grill & Pub , Charlotte , 8129 Ardrey Kell Rd
index: 319 , Mariscos El Pariente , Las Vegas , 6182 W Flamingo Rd
index: 361 , Frijoles & Frescas , Las Vegas , 2490 E Desert Inn Rd
index: 390 , Master Taco , Surprise , 14291 W Grand Ave, Ste 106


In [24]:
business_attributes_df.loc[21]['NoiseLevel'] = "'average'"
#business_attributes_df.loc[44]['NoiseLevel'] doesn't provide 
business_attributes_df.loc[267]['NoiseLevel'] = "'average'"
#business_attributes_df.loc[319]['NoiseLevel'] CLOSED
#business_attributes_df.loc[361]['NoiseLevel'] doesn't provide 
business_attributes_df.loc[390]['NoiseLevel'] = "'quiet'"

* For **RestaurantsAttire**:

In [25]:
for i in find_none('RestaurantsAttire'):
    print('index:', i, ',', seafood_business[i]['name'], ',', seafood_business[i]['city'],  ',', seafood_business[i]['address'])

index: 267 , Island Thyme - A Calypso Grill & Pub , Charlotte , 8129 Ardrey Kell Rd
index: 390 , Master Taco , Surprise , 14291 W Grand Ave, Ste 106
index: 415 , Poke' Fresh , Akron , 46 N Hawkins Ave, Suite 215


In [26]:
#business_attributes_df.loc[267]['RestaurantsAttire'] doesn't provide 
#business_attributes_df.loc[390]['RestaurantsAttire'] doesn't provide 
#business_attributes_df.loc[415]['RestaurantsAttire'] doesn't provide 

* For **Alcohol**:

In [27]:
for i in find_none('Alcohol'):
    print('index:', i, ',', seafood_business[i]['name'], ',', seafood_business[i]['city'],  ',', seafood_business[i]['address'])

index: 218 , Ahipoki , Phoenix , 1928 E Highland Ave, Ste F104
index: 267 , Island Thyme - A Calypso Grill & Pub , Charlotte , 8129 Ardrey Kell Rd


In [28]:
#business_attributes_df.loc[218]['Alcohol'] doesn't provide 
business_attributes_df.loc[267]['Alcohol'] = "'full_bar'"

* For **HasTV**:

In [29]:
for i in find_none('HasTV'):
    print('index:', i, ',', seafood_business[i]['name'], ',', seafood_business[i]['city'],  ',', seafood_business[i]['address'])

index: 267 , Island Thyme - A Calypso Grill & Pub , Charlotte , 8129 Ardrey Kell Rd


In [30]:
business_attributes[267]['HasTV'] = 'True'

* For **RestaurantsDelivery**:

In [31]:
for i in find_none('RestaurantsDelivery'):
    print('index:', i, ',', seafood_business[i]['name'], ',', seafood_business[i]['city'],  ',', seafood_business[i]['address'])

index: 258 , Lobster & Pho , Independence , 6901 Rockside Rd


In [32]:
business_attributes[258]['RestaurantsDelivery'] = 'False'

* For **RestaurantsPriceRange2**:

In [33]:
for i in find_none('RestaurantsPriceRange2'):
    print('index:', i, ',', seafood_business[i]['name'], ',', seafood_business[i]['city'],  ',', seafood_business[i]['address'])

index: 267 , Island Thyme - A Calypso Grill & Pub , Charlotte , 8129 Ardrey Kell Rd


In [34]:
business_attributes[267]['RestaurantsPriceRange2'] = '2'

* For **Ambience**:

In [35]:
for i in find_none('Ambience'):
    print('index:', i, ',', seafood_business[i]['name'], ',', seafood_business[i]['city'],  ',', seafood_business[i]['address'])

index: 3 , Buzios Seafood Restaurant , Las Vegas , 3700 W Flamingo Rd


In [36]:
#business_attributes[3]['Ambience'] CLOSED

* For **OutdoorSeating**:

In [37]:
for i in find_none('OutdoorSeating'):
    print('index:', i, ',', seafood_business[i]['name'], ',', seafood_business[i]['city'],  ',', seafood_business[i]['address'])

index: 267 , Island Thyme - A Calypso Grill & Pub , Charlotte , 8129 Ardrey Kell Rd


In [38]:
business_attributes[267]['OutdoorSeating'] = 'True'

## One-Way ANOVA

In [39]:
attributes_stars_df = pd.concat([business_attributes_df, seafood_business_df['stars']], axis=1)
attributes_stars_df.head()

Unnamed: 0,Alcohol,Ambience,BikeParking,BusinessAcceptsCreditCards,BusinessParking,Caters,GoodForKids,GoodForMeal,HasTV,NoiseLevel,OutdoorSeating,RestaurantsAttire,RestaurantsDelivery,RestaurantsGoodForGroups,RestaurantsPriceRange2,RestaurantsReservations,RestaurantsTakeOut,WiFi,business_id,stars
0,'full_bar',"{'touristy': False, 'hipster': False, 'romanti...",False,True,"{'garage': False, 'street': False, 'validated'...",False,False,"{'dessert': False, 'latenight': False, 'lunch'...",True,'average',False,'casual',False,True,3,True,False,'no',nsNONDHbV7Vudqh21uicqw,3.5
1,'beer_and_wine',"{'touristy': False, 'hipster': False, 'romanti...",True,True,"{'garage': False, 'street': False, 'validated'...",True,True,"{'dessert': False, 'latenight': True, 'lunch':...",True,'average',False,'casual',False,True,2,True,True,'free',F06m2yQSPHIrb1IT7heYeQ,4.0
2,'full_bar',"{'touristy': False, 'hipster': False, 'romanti...",True,,"{'garage': False, 'street': True, 'validated':...",False,False,"{'dessert': False, 'latenight': True, 'lunch':...",True,'average',False,'casual',False,True,2,True,True,'no',c_FTil8s5PS2l_YJDQAXhA,4.0
3,'full_bar',,False,True,"{'garage': True, 'street': False, 'validated':...",,False,,False,'average',False,'casual',False,True,3,True,False,'no',vAbRNF0hJt91oCUpm_sdKw,3.5
4,'full_bar',"{'romantic': False, 'intimate': False, 'classy...",False,,"{'garage': True, 'street': False, 'validated':...",False,False,"{'dessert': False, 'latenight': False, 'lunch'...",False,'loud',False,'dressy',False,True,3,True,False,'no',z8H4Mdzl4jS9pYswj6Jf9w,3.5


In [40]:
attributes_list = ['BikeParking', 'BusinessAcceptsCreditCards', 'HasTV', 'Caters', 'GoodForKids', 'OutdoorSeating', 
                   'RestaurantsDelivery', 'RestaurantsGoodForGroups', 'RestaurantsReservations', 'RestaurantsTakeOut']
result_list = []

for i in attributes_list:
    result = stats.f_oneway(attributes_stars_df['stars'][attributes_stars_df[i] == 'False'], 
                            attributes_stars_df['stars'][attributes_stars_df[i] == 'True'])
    result_list.append(result)

In [41]:
attributes_list.append('Alcohol')
result = stats.f_oneway(attributes_stars_df['stars'][attributes_stars_df['Alcohol'] == "'full_bar'"], 
                        attributes_stars_df['stars'][attributes_stars_df['Alcohol'] == "'beer_and_wine'"], 
                        attributes_stars_df['stars'][attributes_stars_df['Alcohol'] == "'none'"])
result_list.append(result)

In [42]:
attributes_list.append('NoiseLevel')
result = stats.f_oneway(attributes_stars_df['stars'][attributes_stars_df['NoiseLevel'] == "'average'"], 
                        attributes_stars_df['stars'][attributes_stars_df['NoiseLevel'] == "'loud'"], 
                        attributes_stars_df['stars'][attributes_stars_df['NoiseLevel'] == "'very_loud'"],
                        attributes_stars_df['stars'][attributes_stars_df['NoiseLevel'] == "'quiet'"])
result_list.append(result)

In [43]:
attributes_list.append('RestaurantsAttire')
result = stats.f_oneway(attributes_stars_df['stars'][attributes_stars_df['RestaurantsAttire'] == "'casual'"], 
                        attributes_stars_df['stars'][attributes_stars_df['RestaurantsAttire'] == "'dressy'"])
result_list.append(result)

In [44]:
attributes_list.append('RestaurantsPriceRange2')
result = stats.f_oneway(attributes_stars_df['stars'][attributes_stars_df['RestaurantsPriceRange2'] == '1'], 
                        attributes_stars_df['stars'][attributes_stars_df['RestaurantsPriceRange2'] == '2'], 
                        attributes_stars_df['stars'][attributes_stars_df['RestaurantsPriceRange2'] == '3'], 
                        attributes_stars_df['stars'][attributes_stars_df['RestaurantsPriceRange2'] == '4'], )
result_list.append(result)

In [45]:
attributes_list.append('WiFi')
result = stats.f_oneway(attributes_stars_df['stars'][attributes_stars_df['WiFi'] == "'free'"], 
                        attributes_stars_df['stars'][attributes_stars_df['WiFi'] == "'no'"], 
                        attributes_stars_df['stars'][attributes_stars_df['WiFi'] == "'paid'"])
result_list.append(result)

In [46]:
attributes_anova = pd.concat([pd.Series(attributes_list), pd.DataFrame(result_list)], axis=1)
attributes_anova.columns = ["Attributes", "f-statistic", "p-value"]
attributes_anova = attributes_anova.sort_values(by='p-value')
attributes_anova

Unnamed: 0,Attributes,f-statistic,p-value
11,NoiseLevel,7.286034,8.9e-05
1,BusinessAcceptsCreditCards,13.533684,0.000272
14,WiFi,6.255627,0.002101
4,GoodForKids,9.100096,0.002707
12,RestaurantsAttire,8.916933,0.002988
10,Alcohol,4.637304,0.010174
9,RestaurantsTakeOut,4.090352,0.043746
7,RestaurantsGoodForGroups,3.815496,0.051427
3,Caters,3.018174,0.083108
13,RestaurantsPriceRange2,1.986279,0.115335


In [47]:
attributes_imp = attributes_anova.loc[:3]
attributes_imp

Unnamed: 0,Attributes,f-statistic,p-value
11,NoiseLevel,7.286034,8.9e-05
1,BusinessAcceptsCreditCards,13.533684,0.000272
14,WiFi,6.255627,0.002101
4,GoodForKids,9.100096,0.002707
12,RestaurantsAttire,8.916933,0.002988
10,Alcohol,4.637304,0.010174
9,RestaurantsTakeOut,4.090352,0.043746
7,RestaurantsGoodForGroups,3.815496,0.051427
3,Caters,3.018174,0.083108


In [48]:
mean_df = rp.summary_cont(attributes_stars_df['stars'].groupby(attributes_stars_df['Alcohol']))
mean_df["Mean"]





Alcohol
'beer_and_wine'    3.747664
'full_bar'         3.577731
'none'             3.712644
Name: Mean, dtype: float64

## Attributes contain subdivided levels

* For **GoodForMeal**:

In [49]:
set(attributes_stars_df['GoodForMeal'])

{"{'dessert': False, 'latenight': False, 'lunch': False, 'dinner': True, 'brunch': False, 'breakfast': False}",
 "{'dessert': True, 'latenight': False, 'lunch': True, 'dinner': False, 'brunch': True, 'breakfast': False}",
 "{'dessert': True, 'latenight': False, 'lunch': True, 'dinner': True, 'brunch': True, 'breakfast': True}",
 "{'dessert': False, 'latenight': False, 'lunch': True, 'dinner': False, 'brunch': True, 'breakfast': True}",
 "{'dessert': False, 'latenight': True, 'lunch': True, 'dinner': False, 'brunch': False, 'breakfast': False}",
 "{'dessert': False, 'latenight': True, 'lunch': True, 'dinner': True, 'brunch': False, 'breakfast': False}",
 "{'dessert': False, 'latenight': True, 'lunch': False, 'dinner': True, 'brunch': False, 'breakfast': False}",
 "{'dessert': False, 'latenight': False, 'lunch': True, 'dinner': True, 'brunch': True, 'breakfast': False}",
 "{'dessert': True, 'latenight': False, 'lunch': True, 'dinner': True, 'brunch': False, 'breakfast': False}",
 "{'dess

In [50]:
dessert = []
latenight = []
lunch = []
dinner = []
brunch = []
breakfast = []

for i in range(len(attributes_stars_df)):
    a = attributes_stars_df['GoodForMeal'][i]
    if a is None:
        dessert.append(None)
        latenight.append(None)
        lunch.append(None)
        dinner.append(None)
        brunch.append(None)
        breakfast.append(None)
    else:
        b = a[1:-1] 
        c = b.split(", ")
        dessert.append(c[0].split(": ")[1]) # get True/False and put in the list
        latenight.append(c[1].split(": ")[1])
        lunch.append(c[2].split(": ")[1])
        dinner.append(c[3].split(": ")[1])
        brunch.append(c[4].split(": ")[1])
        breakfast.append(c[5].split(": ")[1])

In [51]:
GoodForMeal_df = pd.concat([attributes_stars_df[['business_id', 'stars']], pd.Series(dessert), pd.Series(latenight), 
                            pd.Series(lunch), pd.Series(dinner), pd.Series(brunch), pd.Series(breakfast)], axis=1)
GoodForMeal_df.columns = ["business_id", "stars", "dessert", "latenight", "lunch", "dinner", "brunch", "breakfast"]
GoodForMeal_df.head()

Unnamed: 0,business_id,stars,dessert,latenight,lunch,dinner,brunch,breakfast
0,nsNONDHbV7Vudqh21uicqw,3.5,False,False,False,True,False,False
1,F06m2yQSPHIrb1IT7heYeQ,4.0,False,True,True,True,False,False
2,c_FTil8s5PS2l_YJDQAXhA,4.0,False,True,True,True,False,False
3,vAbRNF0hJt91oCUpm_sdKw,3.5,,,,,,
4,z8H4Mdzl4jS9pYswj6Jf9w,3.5,False,False,False,True,False,False


In [52]:
GoodForMeal_list = ["dessert", "latenight", "lunch", "dinner", "brunch", "breakfast"]
result_list = []

for i in GoodForMeal_list:
    result = stats.f_oneway(GoodForMeal_df['stars'][GoodForMeal_df[i] == 'False'], 
                            GoodForMeal_df['stars'][GoodForMeal_df[i] == 'True'])
    result_list.append(result)

In [53]:
GoodForMeal_anova = pd.concat([pd.Series(GoodForMeal_list), pd.DataFrame(result_list)], axis=1)
GoodForMeal_anova.columns = ["GoodForMeal", "f-statistic", "p-value"]
GoodForMeal_anova = GoodForMeal_anova.sort_values(by='p-value')
GoodForMeal_anova

Unnamed: 0,GoodForMeal,f-statistic,p-value
1,latenight,2.677753,0.102589
4,brunch,1.455042,0.228473
5,breakfast,0.462183,0.49702
2,lunch,0.014295,0.904893
3,dinner,0.010353,0.91901
0,dessert,0.000989,0.974926


* For **BusinessParking**:

In [54]:
set(attributes_stars_df['BusinessParking'])

{'None',
 "{'garage': False, 'street': False, 'validated': False, 'lot': False, 'valet': False}",
 "{'garage': False, 'street': False, 'validated': False, 'lot': False, 'valet': True}",
 "{'garage': False, 'street': False, 'validated': False, 'lot': True, 'valet': False}",
 "{'garage': False, 'street': False, 'validated': False, 'lot': True, 'valet': True}",
 "{'garage': False, 'street': False, 'validated': True, 'lot': False, 'valet': False}",
 "{'garage': False, 'street': False, 'validated': True, 'lot': True, 'valet': False}",
 "{'garage': False, 'street': True, 'validated': False, 'lot': False, 'valet': False}",
 "{'garage': False, 'street': True, 'validated': False, 'lot': False, 'valet': True}",
 "{'garage': False, 'street': True, 'validated': False, 'lot': True, 'valet': False}",
 "{'garage': True, 'street': False, 'validated': False, 'lot': False, 'valet': False}",
 "{'garage': True, 'street': False, 'validated': False, 'lot': False, 'valet': True}",
 "{'garage': True, 'street'

In [55]:
garage = []
street = []
validated = []
lot = []
valet = []

for i in range(len(attributes_stars_df)):
    a = attributes_stars_df['BusinessParking'][i]
    if a == 'None':
        garage.append(False)
        street.append(False)
        validated.append(False)
        lot.append(False)
        valet.append(False)
    else:
        b = a[1:-1] 
        c = b.split(", ")
        garage.append(c[0].split(": ")[1]) # get True/False and put in the list
        street.append(c[1].split(": ")[1])
        validated.append(c[2].split(": ")[1])
        lot.append(c[3].split(": ")[1])
        valet.append(c[4].split(": ")[1])

In [56]:
BusinessParking_df = pd.concat([attributes_stars_df[['business_id', 'stars']], pd.Series(garage), pd.Series(street), 
                                pd.Series(validated), pd.Series(lot), pd.Series(valet)], axis=1)
BusinessParking_df.columns = ["business_id", "stars", "garage", "street", "validated", "lot", "valet"]
BusinessParking_df.head()

Unnamed: 0,business_id,stars,garage,street,validated,lot,valet
0,nsNONDHbV7Vudqh21uicqw,3.5,False,False,False,True,False
1,F06m2yQSPHIrb1IT7heYeQ,4.0,False,False,False,True,False
2,c_FTil8s5PS2l_YJDQAXhA,4.0,False,True,False,False,False
3,vAbRNF0hJt91oCUpm_sdKw,3.5,True,False,False,False,True
4,z8H4Mdzl4jS9pYswj6Jf9w,3.5,True,False,False,False,False


In [57]:
BusinessParking_list = ["garage", "street", "validated", "lot", "valet"]
result_list = []

for i in BusinessParking_list:
    result = stats.f_oneway(BusinessParking_df['stars'][BusinessParking_df[i] == 'False'], 
                            BusinessParking_df['stars'][BusinessParking_df[i] == 'True'])
    result_list.append(result)

In [58]:
BusinessParking_anova = pd.concat([pd.Series(BusinessParking_list), pd.DataFrame(result_list)], axis=1)
BusinessParking_anova.columns = ["BusinessParking", "f-statistic", "p-value"]
BusinessParking_anova = BusinessParking_anova.sort_values(by='p-value')
BusinessParking_anova

Unnamed: 0,BusinessParking,f-statistic,p-value
2,validated,5.177691,0.023371
1,street,0.707008,0.400907
4,valet,0.475792,0.490708
0,garage,0.066927,0.795988
3,lot,0.027634,0.86805


## MANOVA

In [75]:
final_df = pd.read_csv("business_attributes.csv")
final_df.head()

Unnamed: 0.1,Unnamed: 0,business_id,stars,RestaurantsTableService,BusinessAcceptsCreditCards,GoodForMeal,Caters,BikeParking,WiFi,NoiseLevel,...,HasTV,RestaurantsDelivery,RestaurantsPriceRange2,Ambience,OutdoorSeating,RestaurantsGoodForGroups,RestaurantsReservations,BusinessParking,RestaurantsTakeOut,GoodForKids
0,0,nsNONDHbV7Vudqh21uicqw,3.5,,True,"{'dessert': False, 'latenight': False, 'lunch'...",False,False,'no','average',...,True,False,3,"{'touristy': False, 'hipster': False, 'romanti...",False,True,True,"{'garage': False, 'street': False, 'validated'...",False,False
1,1,F06m2yQSPHIrb1IT7heYeQ,4.0,True,True,"{'dessert': False, 'latenight': True, 'lunch':...",True,True,'free','average',...,True,False,2,"{'touristy': False, 'hipster': False, 'romanti...",False,True,True,"{'garage': False, 'street': False, 'validated'...",True,True
2,2,c_FTil8s5PS2l_YJDQAXhA,4.0,,True,"{'dessert': False, 'latenight': True, 'lunch':...",False,True,'no','average',...,True,False,2,"{'touristy': False, 'hipster': False, 'romanti...",False,True,True,"{'garage': False, 'street': True, 'validated':...",True,False
3,3,vAbRNF0hJt91oCUpm_sdKw,3.5,,True,,,False,'no','average',...,False,False,3,,False,True,True,"{'garage': True, 'street': False, 'validated':...",False,False
4,4,z8H4Mdzl4jS9pYswj6Jf9w,3.5,True,True,"{'dessert': False, 'latenight': False, 'lunch'...",False,False,'no','loud',...,False,False,3,"{'romantic': False, 'intimate': False, 'classy...",False,True,True,"{'garage': True, 'street': False, 'validated':...",False,False


In [78]:
stars = final_df['stars']
NoiseLevel = final_df['NoiseLevel']
BusinessAcceptsCreditCards = final_df['BusinessAcceptsCreditCards']
WiFi = final_df['WiFi']
GoodForKids = final_df['GoodForKids']
RestaurantsAttire = final_df['RestaurantsAttire']
Alcohol = final_df['Alcohol']
RestaurantsTakeOut = final_df['RestaurantsTakeOut']
RestaurantsGoodForGroups = final_df['RestaurantsGoodForGroups']
Caters = final_df['Caters']

In [61]:
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [82]:
model = ols('stars ~ NoiseLevel * BusinessAcceptsCreditCards * WiFi * GoodForKids * RestaurantsAttire * Alcohol * \
            RestaurantsTakeOut * RestaurantsGoodForGroups * Caters', data=final_df).fit()
table = sm.stats.anova_lm(model, typ=9) # Type 9 ANOVA DataFrame
print(table)

ValueError: Type 9 not understood

In [85]:
Y = [1,3,4,5,2,3,4]
X = range(1,8)
X = sm.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()
results.params

array([2.14285714, 0.25      ])