# Restaurant Industry Consulting Firm
July 16, 2019<br>
Ngoc, Inferential Statistics<br>
Welch’s T-Tests for Open_Pass_Midnight in the DMV Area

-----------------

In this notebook, we want to:
- Find the 2 most popular cuisines in each area (DC, VA, and MD), then run a t-test to determine which one performs best in each area (vs. number of reviews)
- Compare restaurants that stay open past midnight with those that do not (vs. number of reviews, stars)

-------------------

## Import Needed Libraries

In [1]:
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import helper_functions as hf
import matplotlib.pyplot as plt

from scipy import stats

# Use seaborn's "whitegrid" plot style.
sns.set_style(style="whitegrid")
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides pandas/scipy deprecation warnings — confirm that is intended.
warnings.filterwarnings(action="ignore")

## Load Needed Data

In [2]:
# Read one restaurant table per area, in the order DC, VA, MD.
dc, va, md = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/dc_restaurants_midnight.csv",
        "data/va_restaurants_midnight.csv",
        "data/md_restaurants_midnight.csv",
    )
)

## Inferential Statistics

$\alpha$ = 0.05

Helper functions:

In [3]:
def bootstrap(sample, n):
    """Draw one bootstrap resample from ``sample``.

    Parameters
    ----------
    sample : 1-D array-like
        Observations to resample from.
    n : int
        Number of values to draw.

    Returns
    -------
    numpy.ndarray
        ``n`` values picked from ``sample`` with replacement, using NumPy's
        global random state (so ``np.random.seed`` controls the result).
    """
    resampled = np.random.choice(a=sample, size=n, replace=True)
    return resampled

In [4]:
def sampling(samples, n, num):
    """Build a list of bootstrap sample means.

    Parameters
    ----------
    samples : 1-D array-like
        Observations to resample from.
    n : int
        Size of each bootstrap resample (passed to ``bootstrap``).
    num : int
        Number of bootstrap resamples to draw.

    Returns
    -------
    list
        ``num`` values, each the mean of one resample returned by ``bootstrap``.
    """
    return [bootstrap(samples, n).mean() for _ in range(num)]

### DC

In [5]:
# DC rows flagged as open past midnight, re-indexed from 0.
dc_is_open = dc.loc[dc["open_pass_midnight"].eq(True)].reset_index(drop=True)
print(len(dc_is_open))
dc_is_open.head()

527


Unnamed: 0,id,name,price,rating,review_count,open_pass_midnight,state
0,-8cgezcs-xEpJ_K4O8LxVA,Harry's Restaurant,$$,3.0,449,True,DC
1,-OMQv4qP5wTkQ5viTIDVuw,Oyster Riot @ Old Ebbitt Grill,$$$$,4.0,18,True,DC
2,-XkoQgIL-wgHCkqRg9rhQQ,Ben's Next Door,$$,3.5,687,True,DC
3,-bEH3OOngPUCYRGJsnkkDQ,Dupont Italian Kitchen,$$,3.5,319,True,DC
4,-bEzBJAavdMoQTHWxUJ4bA,El Don Restaurant,$,3.0,7,True,DC


In [6]:
# Review counts for the DC rows that are open past midnight.
dc_is_open_review_count = dc_is_open["review_count"]

In [7]:
# DC rows flagged as not open past midnight, re-indexed from 0.
dc_is_close = dc.loc[dc["open_pass_midnight"].eq(False)].reset_index(drop=True)
print(len(dc_is_close))
dc_is_close.head()

2152


Unnamed: 0,id,name,price,rating,review_count,open_pass_midnight,state
0,-3r9TfGuLzx_CRh9b8u6iA,Absolute Thai Restaurant,$$,3.5,374,False,DC
1,-7AZWvUzAiAI-rnAW-WoqQ,Ricks Cafe,$$,3.5,72,False,DC
2,-8KtYXAQNOh1339Z7gl4WQ,Chopt Creative Salad Co.,$,3.0,50,False,DC
3,-9X0Ukem1bpQP0wEW7RRSg,KT Pizza,$$,4.0,34,False,DC
4,-9sZ7KDcpFEY1hLKMI33Vw,Mario's House Pizza,$,4.5,18,False,DC


In [8]:
# Review counts for the DC rows that are not open past midnight.
dc_is_close_review_count = dc_is_close["review_count"]

In [9]:
# Shapiro-Wilk normality test on the raw review counts (DC, open past midnight).
stats.shapiro(x=dc_is_open_review_count)

(0.48723721504211426, 2.7566316485267286e-36)

In [10]:
# Shapiro-Wilk normality test on the raw review counts (DC, not open past midnight).
stats.shapiro(x=dc_is_close_review_count)

(0.37596583366394043, 0.0)

In [11]:
# Seed NumPy's global RNG so the bootstrap draws below are repeatable.
np.random.seed(654121)
# 1000 bootstrap means per group, each taken over 100000 draws.
# NOTE(review): 100000 draws per resample is far more than the 527 / 2152 rows
# in these groups — confirm this resample size is intended.
dc_is_open_review_count_bs = sampling(samples=dc_is_open_review_count, n=100000, num=1000)
dc_is_close_review_count_bs = sampling(samples=dc_is_close_review_count, n=100000, num=1000)

In [12]:
# Shapiro-Wilk normality test on the bootstrap means (DC, open past midnight).
stats.shapiro(x=dc_is_open_review_count_bs)

(0.9985178709030151, 0.5663415789604187)

(0.9985178709030151, 0.5663415789604187)

In [13]:
# Shapiro-Wilk normality test on the bootstrap means (DC, not open past midnight).
stats.shapiro(x=dc_is_close_review_count_bs)

(0.9976388812065125, 0.1620471328496933)

(0.9976388812065125, 0.1620471328496933)

In [14]:
# Welch's t-test (equal_var=False) between the two sets of DC bootstrap means.
# NOTE(review): the inputs are bootstrap means, not the raw review counts, so the
# statistic and p-value scale with the resample size and number of replicates
# chosen in the bootstrap cell — confirm this is the intended inference.
stats.ttest_ind(
    a=dc_is_open_review_count_bs,
    b=dc_is_close_review_count_bs,
    equal_var=False,
)

Ttest_indResult(statistic=1600.7247146141156, pvalue=0.0)

### VA

In [15]:
# VA rows flagged as open past midnight, re-indexed from 0.
va_is_open = va.loc[va["open_pass_midnight"].eq(True)].reset_index(drop=True)
print(len(va_is_open))
va_is_open.head()

313


Unnamed: 0,id,name,price,rating,review_count,open_pass_midnight,state
0,-7mtTaJVlmx9TqWM-CAw-A,Breeze Bakery Cafe,$,4.0,874,True,VA
1,-8TkrTNeebNwFGcfDKJgRQ,Viva Tequila,$$,1.5,26,True,VA
2,-ZVeyeJEL0jHiUO_-u8CAw,Rock It Grill,$$,3.0,268,True,VA
3,-fJExaZdJT03DbTGmxEuCg,Fuzion Hookah Lounge & Vape,$$,2.5,40,True,VA
4,-fZ_-5koplvoYPLegzeXQw,Vermilion,$$$,4.0,807,True,VA


In [16]:
# Review counts for the VA rows that are open past midnight.
va_is_open_review_count = va_is_open["review_count"]

In [17]:
# VA rows flagged as not open past midnight, re-indexed from 0.
va_is_close = va.loc[va["open_pass_midnight"].eq(False)].reset_index(drop=True)
print(len(va_is_close))
va_is_close.head()

1768


Unnamed: 0,id,name,price,rating,review_count,open_pass_midnight,state
0,-0EptA8Ci4gad0arJph4VQ,California Pizza Kitchen at Tysons Corner,$$,3.5,192,False,VA
1,-34O36suTEIwy0DIvpW8PA,Dunkin’ Donuts,$,4.5,2,False,VA
2,-4S7XMRABeCDOvJ7lKqgtw,Twist Cafe & Deli,$,4.5,29,False,VA
3,-8E7ufgStmiNHW25NwOUGA,Five Guys,$,4.0,85,False,VA
4,-9XZ1B_p6j8HBnSzqdMzJQ,Chicken House,$,3.0,14,False,VA


In [18]:
# Review counts for the VA rows that are not open past midnight.
va_is_close_review_count = va_is_close["review_count"]

In [19]:
# Shapiro-Wilk normality test on the raw review counts (VA, open past midnight).
stats.shapiro(x=va_is_open_review_count)

(0.7312374114990234, 3.6642662253808003e-22)

In [20]:
# Shapiro-Wilk normality test on the raw review counts (VA, not open past midnight).
stats.shapiro(x=va_is_close_review_count)

(0.652064323425293, 0.0)

In [21]:
# Seed NumPy's global RNG so the bootstrap draws below are repeatable.
np.random.seed(784518)
# 1000 bootstrap means per group, each taken over 100000 draws.
# NOTE(review): 100000 draws per resample is far more than the 313 / 1768 rows
# in these groups — confirm this resample size is intended.
va_is_open_review_count_bs = sampling(samples=va_is_open_review_count, n=100000, num=1000)
va_is_close_review_count_bs = sampling(samples=va_is_close_review_count, n=100000, num=1000)

In [22]:
# Shapiro-Wilk normality test on the bootstrap means (VA, open past midnight).
stats.shapiro(x=va_is_open_review_count_bs)

(0.9986493587493896, 0.6543724536895752)

(0.9986493587493896, 0.6543724536895752)

In [23]:
# Shapiro-Wilk normality test on the bootstrap means (VA, not open past midnight).
stats.shapiro(x=va_is_close_review_count_bs)

(0.9983330368995667, 0.4506615102291107)

(0.9983330368995667, 0.4506615102291107)

In [24]:
# Welch's t-test (equal_var=False) between the two sets of VA bootstrap means.
# NOTE(review): the inputs are bootstrap means, not the raw review counts, so the
# statistic and p-value scale with the resample size and number of replicates
# chosen in the bootstrap cell — confirm this is the intended inference.
stats.ttest_ind(
    a=va_is_open_review_count_bs,
    b=va_is_close_review_count_bs,
    equal_var=False,
)

Ttest_indResult(statistic=2309.274185339508, pvalue=0.0)

### MD

In [25]:
# MD rows flagged as open past midnight, re-indexed from 0.
md_is_open = md.loc[md["open_pass_midnight"].eq(True)].reset_index(drop=True)
print(len(md_is_open))
md_is_open.head()

277


Unnamed: 0,id,name,price,rating,review_count,open_pass_midnight,state
0,-1Jfl_zix9l1RRWOPmzSEQ,IHOP,$$,2.5,125,True,MD
1,-4B2Bm0hXrr043xOj85dog,Cancun Grill Restaurant,$$,2.5,2,True,MD
2,-EbZgoRwfoHtYkGN2VVbVw,Amo Los Tacos,$$,2.5,34,True,MD
3,-HFG7EOMJ_no-rKdYxbojg,Clyde's of Chevy Chase,$$,3.0,421,True,MD
4,-NVpjIM1b0_4PbdX1d4wXw,Balagger Ethiopian Restaurant and Bar,$$,3.0,26,True,MD


In [26]:
# Review counts for the MD rows that are open past midnight.
md_is_open_review_count = md_is_open["review_count"]

In [27]:
# MD rows flagged as not open past midnight, re-indexed from 0.
md_is_close = md.loc[md["open_pass_midnight"].eq(False)].reset_index(drop=True)
print(len(md_is_close))
md_is_close.head()

1691


Unnamed: 0,id,name,price,rating,review_count,open_pass_midnight,state
0,--saaXOpXBfC6-pvmTP5mw,Not Your Average Joe's,$$,3.5,372,False,MD
1,-34232Z9Njha7auSGehjdQ,Capitol Seafood & Crab,$,4.0,13,False,MD
2,-41K7K3wDEpHSeJtbgKMNQ,The Spot Mini,,3.5,3,False,MD
3,-5nB1PYtZLJgmrOtHrWt0A,Taco Rico,$,4.5,18,False,MD
4,-7s50sIf1d1MizIOIMqcDQ,Conservatory Cafe,$$,3.0,4,False,MD


In [28]:
# Review counts for the MD rows that are not open past midnight.
md_is_close_review_count = md_is_close["review_count"]

In [29]:
# Shapiro-Wilk normality test on the raw review counts (MD, open past midnight).
stats.shapiro(x=md_is_open_review_count)

(0.6407667398452759, 9.523610841841145e-24)

In [30]:
# Shapiro-Wilk normality test on the raw review counts (MD, not open past midnight).
stats.shapiro(x=md_is_close_review_count)

(0.5718902945518494, 0.0)

In [31]:
# Seed NumPy's global RNG so the bootstrap draws below are repeatable.
np.random.seed(954545)
# 1000 bootstrap means per group, each taken over 100000 draws.
# NOTE(review): 100000 draws per resample is far more than the 277 / 1691 rows
# in these groups — confirm this resample size is intended.
md_is_open_review_count_bs = sampling(samples=md_is_open_review_count, n=100000, num=1000)
md_is_close_review_count_bs = sampling(samples=md_is_close_review_count, n=100000, num=1000)

In [32]:
# Shapiro-Wilk normality test on the bootstrap means (MD, open past midnight).
stats.shapiro(x=md_is_open_review_count_bs)

(0.9979653358459473, 0.2684260904788971)

(0.9979653358459473, 0.2684260904788971)

In [33]:
# Shapiro-Wilk normality test on the bootstrap means (MD, not open past midnight).
stats.shapiro(x=md_is_close_review_count_bs)

(0.9987289905548096, 0.7080173492431641)

(0.9987289905548096, 0.7080173492431641)

In [34]:
# Welch's t-test (equal_var=False) between the two sets of MD bootstrap means.
# NOTE(review): the inputs are bootstrap means, not the raw review counts, so the
# statistic and p-value scale with the resample size and number of replicates
# chosen in the bootstrap cell — confirm this is the intended inference.
stats.ttest_ind(
    a=md_is_open_review_count_bs,
    b=md_is_close_review_count_bs,
    equal_var=False,
)

Ttest_indResult(statistic=130.7624749763433, pvalue=0.0)