# Initial Exploration 

## Import Libraries

In [1]:
#Disable Warnings
import warnings
warnings.filterwarnings("ignore")

#Import pandas for dataframes
import pandas as pd
import numpy as np

#Graphing imports
import seaborn as sns
import matplotlib.pyplot as plt

#Import Simple Imputer
from sklearn.impute import SimpleImputer

from scipy import stats

from sklearn.model_selection import train_test_split

## Acquire Data

In [2]:
df = pd.read_csv(r'/Users/malachihale/codeup-data-science/hotel_bookings_project/hotel_bookings.csv')

In [3]:
df.head()

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,deposit_type,agent,company,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date
0,Resort Hotel,0,342,2015,July,27,1,0,0,2,...,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01
1,Resort Hotel,0,737,2015,July,27,1,0,0,2,...,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01
2,Resort Hotel,0,7,2015,July,27,1,0,1,1,...,No Deposit,,,0,Transient,75.0,0,0,Check-Out,2015-07-02
3,Resort Hotel,0,13,2015,July,27,1,0,1,1,...,No Deposit,304.0,,0,Transient,75.0,0,0,Check-Out,2015-07-02
4,Resort Hotel,0,14,2015,July,27,1,0,2,2,...,No Deposit,240.0,,0,Transient,98.0,0,1,Check-Out,2015-07-03


In [4]:
df.columns.to_list()

['hotel',
 'is_canceled',
 'lead_time',
 'arrival_date_year',
 'arrival_date_month',
 'arrival_date_week_number',
 'arrival_date_day_of_month',
 'stays_in_weekend_nights',
 'stays_in_week_nights',
 'adults',
 'children',
 'babies',
 'meal',
 'country',
 'market_segment',
 'distribution_channel',
 'is_repeated_guest',
 'previous_cancellations',
 'previous_bookings_not_canceled',
 'reserved_room_type',
 'assigned_room_type',
 'booking_changes',
 'deposit_type',
 'agent',
 'company',
 'days_in_waiting_list',
 'customer_type',
 'adr',
 'required_car_parking_spaces',
 'total_of_special_requests',
 'reservation_status',
 'reservation_status_date']

In [5]:
df.dtypes

hotel                              object
is_canceled                         int64
lead_time                           int64
arrival_date_year                   int64
arrival_date_month                 object
arrival_date_week_number            int64
arrival_date_day_of_month           int64
stays_in_weekend_nights             int64
stays_in_week_nights                int64
adults                              int64
children                          float64
babies                              int64
meal                               object
country                            object
market_segment                     object
distribution_channel               object
is_repeated_guest                   int64
previous_cancellations              int64
previous_bookings_not_canceled      int64
reserved_room_type                 object
assigned_room_type                 object
booking_changes                     int64
deposit_type                       object
agent                             

In [6]:
for col in df.columns:
    print(df[col].value_counts())

City Hotel      79330
Resort Hotel    40060
Name: hotel, dtype: int64
0    75166
1    44224
Name: is_canceled, dtype: int64
0      6345
1      3460
2      2069
3      1816
4      1715
       ... 
370       1
435       1
371       1
380       1
709       1
Name: lead_time, Length: 479, dtype: int64
2016    56707
2017    40687
2015    21996
Name: arrival_date_year, dtype: int64
August       13877
July         12661
May          11791
October      11160
April        11089
June         10939
September    10508
March         9794
February      8068
November      6794
December      6780
January       5929
Name: arrival_date_month, dtype: int64
33    3580
30    3087
32    3045
34    3040
18    2926
21    2854
28    2853
17    2805
20    2785
29    2763
42    2756
31    2741
41    2699
15    2689
27    2664
25    2663
38    2661
23    2621
35    2593
39    2581
22    2546
24    2498
13    2416
16    2405
19    2402
40    2397
26    2391
43    2352
44    2272
14    2264
37    2229
8     2216
36

## Prepare Data

In [7]:
def handle_missing_values(df, prop_required_column = .5, prop_required_row = .75):
    '''
    We will eliminate all columns with less than 50% non-null, and all rows with less than 75% non-null.
    '''
    threshold = int(round(prop_required_column*len(df.index),0))
    df.dropna(axis=1, thresh=threshold, inplace=True)
    threshold = int(round(prop_required_row*len(df.columns),0))
    df.dropna(axis=0, thresh=threshold, inplace=True)
    return df

In [8]:
def impute_null_values(df):
    '''
    We will use SimpleImputer to impute the mean value into the null values into each column.
    '''
    #We will use the mean imputer function.
    imputer = SimpleImputer(strategy='most_frequent')

    #We will create a for loop that will impute all the null values in each one of our columns.
    for col in df.columns:
        df[[col]] = imputer.fit_transform(df[[col]])
    
    return df

In [9]:
df = handle_missing_values(df)

In [10]:
df = impute_null_values(df)

In [11]:
df.head()

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,booking_changes,deposit_type,agent,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date
0,Resort Hotel,0,342,2015,July,27,1,0,0,2,...,3,No Deposit,9.0,0,Transient,0.0,0,0,Check-Out,2015-07-01
1,Resort Hotel,0,737,2015,July,27,1,0,0,2,...,4,No Deposit,9.0,0,Transient,0.0,0,0,Check-Out,2015-07-01
2,Resort Hotel,0,7,2015,July,27,1,0,1,1,...,0,No Deposit,9.0,0,Transient,75.0,0,0,Check-Out,2015-07-02
3,Resort Hotel,0,13,2015,July,27,1,0,1,1,...,0,No Deposit,304.0,0,Transient,75.0,0,0,Check-Out,2015-07-02
4,Resort Hotel,0,14,2015,July,27,1,0,2,2,...,0,No Deposit,240.0,0,Transient,98.0,0,1,Check-Out,2015-07-03


In [12]:
df.shape

(119390, 31)

In [13]:
df['reservation_status_date'] = pd.to_datetime(df['reservation_status_date'])

In [14]:
df.head()

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,booking_changes,deposit_type,agent,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date
0,Resort Hotel,0,342,2015,July,27,1,0,0,2,...,3,No Deposit,9.0,0,Transient,0.0,0,0,Check-Out,2015-07-01
1,Resort Hotel,0,737,2015,July,27,1,0,0,2,...,4,No Deposit,9.0,0,Transient,0.0,0,0,Check-Out,2015-07-01
2,Resort Hotel,0,7,2015,July,27,1,0,1,1,...,0,No Deposit,9.0,0,Transient,75.0,0,0,Check-Out,2015-07-02
3,Resort Hotel,0,13,2015,July,27,1,0,1,1,...,0,No Deposit,304.0,0,Transient,75.0,0,0,Check-Out,2015-07-02
4,Resort Hotel,0,14,2015,July,27,1,0,2,2,...,0,No Deposit,240.0,0,Transient,98.0,0,1,Check-Out,2015-07-03


In [15]:
pd.set_option('display.max_colwidth', None)

In [16]:
df.head()

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,booking_changes,deposit_type,agent,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date
0,Resort Hotel,0,342,2015,July,27,1,0,0,2,...,3,No Deposit,9.0,0,Transient,0.0,0,0,Check-Out,2015-07-01
1,Resort Hotel,0,737,2015,July,27,1,0,0,2,...,4,No Deposit,9.0,0,Transient,0.0,0,0,Check-Out,2015-07-01
2,Resort Hotel,0,7,2015,July,27,1,0,1,1,...,0,No Deposit,9.0,0,Transient,75.0,0,0,Check-Out,2015-07-02
3,Resort Hotel,0,13,2015,July,27,1,0,1,1,...,0,No Deposit,304.0,0,Transient,75.0,0,0,Check-Out,2015-07-02
4,Resort Hotel,0,14,2015,July,27,1,0,2,2,...,0,No Deposit,240.0,0,Transient,98.0,0,1,Check-Out,2015-07-03


In [17]:
df['date_arrived'] = df["arrival_date_year"].astype(str) + str(" ") + df["arrival_date_month"] + str(" ") + df["arrival_date_day_of_month"].astype(str)

In [18]:
df.date_arrived

0            2015 July 1
1            2015 July 1
2            2015 July 1
3            2015 July 1
4            2015 July 1
               ...      
119385    2017 August 30
119386    2017 August 31
119387    2017 August 31
119388    2017 August 31
119389    2017 August 29
Name: date_arrived, Length: 119390, dtype: object

In [19]:
df['date_arrived'] = pd.to_datetime(df['date_arrived'])

In [20]:
df.date_arrived

0        2015-07-01
1        2015-07-01
2        2015-07-01
3        2015-07-01
4        2015-07-01
            ...    
119385   2017-08-30
119386   2017-08-31
119387   2017-08-31
119388   2017-08-31
119389   2017-08-29
Name: date_arrived, Length: 119390, dtype: datetime64[ns]

In [21]:
def split_data(df):
    '''
    take in a DataFrame and return train, validate, and test DataFrames.
    return train, validate, test DataFrames.
    '''
    
    # splits df into train_validate and test using train_test_split() stratifying on churn to get an even mix of each churn, yes or no
    train_validate, test = train_test_split(df, test_size=.2, random_state=123)
    
    # splits train_validate into train and validate using train_test_split() stratifying on churn to get an even mix of each churn
    train, validate = train_test_split(train_validate, 
                                       test_size=.3, 
                                       random_state=123)
    return train, validate, test

In [22]:
train, validate, test = split_data(df)

In [23]:
train.head()

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,deposit_type,agent,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date,date_arrived
43920,City Hotel,1,87,2015,September,39,25,2,3,2,...,Non Refund,1.0,0,Transient,170.0,0,0,Canceled,2015-09-09,2015-09-25
3642,Resort Hotel,0,49,2015,December,53,28,1,3,2,...,No Deposit,240.0,0,Transient,89.5,1,2,Check-Out,2016-01-01,2015-12-28
27542,Resort Hotel,0,89,2016,August,36,29,1,3,2,...,No Deposit,242.0,0,Transient,183.5,0,1,Check-Out,2016-09-02,2016-08-29
110598,City Hotel,0,294,2017,April,17,28,0,1,2,...,No Deposit,9.0,0,Transient,99.0,0,1,Check-Out,2017-04-29,2017-04-28
74449,City Hotel,1,304,2015,August,34,17,1,1,2,...,Non Refund,1.0,0,Contract,62.0,0,0,Canceled,2015-01-01,2015-08-17


## Exploration

In [24]:
for col in df.columns:
    print(df[col].value_counts())

City Hotel      79330
Resort Hotel    40060
Name: hotel, dtype: int64
0    75166
1    44224
Name: is_canceled, dtype: int64
0      6345
1      3460
2      2069
3      1816
4      1715
       ... 
370       1
435       1
371       1
380       1
709       1
Name: lead_time, Length: 479, dtype: int64
2016    56707
2017    40687
2015    21996
Name: arrival_date_year, dtype: int64
August       13877
July         12661
May          11791
October      11160
April        11089
June         10939
September    10508
March         9794
February      8068
November      6794
December      6780
January       5929
Name: arrival_date_month, dtype: int64
33    3580
30    3087
32    3045
34    3040
18    2926
21    2854
28    2853
17    2805
20    2785
29    2763
42    2756
31    2741
41    2699
15    2689
27    2664
25    2663
38    2661
23    2621
35    2593
39    2581
22    2546
24    2498
13    2416
16    2405
19    2402
40    2397
26    2391
43    2352
44    2272
14    2264
37    2229
8     2216
36

In [25]:
categorical = ['hotel', 
               'is_canceled', 
               'arrival_date_year', 
               'arrival_date_month', 
                'meal', 
              'country', 
              'market_segment', 
              'distribution_channel', 
              'is_repeated_guest', 
              'reserved_room_type', 
              'assigned_room_type', 
              'deposit_type',
              'agent',
              'customer_type', 
              'required_car_parking_spaces', 
              'reservation_status',
              'arrival_date_week_number',
                'arrival_date_day_of_month',
              'total_of_special_requests']

In [26]:
continuous = ['lead_time',
             'stays_in_weekend_nights',
           'stays_in_week_nights',
            'adults',
              'children',
              'babies',
            'previous_cancellations',
              'previous_bookings_not_canceled',
              'booking_changes',
              'days_in_waiting_list',
             ]

In [27]:
dates = ['reservation_status_date', 
        'date_arrived']

In [28]:
for col in continuous:
    test_results = stats.pearsonr(train.adr, train[col])
    print(col, "correlation with daily rate")
    print(test_results)
    print(" ")

lead_time correlation with daily rate
(-0.06360019511957397, 6.94673424302412e-61)
 
stays_in_weekend_nights correlation with daily rate
(0.048688909963678625, 2.202071121463805e-36)
 
stays_in_week_nights correlation with daily rate
(0.05999294614060785, 2.312993798562837e-54)
 
adults correlation with daily rate
(0.2288320597451874, 0.0)
 
children correlation with daily rate
(0.30691703269270615, 0.0)
 
babies correlation with daily rate
(0.0266142060290196, 5.872749916106974e-12)
 
previous_cancellations correlation with daily rate
(-0.06220990055484778, 2.516027186592958e-58)
 
previous_bookings_not_canceled correlation with daily rate
(-0.06967613962130542, 9.858658835151844e-73)
 
booking_changes correlation with daily rate
(0.018860630830353272, 1.076461278740261e-06)
 
days_in_waiting_list correlation with daily rate
(-0.037838782319251016, 1.2770568431633983e-22)
 


In [29]:
categorical

['hotel',
 'is_canceled',
 'arrival_date_year',
 'arrival_date_month',
 'meal',
 'country',
 'market_segment',
 'distribution_channel',
 'is_repeated_guest',
 'reserved_room_type',
 'assigned_room_type',
 'deposit_type',
 'agent',
 'customer_type',
 'required_car_parking_spaces',
 'reservation_status',
 'arrival_date_week_number',
 'arrival_date_day_of_month',
 'total_of_special_requests']

### Hotel Type

In [30]:
train.hotel.value_counts()

City Hotel      44543
Resort Hotel    22315
Name: hotel, dtype: int64

In [31]:
city_hotel = train[train.hotel == 'City Hotel']
resort_hotel = train[train.hotel == "Resort Hotel"]

In [32]:
test_results = stats.ttest_ind(city_hotel.adr, resort_hotel.adr)
test_results

Ttest_indResult(statistic=24.3645011176397, pvalue=1.515001128004243e-130)

In [34]:
for col in categorical:
    print(train[col].value_counts())
    print(" ")

City Hotel      44543
Resort Hotel    22315
Name: hotel, dtype: int64
 
0    42197
1    24661
Name: is_canceled, dtype: int64
 
2016    31744
2017    22761
2015    12353
Name: arrival_date_year, dtype: int64
 
August       7725
July         7124
May          6529
October      6207
April        6179
June         6174
September    5889
March        5417
February     4575
November     3891
December     3773
January      3375
Name: arrival_date_month, dtype: int64
 
BB           51766
HB            8011
SC            6002
Undefined      643
FB             436
Name: meal, dtype: int64
 
PRT    27553
GBR     6693
FRA     5863
ESP     4840
DEU     4040
       ...  
GUY        1
COM        1
ATF        1
HND        1
UGA        1
Name: country, Length: 163, dtype: int64
 
Online TA        31737
Offline TA/TO    13565
Groups           10973
Direct            7029
Corporate         2998
Complementary      415
Aviation           139
Undefined            2
Name: market_segment, dtype: int64
 
TA/T

### Canceled

In [37]:
not_canceled = train[train.is_canceled == 0]
canceled = train[train.is_canceled == 1]

In [38]:
test_results = stats.ttest_ind(not_cancelled.adr, cancelled.adr)
test_results

Ttest_indResult(statistic=-11.842905472579185, pvalue=2.5231057668841014e-32)

### Arrived

In [44]:
arrived_2015 = train[train.arrival_date_year == 2015]
arrived_2016 = train[train.arrival_date_year == 2016]
arrived_2017 = train[train.arrival_date_year == 2017]

In [46]:
test_results = stats.ttest_1samp(arrived_2015.adr, train.adr.mean())
test_results

Ttest_1sampResult(statistic=-37.66847294275936, pvalue=4.8384386985830036e-294)

In [47]:
test_results = stats.ttest_1samp(arrived_2016.adr, train.adr.mean())
test_results

Ttest_1sampResult(statistic=-11.150751895195732, pvalue=8.032855099383927e-29)

In [48]:
test_results = stats.ttest_1samp(arrived_2017.adr, train.adr.mean())
test_results

Ttest_1sampResult(statistic=36.638993581686144, pvalue=1.3214760666002303e-285)

### Month

In [57]:
months = train.arrival_date_month.unique()

In [59]:
list_of_months = list(months)

In [61]:
for month in list_of_months:
    print(month)
    print(stats.ttest_1samp(train[train.arrival_date_month == month].adr, train.adr.mean()))
    print(" ")


September
Ttest_1sampResult(statistic=6.232005393097418, pvalue=4.924204540703545e-10)
 
December
Ttest_1sampResult(statistic=-28.050605358619908, pvalue=2.0421508792209118e-157)
 
August
Ttest_1sampResult(statistic=53.9788186544037, pvalue=0.0)
 
April
Ttest_1sampResult(statistic=-3.18384321238499, pvalue=0.0014605510489305352)
 
October
Ttest_1sampResult(statistic=-30.198211258121244, pvalue=4.922906525488024e-187)
 
July
Ttest_1sampResult(statistic=40.496518325457075, pvalue=0.0)
 
January
Ttest_1sampResult(statistic=-64.02277236105576, pvalue=0.0)
 
November
Ttest_1sampResult(statistic=-54.065961386804666, pvalue=0.0)
 
February
Ttest_1sampResult(statistic=-62.98372197747219, pvalue=0.0)
 
March
Ttest_1sampResult(statistic=-18.659303090838705, pvalue=2.3442075886511354e-75)
 
June
Ttest_1sampResult(statistic=30.138343623794484, pvalue=2.7631178387698705e-186)
 
May
Ttest_1sampResult(statistic=13.013940711677614, pvalue=3.0401145234314255e-38)
 


In [64]:
def one_sample_test(feature):
    first_array = train[feature].unique()
    first_list = list(first_array)
    
    for col in first_list:
        print(col)
        print(stats.ttest_1samp(train[train[feature] == col].adr, train.adr.mean()))
        print(" ")

### Meal

In [66]:
one_sample_test('meal')

BB
Ttest_1sampResult(statistic=-10.739878236329545, pvalue=7.059174089797693e-27)
 
SC
Ttest_1sampResult(statistic=-9.447213650800142, pvalue=4.868945778422467e-21)
 
HB
Ttest_1sampResult(statistic=29.433317961501547, pvalue=6.762498559896371e-181)
 
Undefined
Ttest_1sampResult(statistic=-4.160519991340183, pvalue=3.607609517928183e-05)
 
FB
Ttest_1sampResult(statistic=1.8855966819016665, pvalue=0.06001542958006228)
 


In [68]:
for element in categorical: 
    print(element)
    one_sample_test(element)
    print(" ")

hotel
City Hotel
Ttest_1sampResult(statistic=15.712200096386113, pvalue=1.7592664642776677e-55)
 
Resort Hotel
Ttest_1sampResult(statistic=-16.843202927918025, pvalue=2.895967658105495e-63)
 
 
is_canceled
1
Ttest_1sampResult(statistic=8.604386543953076, pvalue=8.122212268028148e-18)
 
0
Ttest_1sampResult(statistic=-7.642825572519328, pvalue=2.1699028373856572e-14)
 
 
arrival_date_year
2015
Ttest_1sampResult(statistic=-37.66847294275936, pvalue=4.8384386985830036e-294)
 
2016
Ttest_1sampResult(statistic=-11.150751895195732, pvalue=8.032855099383927e-29)
 
2017
Ttest_1sampResult(statistic=36.638993581686144, pvalue=1.3214760666002303e-285)
 
 
arrival_date_month
September
Ttest_1sampResult(statistic=6.232005393097418, pvalue=4.924204540703545e-10)
 
December
Ttest_1sampResult(statistic=-28.050605358619908, pvalue=2.0421508792209118e-157)
 
August
Ttest_1sampResult(statistic=53.9788186544037, pvalue=0.0)
 
April
Ttest_1sampResult(statistic=-3.18384321238499, pvalue=0.0014605510489305352

Ttest_1sampResult(statistic=-1.1201376485344068, pvalue=0.46396446214656983)
 
BIH
Ttest_1sampResult(statistic=0.2106493192621282, pvalue=0.8384273827076364)
 
KWT
Ttest_1sampResult(statistic=2.772856865195988, pvalue=0.02165213665553604)
 
PER
Ttest_1sampResult(statistic=1.2320195594885468, pvalue=0.2337940472074269)
 
TJK
Ttest_1sampResult(statistic=4.768959744824768, pvalue=0.008846823341739283)
 
MDV
Ttest_1sampResult(statistic=-0.2510108410097106, pvalue=0.8141728175468482)
 
MYS
Ttest_1sampResult(statistic=-5.175751601202044, pvalue=0.00017847038379049438)
 
MUS
Ttest_1sampResult(statistic=-2.5454275197993392, pvalue=0.2383104817793428)
 
VNM
Ttest_1sampResult(statistic=0.07847314730233539, pvalue=0.9412205239781931)
 
BLR
Ttest_1sampResult(statistic=0.19276185298152823, pvalue=0.8495710003525374)
 
LBN
Ttest_1sampResult(statistic=1.5507059168775388, pvalue=0.14496773254757866)
 
SEN
Ttest_1sampResult(statistic=0.8614307970509195, pvalue=0.4175293686594438)
 
CRI
Ttest_1sampResul

Ttest_1sampResult(statistic=-10.287334354346983, pvalue=1.1715087866729188e-10)
 
67.0
Ttest_1sampResult(statistic=-10.880618824042894, pvalue=1.2395820417633238e-15)
 
157.0
Ttest_1sampResult(statistic=-0.19401023042154017, pvalue=0.847394230719913)
 
375.0
Ttest_1sampResult(statistic=-14.86787479536797, pvalue=6.404256319576965e-14)
 
156.0
Ttest_1sampResult(statistic=-8.880097787515272, pvalue=7.905253755234542e-14)
 
155.0
Ttest_1sampResult(statistic=-4.927928760902748, pvalue=9.52929440386004e-06)
 
146.0
Ttest_1sampResult(statistic=-18.421873219675454, pvalue=1.6988782995312654e-27)
 
276.0
Ttest_1sampResult(statistic=-inf, pvalue=0.0)
 
31.0
Ttest_1sampResult(statistic=4.587697491126709, pvalue=1.4123303271538797e-05)
 
71.0
Ttest_1sampResult(statistic=-5.294853753507884, pvalue=4.067672855944083e-06)
 
310.0
Ttest_1sampResult(statistic=-3.6194780401418285, pvalue=0.004030142655150108)
 
526.0
Ttest_1sampResult(statistic=0.33522183331830646, pvalue=0.7510647865689619)
 
444.0
Tt

Ttest_1sampResult(statistic=1.9890328359542344, pvalue=0.18500408000999422)
 
446.0
Ttest_1sampResult(statistic=nan, pvalue=nan)
 
425.0
Ttest_1sampResult(statistic=3.969254818652714, pvalue=0.0021989035437136165)
 
117.0
Ttest_1sampResult(statistic=nan, pvalue=nan)
 
280.0
Ttest_1sampResult(statistic=nan, pvalue=nan)
 
107.0
Ttest_1sampResult(statistic=-inf, pvalue=0.0)
 
303.0
Ttest_1sampResult(statistic=1.039719888350702, pvalue=0.4876045378125456)
 
114.0
Ttest_1sampResult(statistic=nan, pvalue=nan)
 
331.0
Ttest_1sampResult(statistic=nan, pvalue=nan)
 
167.0
Ttest_1sampResult(statistic=1.5547766004081485, pvalue=0.36386979771045463)
 
165.0
Ttest_1sampResult(statistic=nan, pvalue=nan)
 
510.0
Ttest_1sampResult(statistic=nan, pvalue=nan)
 
497.0
Ttest_1sampResult(statistic=nan, pvalue=nan)
 
278.0
Ttest_1sampResult(statistic=nan, pvalue=nan)
 
141.0
Ttest_1sampResult(statistic=-0.6946990739063963, pvalue=0.5590973091755473)
 
285.0
Ttest_1sampResult(statistic=nan, pvalue=nan)
 
150

Ttest_1sampResult(statistic=-2.2768678345744857, pvalue=0.022897210486997516)
 
13
Ttest_1sampResult(statistic=3.071683946492371, pvalue=0.0021555823455758675)
 
31
Ttest_1sampResult(statistic=3.3155384625332904, pvalue=0.0009421808974436508)
 
3
Ttest_1sampResult(statistic=-3.1402002403931903, pvalue=0.001710888338536816)
 
18
Ttest_1sampResult(statistic=-1.1296183066963013, pvalue=0.25875560952559146)
 
11
Ttest_1sampResult(statistic=0.4302010932440899, pvalue=0.667095371510058)
 
 
total_of_special_requests
0
Ttest_1sampResult(statistic=-26.3194787893528, pvalue=2.369515339024458e-151)
 
2
Ttest_1sampResult(statistic=25.10643566060999, pvalue=1.688973399278345e-133)
 
1
Ttest_1sampResult(statistic=19.103359973998323, pvalue=1.3939844044175439e-80)
 
3
Ttest_1sampResult(statistic=13.898223579250836, pvalue=2.9989206669931014e-41)
 
4
Ttest_1sampResult(statistic=5.123263754183491, pvalue=7.252193202118327e-07)
 
5
Ttest_1sampResult(statistic=2.253082320366471, pvalue=0.035071904446356