# Data Aggregation and Group Operations

In [1]:
import pandas as pd
import numpy as np

## GroupBy mechanics

In [5]:
# Number of rows in the synthetic sample.
nrows = 10

# Seed the global NumPy generator so the random columns are reproducible.
np.random.seed(42)

# The values are drawn in dict order (company, data1, city, income);
# reordering the draws would change the generated numbers.
columns = {
    'company': np.random.choice(list('ab'), nrows),
    'data1': np.random.randn(nrows)*50 + 100,
    'city': np.random.choice(list('MP'), nrows),
    'income': np.random.randn(nrows)*30000 + 50000,
}
df = pd.DataFrame(columns)
df

Unnamed: 0,company,data1,city,income
0,a,113.952065,P,52025.846141
1,b,150.525764,P,7257.554414
2,a,70.956093,P,33668.518264
3,a,73.74151,P,53327.677691
4,a,71.430992,P,15470.192677
5,b,53.795858,M,61270.94055
6,a,-30.627451,P,31980.839302
7,a,147.518484,P,41249.187506
8,a,140.822254,M,31948.801633
9,b,23.8062,P,105568.345535


In [7]:
# Group rows by company; with no aggregation applied the result is a DataFrameGroupBy object.
df.groupby('company')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fa2d2fc15d0>

In [8]:
# Per-company mean of the numeric columns. They are selected explicitly because the
# string column `city` cannot be averaged (pandas >= 2.0 raises instead of dropping it).
df.groupby('company')[['data1', 'income']].mean()

Unnamed: 0_level_0,data1,income
company,Unnamed: 1_level_1,Unnamed: 2_level_1
a,83.970564,37095.866174
b,76.042607,58032.280166


In [10]:
# Store the GroupBy object under a name; a later cell aggregates it with sum().
grouped_df = df.groupby('company')
grouped_df

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fa2fff1fd90>

In [12]:
# Per-company totals of the numeric columns. Selecting them explicitly keeps the string
# column `city` out of the sum (pandas >= 2.0 would otherwise concatenate its values).
grouped_df[['data1', 'income']].sum()

Unnamed: 0_level_0,data1,income
company,Unnamed: 1_level_1,Unnamed: 2_level_1
a,587.793947,259671.063215
b,228.127822,174096.840499


In [14]:
# A list of keys groups by every (company, city) combination; the result is indexed by both keys.
df.groupby(['company', 'city']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,data1,income
company,city,Unnamed: 2_level_1,Unnamed: 3_level_1
a,M,140.822254,31948.801633
a,P,74.495282,37953.710264
b,M,53.795858,61270.94055
b,P,87.165982,56412.949974


In [15]:
# Averages every remaining column first and only then picks `income` from the result.
df.groupby(['company', 'city']).mean()['income']

company  city
a        M       31948.801633
         P       37953.710264
b        M       61270.940550
         P       56412.949974
Name: income, dtype: float64

In [16]:
# Select `income` on the GroupBy before aggregating, so only that column is averaged.
df.groupby(['company', 'city'])['income'].mean()

company  city
a        M       31948.801633
         P       37953.710264
b        M       61270.940550
         P       56412.949974
Name: income, dtype: float64

In [17]:
# Keep the per-(company, city) income means as a Series named `means`.
means = df.groupby(['company', 'city'])['income'].mean()
means

company  city
a        M       31948.801633
         P       37953.710264
b        M       61270.940550
         P       56412.949974
Name: income, dtype: float64

In [18]:
# The two grouping keys form the levels of a MultiIndex.
means.index

MultiIndex([('a', 'M'),
            ('a', 'P'),
            ('b', 'M'),
            ('b', 'P')],
           names=['company', 'city'])

In [20]:
# Label-based lookup of a single (company, city) entry of the MultiIndex.
means.loc[('b', 'M')]

61270.94055037016

In [22]:
# Move the index levels (company, city) back into ordinary columns.
means.reset_index()

Unnamed: 0,company,city,income
0,a,M,31948.801633
1,a,P,37953.710264
2,b,M,61270.94055
3,b,P,56412.949974


In [23]:
# as_index=False returns the grouping keys as regular columns instead of as the index.
df.groupby(['company', 'city'], as_index=False)['income'].mean()

Unnamed: 0,company,city,income
0,a,M,31948.801633
1,a,P,37953.710264
2,b,M,61270.94055
3,b,P,56412.949974


### Iterating over groups

In [24]:
# Iterating over a GroupBy yields (group label, sub-frame) pairs.
for company_name, company_rows in df.groupby('company'):
    # One print call: label, blank line, the group's rows, then a separator.
    print(company_name, '', company_rows, '-----------------', sep='\n')

a

  company       data1 city        income
0       a  113.952065    P  52025.846141
2       a   70.956093    P  33668.518264
3       a   73.741510    P  53327.677691
4       a   71.430992    P  15470.192677
6       a  -30.627451    P  31980.839302
7       a  147.518484    P  41249.187506
8       a  140.822254    M  31948.801633
-----------------
b

  company       data1 city         income
1       b  150.525764    P    7257.554414
5       b   53.795858    M   61270.940550
9       b   23.806200    P  105568.345535
-----------------


In [27]:
# Materialise the groups as a {company label: sub-frame} dictionary.
all_data = {label: rows for label, rows in df.groupby('company')}
all_data['a']

Unnamed: 0,company,data1,city,income
0,a,113.952065,P,52025.846141
2,a,70.956093,P,33668.518264
3,a,73.74151,P,53327.677691
4,a,71.430992,P,15470.192677
6,a,-30.627451,P,31980.839302
7,a,147.518484,P,41249.187506
8,a,140.822254,M,31948.801633


In [28]:
# Sub-frame holding the rows of company 'b'.
all_data['b']

Unnamed: 0,company,data1,city,income
1,b,150.525764,P,7257.554414
5,b,53.795858,M,61270.94055
9,b,23.8062,P,105568.345535


### Selecting a column or subset of columns

In [29]:
# Redisplay the full frame (nrows = 10 rows).
df

Unnamed: 0,company,data1,city,income
0,a,113.952065,P,52025.846141
1,b,150.525764,P,7257.554414
2,a,70.956093,P,33668.518264
3,a,73.74151,P,53327.677691
4,a,71.430992,P,15470.192677
5,b,53.795858,M,61270.94055
6,a,-30.627451,P,31980.839302
7,a,147.518484,P,41249.187506
8,a,140.822254,M,31948.801633
9,b,23.8062,P,105568.345535


In [30]:
# Select several columns with a list (double brackets): indexing a GroupBy with a bare
# tuple of names was deprecated in pandas 1.0 and no longer works in pandas 2.0.
df.groupby('company')[['data1', 'city']].max()

Unnamed: 0_level_0,data1,city
company,Unnamed: 1_level_1,Unnamed: 2_level_1
a,147.518484,P
b,150.525764,P


In [43]:
# Manual cross-check for company 'a': filter rows and pick columns in one .loc call.
df.loc[df['company'] == 'a', ['data1', 'city']].max()

data1    147.518
city           P
dtype: object

In [44]:
# Manual cross-check for company 'b': filter rows and pick columns in one .loc call.
df.loc[df['company'] == 'b', ['data1', 'city']].max()

data1    150.526
city           P
dtype: object

## Data aggregation

In [46]:
# Per-city mean of the numeric columns. They are selected explicitly because the
# string column `company` cannot be averaged (pandas >= 2.0 raises instead of dropping it).
df.groupby('city')[['data1', 'income']].mean()

Unnamed: 0_level_0,data1,income
city,Unnamed: 1_level_1,Unnamed: 2_level_1
M,97.309056,46609.871092
P,77.662957,42568.520191


In [47]:
# First entry of each column within every city group.
df.groupby('city').first()

Unnamed: 0_level_0,company,data1,income
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
M,b,53.795858,61270.94055
P,a,113.952065,52025.846141


In [49]:
# 90th percentile of the two numeric columns within each city.
df.groupby('city')[['data1', 'income']].quantile(.9)

Unnamed: 0_level_0,data1,income
city,Unnamed: 1_level_1,Unnamed: 2_level_1
M,132.119614,58338.726659
P,148.420668,68999.878044


In [50]:
# Summary statistics (count, mean, std, min, quartiles, max) per city for each numeric column.
df.groupby('city').describe()

Unnamed: 0_level_0,data1,data1,data1,data1,data1,data1,data1,data1,income,income,income,income,income,income,income,income
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
city,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
M,2.0,97.309056,61.536955,53.795858,75.552457,97.309056,119.065655,140.822254,2.0,46609.871092,20733.883267,31948.801633,39279.336362,46609.871092,53940.405821,61270.94055
P,8.0,77.662957,61.216527,-30.627451,59.16862,72.586251,122.343669,150.525764,8.0,42568.520191,30115.045588,7257.554414,27853.177646,37458.852885,52351.304028,105568.345535


In [52]:
# Keep the per-city summary so a single statistic can be looked up by (column, statistic).
stats = df.groupby('city').describe()

In [56]:
# Number of non-null values per column in each city group.
df.groupby('city').count()

Unnamed: 0_level_0,company,data1,income
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
M,2,2,2
P,8,8,8


In [55]:
# The columns of `stats` are (column, statistic) pairs; this picks the mean of data1.
stats['data1', 'mean']

city
M    97.309056
P    77.662957
Name: (data1, mean), dtype: float64

In [57]:
# Download the example tips dataset into the working directory as tips.csv.
# NOTE(review): relies on the `wget` shell command being installed — confirm for the target environment.
!wget https://raw.githubusercontent.com/wesm/pydata-book/1st-edition/ch08/tips.csv

--2019-12-14 12:20:55--  https://raw.githubusercontent.com/wesm/pydata-book/1st-edition/ch08/tips.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.132.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.132.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7943 (7,8K) [text/plain]
Saving to: ‘tips.csv’


2019-12-14 12:20:55 (12,8 MB/s) - ‘tips.csv’ saved [7943/7943]



In [58]:
# Load the downloaded CSV and preview the first five rows.
tips = pd.read_csv('tips.csv')
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


### Column-wise and multiple function application

In [59]:
# Mean of the numeric columns per sex. They are selected explicitly because the string
# columns (smoker, day, time) cannot be averaged and make pandas >= 2.0 raise.
tips.groupby('sex')[['total_bill', 'tip', 'size']].mean()

Unnamed: 0_level_0,total_bill,tip,size
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,18.056897,2.833448,2.45977
Male,20.744076,3.089618,2.630573


In [60]:
# Standard deviation of the numeric columns per sex. They are selected explicitly because
# the string columns (smoker, day, time) have no standard deviation and make pandas >= 2.0 raise.
tips.groupby('sex')[['total_bill', 'tip', 'size']].std()

Unnamed: 0_level_0,total_bill,tip,size
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,8.009209,1.159495,0.937644
Male,9.246469,1.489102,0.955997


In [61]:
# A list of function names applies each of them to every selected column.
# Only the numeric columns are selected: the string columns make pandas >= 2.0 raise.
tips.groupby('sex')[['total_bill', 'tip', 'size']].agg(['mean', 'std'])

Unnamed: 0_level_0,total_bill,total_bill,tip,tip,size,size
Unnamed: 0_level_1,mean,std,mean,std,mean,std
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Female,18.056897,8.009209,2.833448,1.159495,2.45977,0.937644
Male,20.744076,9.246469,3.089618,1.489102,2.630573,0.955997


In [62]:
# Mean, standard deviation and count for every numeric column per sex.
# Only the numeric columns are selected: the string columns make pandas >= 2.0 raise.
tips.groupby('sex')[['total_bill', 'tip', 'size']].agg(['mean', 'std', 'count'])

Unnamed: 0_level_0,total_bill,total_bill,total_bill,tip,tip,tip,size,size,size
Unnamed: 0_level_1,mean,std,count,mean,std,count,mean,std,count
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Female,18.056897,8.009209,87,2.833448,1.159495,87,2.45977,0.937644,87
Male,20.744076,9.246469,157,3.089618,1.489102,157,2.630573,0.955997,157


In [67]:
# Aggregations can be given as names or as callables. 'mean' and 'std' are passed as
# strings because handing np.mean / np.std to agg is deprecated in recent pandas.
# Only the numeric columns are selected: the string columns make pandas >= 2.0 raise.
# NOTE(review): np.count_nonzero counts non-zero entries, which matches the group size
# only while none of these columns contains 0 — confirm that is the intent.
tips.groupby(['sex', 'smoker'])[['total_bill', 'tip', 'size']].agg(['mean', 'std', np.count_nonzero])

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,total_bill,total_bill,tip,tip,tip,size,size,size
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,count_nonzero,mean,std,count_nonzero,mean,std,count_nonzero
sex,smoker,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
Female,No,18.105185,7.286455,54.0,2.773519,1.128425,54.0,2.592593,1.073146,54
Female,Yes,17.977879,9.189751,33.0,2.931515,1.219916,33.0,2.242424,0.613917,33
Male,No,19.791237,8.726566,97.0,3.113402,1.489559,97.0,2.71134,0.989094,97
Male,Yes,22.2845,9.911845,60.0,3.051167,1.50012,60.0,2.5,0.89253,60


In [68]:
def minmax(series):
    """Return the spread of `series`: its maximum minus its minimum."""
    lowest = series.min()
    highest = series.max()
    return highest - lowest

In [69]:
# Apply the custom max-minus-min aggregation to the numeric columns only: the subtraction
# is undefined for the string columns (day, time), which makes pandas >= 2.0 raise.
tips.groupby(['sex', 'smoker'])[['size', 'tip', 'total_bill']].agg(minmax)

Unnamed: 0_level_0,Unnamed: 1_level_0,size,tip,total_bill
sex,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,No,5,4.2,28.58
Female,Yes,3,5.5,41.23
Male,No,4,7.75,40.82
Male,Yes,4,9.0,43.56


In [70]:
# A dict maps each column to its own aggregation; only the listed columns appear in the result.
# NOTE(review): np.count_nonzero counts non-zero entries, which equals the group size only
# while `size` never contains 0 — 'count' may be the intent; confirm.
tips.groupby(['sex', 'smoker']).agg({'total_bill':'mean', 'size':np.count_nonzero})

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,size
sex,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,No,18.105185,54
Female,Yes,17.977879,33
Male,No,19.791237,97
Male,Yes,22.2845,60


### Group-wise operations and transformations

In [75]:
# Mean and standard deviation of total_bill per smoker group. Both aggregations are named
# by string: passing np.mean to agg is deprecated in recent pandas, and mixing a callable
# with a string was inconsistent.
smoker_gb = tips.groupby('smoker')['total_bill'].agg(['mean', 'std'])
smoker_gb

Unnamed: 0_level_0,mean,std
smoker,Unnamed: 1_level_1,Unnamed: 2_level_1
No,19.188278,8.255582
Yes,20.756344,9.832154


In [80]:
# Attach each smoker group's mean/std to every row: the `smoker` values on the left
# are matched against the index of smoker_gb.
tips.merge(smoker_gb, right_index=True, left_on='smoker')

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,mean,std
0,16.99,1.01,Female,No,Sun,Dinner,2,19.188278,8.255582
1,10.34,1.66,Male,No,Sun,Dinner,3,19.188278,8.255582
2,21.01,3.50,Male,No,Sun,Dinner,3,19.188278,8.255582
3,23.68,3.31,Male,No,Sun,Dinner,2,19.188278,8.255582
4,24.59,3.61,Female,No,Sun,Dinner,4,19.188278,8.255582
...,...,...,...,...,...,...,...,...,...
234,15.53,3.00,Male,Yes,Sat,Dinner,2,20.756344,9.832154
236,12.60,1.00,Male,Yes,Sat,Dinner,2,20.756344,9.832154
237,32.83,1.17,Male,Yes,Sat,Dinner,2,20.756344,9.832154
240,27.18,2.00,Female,Yes,Sat,Dinner,2,20.756344,9.832154


### Apply: General split-apply-combine

In [83]:
# Tip as a fraction of the total bill; this adds a new column to `tips` in place.
tips['tip_pct'] = tips['tip']/tips['total_bill']
tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tip_pct
0,16.99,1.01,Female,No,Sun,Dinner,2,0.059447
1,10.34,1.66,Male,No,Sun,Dinner,3,0.160542
2,21.01,3.50,Male,No,Sun,Dinner,3,0.166587
3,23.68,3.31,Male,No,Sun,Dinner,2,0.139780
4,24.59,3.61,Female,No,Sun,Dinner,4,0.146808
...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,0.203927
240,27.18,2.00,Female,Yes,Sat,Dinner,2,0.073584
241,22.67,2.00,Male,Yes,Sat,Dinner,2,0.088222
242,17.82,1.75,Male,No,Sat,Dinner,2,0.098204


In [84]:
def top(df, n=5, col='tip_pct'):
    """Return the `n` rows of `df` with the largest values in column `col`.

    Rows come back in descending order of `col`.
    """
    ranked = df.sort_values(by=col, ascending=False)
    return ranked.head(n)

In [86]:
# Mean and standard deviation of tip_pct per smoker group. The aggregations are named by
# string because passing np.mean / np.std to agg is deprecated in recent pandas.
tips_by_smoker = tips.groupby('smoker')['tip_pct'].agg(['mean', 'std'])
tips_by_smoker

Unnamed: 0_level_0,mean,std
smoker,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.159328,0.03991
Yes,0.163196,0.085119


In [89]:
# Broadcast the per-smoker tip_pct mean/std back onto the individual rows.
merged = tips.merge(tips_by_smoker, left_on='smoker', right_index=True)
merged

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tip_pct,mean,std
0,16.99,1.01,Female,No,Sun,Dinner,2,0.059447,0.159328,0.039910
1,10.34,1.66,Male,No,Sun,Dinner,3,0.160542,0.159328,0.039910
2,21.01,3.50,Male,No,Sun,Dinner,3,0.166587,0.159328,0.039910
3,23.68,3.31,Male,No,Sun,Dinner,2,0.139780,0.159328,0.039910
4,24.59,3.61,Female,No,Sun,Dinner,4,0.146808,0.159328,0.039910
...,...,...,...,...,...,...,...,...,...,...
234,15.53,3.00,Male,Yes,Sat,Dinner,2,0.193175,0.163196,0.085119
236,12.60,1.00,Male,Yes,Sat,Dinner,2,0.079365,0.163196,0.085119
237,32.83,1.17,Male,Yes,Sat,Dinner,2,0.035638,0.163196,0.085119
240,27.18,2.00,Female,Yes,Sat,Dinner,2,0.073584,0.163196,0.085119


In [90]:
# Rows with the highest tip_pct overall, using the defaults n=5 and col='tip_pct'.
top(merged)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tip_pct,mean,std
172,7.25,5.15,Male,Yes,Sun,Dinner,2,0.710345,0.163196,0.085119
178,9.6,4.0,Female,Yes,Sun,Dinner,2,0.416667,0.163196,0.085119
67,3.07,1.0,Female,Yes,Sat,Dinner,1,0.325733,0.163196,0.085119
232,11.61,3.39,Male,No,Sat,Dinner,2,0.29199,0.159328,0.03991
183,23.17,6.5,Male,Yes,Sun,Dinner,4,0.280535,0.163196,0.085119


In [91]:
# Apply `top` to each sex group separately; the group label becomes the outer index level.
merged.groupby('sex').apply(top)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,tip,sex,smoker,day,time,size,tip_pct,mean,std
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Female,178,9.6,4.0,Female,Yes,Sun,Dinner,2,0.416667,0.163196,0.085119
Female,67,3.07,1.0,Female,Yes,Sat,Dinner,1,0.325733,0.163196,0.085119
Female,109,14.31,4.0,Female,Yes,Sat,Dinner,2,0.279525,0.163196,0.085119
Female,93,16.32,4.3,Female,Yes,Fri,Dinner,2,0.26348,0.163196,0.085119
Female,221,13.42,3.48,Female,Yes,Fri,Lunch,2,0.259314,0.163196,0.085119
Male,172,7.25,5.15,Male,Yes,Sun,Dinner,2,0.710345,0.163196,0.085119
Male,232,11.61,3.39,Male,No,Sat,Dinner,2,0.29199,0.159328,0.03991
Male,183,23.17,6.5,Male,Yes,Sun,Dinner,4,0.280535,0.163196,0.085119
Male,149,7.51,2.0,Male,No,Thur,Lunch,2,0.266312,0.159328,0.03991
Male,181,23.33,5.65,Male,Yes,Sun,Dinner,2,0.242177,0.163196,0.085119


#### Suppressing the group keys

In [95]:
# group_keys=False leaves the group labels out of the result index.
tips.groupby(['sex', 'smoker'], group_keys=False).apply(top)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tip_pct
51,10.29,2.6,Female,No,Sun,Dinner,2,0.252672
139,13.16,2.75,Female,No,Thur,Lunch,2,0.208967
18,16.97,3.5,Female,No,Sun,Dinner,3,0.206246
14,14.83,3.02,Female,No,Sun,Dinner,2,0.203641
115,17.31,3.5,Female,No,Sun,Dinner,2,0.202195
178,9.6,4.0,Female,Yes,Sun,Dinner,2,0.416667
67,3.07,1.0,Female,Yes,Sat,Dinner,1,0.325733
109,14.31,4.0,Female,Yes,Sat,Dinner,2,0.279525
93,16.32,4.3,Female,Yes,Fri,Dinner,2,0.26348
221,13.42,3.48,Female,Yes,Fri,Lunch,2,0.259314


### Quantile and bucket analysis

In [97]:
# Split total_bill into 5 equal-width intervals.
pd.cut(merged['total_bill'], 5)

0      (12.618, 22.166]
1       (3.022, 12.618]
2      (12.618, 22.166]
3      (22.166, 31.714]
4      (22.166, 31.714]
             ...       
234    (12.618, 22.166]
236     (3.022, 12.618]
237    (31.714, 41.262]
240    (22.166, 31.714]
241    (22.166, 31.714]
Name: total_bill, Length: 244, dtype: category
Categories (5, interval[float64]): [(3.022, 12.618] < (12.618, 22.166] < (22.166, 31.714] < (31.714, 41.262] < (41.262, 50.81]]

In [103]:
# Number of rows per total_bill bucket. The buckets are cut from `tips` itself rather than
# from `merged`, so the grouping no longer relies on index alignment between two frames.
# `size` is selected before counting, and observed=False keeps empty buckets in the result.
tips.groupby(pd.cut(tips['total_bill'], 5), observed=False)['size'].count()

total_bill
(3.022, 12.618]      49
(12.618, 22.166]    119
(22.166, 31.714]     50
(31.714, 41.262]     19
(41.262, 50.81]       7
Name: size, dtype: int64

In [105]:
# Without bucketing, each distinct total_bill value becomes its own group.
tips.groupby('total_bill')['size'].count()

total_bill
3.07     1
5.75     1
7.25     2
7.51     1
7.56     1
        ..
45.35    1
48.17    1
48.27    1
48.33    1
50.81    1
Name: size, Length: 229, dtype: int64

### Example: Filling missing values with group-specific values

In [53]:
# Six random values with every other entry (positions 0, 2, 4) set to NaN.
# `pd.Series` is used because the notebook imports pandas only as `pd`;
# the bare name `Series` is undefined on a fresh kernel.
s = pd.Series(np.random.randn(6))
s.iloc[::2] = np.nan
s

0         NaN
1    0.276356
2         NaN
3   -0.679335
4         NaN
5   -1.930931
dtype: float64

In [54]:
# Replace every NaN with the mean of the non-missing values.
s.fillna(s.mean())

0   -0.777970
1    0.276356
2   -0.777970
3   -0.679335
4   -0.777970
5   -1.930931
dtype: float64

In [55]:
# Eight states: the first four are labelled East, the last four West.
states = ['Ohio', 'New York', 'Vermont', 'Florida',
          'Oregon', 'Nevada', 'California', 'Idaho']
group_key = ['East'] * 4 + ['West'] * 4

# `pd.Series` is used because the notebook imports pandas only as `pd`;
# the bare name `Series` is undefined on a fresh kernel.
data = pd.Series(np.random.randn(8), index=states)

# Blank out a few states so each region has values to fill in.
data.loc[['Vermont', 'Nevada', 'Idaho']] = np.nan
data

Ohio         -0.607638
New York     -0.195258
Vermont            NaN
Florida       0.422648
Oregon        2.001614
Nevada             NaN
California   -1.104685
Idaho              NaN
dtype: float64

In [56]:
# Group by an external list of labels, one per element of `data`
# (first four East, last four West), and average each region.
data.groupby(group_key).mean()

East   -0.126749
West    0.448465
dtype: float64

In [57]:
def fill_mean(g):
    """Replace the missing values of one group with that group's mean."""
    return g.fillna(g.mean())

# group_keys=False keeps the original state labels as the index; with the default,
# pandas >= 2.0 adds the region label as an extra outer index level.
data.groupby(group_key, group_keys=False).apply(fill_mean)

Ohio         -0.607638
New York     -0.195258
Vermont      -0.126749
Florida       0.422648
Oregon        2.001614
Nevada        0.448465
California   -1.104685
Idaho         0.448465
dtype: float64

In [58]:
# Replacement value for each region, keyed by group label.
fill_values = {'East': 0.5, 'West': -1}

def fill_func(g):
    """Fill one group's missing values with the constant registered for its label."""
    # `g.name` carries the label of the group currently being processed.
    return g.fillna(fill_values[g.name])

# group_keys=False keeps the original state labels as the index; with the default,
# pandas >= 2.0 adds the region label as an extra outer index level.
data.groupby(group_key, group_keys=False).apply(fill_func)

Ohio         -0.607638
New York     -0.195258
Vermont       0.500000
Florida       0.422648
Oregon        2.001614
Nevada       -1.000000
California   -1.104685
Idaho        -1.000000
dtype: float64

## Pivot tables and Cross-tabulation

In [106]:
# Preview the first five rows; `tips` now includes the tip_pct column.
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tip_pct
0,16.99,1.01,Female,No,Sun,Dinner,2,0.059447
1,10.34,1.66,Male,No,Sun,Dinner,3,0.160542
2,21.01,3.5,Male,No,Sun,Dinner,3,0.166587
3,23.68,3.31,Male,No,Sun,Dinner,2,0.13978
4,24.59,3.61,Female,No,Sun,Dinner,4,0.146808


In [107]:
# Average tip_pct (the mean is pivot_table's default aggregation) with `size` as rows and `smoker` as columns.
tips.pivot_table(index='size', columns='smoker', values='tip_pct')

smoker,No,Yes
size,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.159829,0.274755
2,0.164996,0.166706
3,0.149671,0.157543
4,0.147604,0.142036
5,0.178415,0.086116
6,0.156229,


In [108]:
# The same figures as the pivot table, in long form. `tip_pct` is selected before
# aggregating: averaging the whole frame would hit the string columns (sex, day, time),
# which makes pandas >= 2.0 raise.
tips.groupby(['size', 'smoker'])['tip_pct'].mean()

size  smoker
1     No        0.159829
      Yes       0.274755
2     No        0.164996
      Yes       0.166706
3     No        0.149671
      Yes       0.157543
4     No        0.147604
      Yes       0.142036
5     No        0.178415
      Yes       0.086116
6     No        0.156229
Name: tip_pct, dtype: float64

In [109]:
# Same layout as the mean pivot, but summing tip_pct instead of averaging it.
tips.pivot_table(index='size', columns='smoker', values='tip_pct', aggfunc='sum')

smoker,No,Yes
size,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.319659,0.549509
2,14.849611,11.002583
3,3.891449,1.890512
4,3.837717,1.562397
5,0.535246,0.172232
6,0.624917,


In [110]:
# Two-level rows (size, sex) and two-level columns (smoker, time), with both the mean
# and the sum computed for total_bill and tip_pct.
tips.pivot_table(index=['size', 'sex'], columns=['smoker', 'time'], values=['total_bill', 'tip_pct'],
                aggfunc=['mean', 'sum'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,mean,mean,mean,mean,mean,mean,mean,sum,sum,sum,sum,sum,sum,sum,sum
Unnamed: 0_level_1,Unnamed: 1_level_1,tip_pct,tip_pct,tip_pct,tip_pct,total_bill,total_bill,total_bill,total_bill,tip_pct,tip_pct,tip_pct,tip_pct,total_bill,total_bill,total_bill,total_bill
Unnamed: 0_level_2,smoker,No,No,Yes,Yes,No,No,Yes,Yes,No,No,Yes,Yes,No,No,Yes,Yes
Unnamed: 0_level_3,time,Dinner,Lunch,Dinner,Lunch,Dinner,Lunch,Dinner,Lunch,Dinner,Lunch,Dinner,Lunch,Dinner,Lunch,Dinner,Lunch
size,sex,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4
1,Female,0.137931,0.181728,0.325733,,7.25,10.07,3.07,,0.137931,0.181728,0.325733,,7.25,10.07,3.07,
1,Male,,,,0.223776,,,,8.58,,,,0.223776,,,,8.58
2,Female,0.159709,0.160521,0.187208,0.180075,17.574667,13.172778,16.131176,14.34125,2.395629,2.889373,3.182535,1.440599,263.62,237.11,274.23,114.73
2,Male,0.166093,0.172176,0.156298,0.15342,15.50878,15.26375,19.937742,17.805,6.809795,2.754814,4.845252,1.534197,635.86,244.22,618.07,178.05
3,Female,0.162042,0.130348,0.161869,0.196114,19.864286,17.31,27.88,16.47,1.134293,0.260696,0.647477,0.196114,139.05,34.62,111.52,16.47
3,Male,0.150058,0.09553,0.138855,0.213789,21.86,22.82,31.933333,18.71,2.400929,0.09553,0.833131,0.213789,349.76,22.82,191.6,18.71
4,Female,0.141282,0.134849,0.102522,0.115982,28.07,29.455,30.14,43.11,0.706408,0.269698,0.102522,0.115982,140.35,58.91,30.14,43.11
4,Male,0.150808,0.147059,0.143632,0.194837,27.53,27.2,30.365,20.53,2.714552,0.147059,1.149056,0.194837,495.54,27.2,242.92,20.53
5,Female,0.172194,,,,29.85,,,,0.172194,,,,29.85,,,
5,Male,0.241663,0.121389,0.086116,,20.69,41.19,29.305,,0.241663,0.121389,0.172232,,20.69,41.19,58.61,
