#### 10.2.1 Data Aggregation

In [37]:
# Display the value of every top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [38]:
import pandas as pd
import numpy as np

In [39]:
# Demo frame: two categorical key columns plus two columns of standard-normal noise.
# The noise comes from a dedicated, seeded generator so that every
# Restart & Run All produces the same numbers, without touching NumPy's
# global random state. Outputs saved from an earlier unseeded run will
# differ until the notebook is re-executed.
rng = np.random.RandomState(42)
df = pd.DataFrame(
    {
        'key1': ['a', 'a', 'b', 'b', 'a'],
        'key2': ['one', 'two', 'one', 'two', 'one'],
        'data1': rng.randn(5),
        'data2': rng.randn(5)
    }
)
df

Unnamed: 0,key1,key2,data1,data2
0,a,one,0.812154,1.726898
1,a,two,-1.691696,0.020937
2,b,one,0.07522,-1.598293
3,b,two,-0.338561,0.585553
4,a,one,0.399397,-0.451658


In [40]:
# Group the rows of df by the values in its 'key1' column.
grp_i = df.groupby(by='key1')

In [41]:
# Iterating a grouped object yields (group key, sub-frame) pairs.
# Print the key, the rows of that group, and a divider, one per line.
for grp_name, grp_data in grp_i:
    print(grp_name, grp_data, '==' * 20, sep='\n')

a
  key1 key2     data1     data2
0    a  one  0.812154  1.726898
1    a  two -1.691696  0.020937
4    a  one  0.399397 -0.451658
b
  key1 key2     data1     data2
2    b  one  0.075220 -1.598293
3    b  two -0.338561  0.585553


In [42]:
# 0.9 quantile of data1, computed separately for each key1 group.
grp_i['data1'].quantile(q=0.9)

key1
a    0.729602
b    0.033842
Name: data1, dtype: float64

In [43]:
# To use a custom aggregation, pass any function that reduces an array
# to a single value to the grouped object's aggregate (agg) method.
def peak_to_peak(arr):
    """Return the spread of ``arr``: its maximum minus its minimum.

    ``arr`` must provide ``max()`` and ``min()`` methods (for example a
    NumPy array or a pandas Series).
    """
    highest = arr.max()
    lowest = arr.min()
    return highest - lowest

In [44]:
# Aggregate only the numeric columns: peak_to_peak subtracts values, which
# is undefined for the string column 'key2'. Older pandas silently dropped
# such columns from the result; pandas 2.x raises TypeError instead.
grp_i[['data1', 'data2']].agg(peak_to_peak)

Unnamed: 0_level_0,data1,data2
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,2.50385,2.178557
b,0.413781,2.183846


In [45]:
# Note that some methods, such as describe, also work on a grouped object
# even though they are not aggregations.
grp_i.describe()

Unnamed: 0_level_0,data1,data1,data1,data1,data1,data1,data1,data1,data2,data2,data2,data2,data2,data2,data2,data2
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
key1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
a,3.0,-0.160048,1.342405,-1.691696,-0.646149,0.399397,0.605776,0.812154,3.0,0.432059,1.14599,-0.451658,-0.21536,0.020937,0.873918,1.726898
b,2.0,-0.131671,0.292588,-0.338561,-0.235116,-0.131671,-0.028225,0.07522,2.0,-0.50637,1.544212,-1.598293,-1.052331,-0.50637,0.039592,0.585553
