In [1]:
import pandas as pd
import numpy as np

## Groupby

In [21]:
data = {'name' : ['john','tim','kent','Roy','Rose'],
        'Age' : [34,40,28,40,30],
        'Gender' : ['M','M','M','M','F']
}

In [22]:
df = pd.DataFrame(data)

In [23]:
df

Unnamed: 0,name,Age,Gender
0,john,34,M
1,tim,40,M
2,kent,28,M
3,Roy,40,M
4,Rose,30,F


In [28]:
group = df.groupby('Gender')

In [29]:
for s,g in group:
    print(s)
    print(g)

F
   name  Age Gender
4  Rose   30      F
M
   name  Age Gender
0  john   34      M
1   tim   40      M
2  kent   28      M
3   Roy   40      M


In [31]:
group.get_group('M')

Unnamed: 0,name,Age,Gender
0,john,34,M
1,tim,40,M
2,kent,28,M
3,Roy,40,M


## Aggregation

In [34]:
g = df.groupby('Age')
g['Age'].aggregate('mean')

Age
28    28
30    30
34    34
40    40
Name: Age, dtype: int64

In [35]:
g['Age'].agg(np.mean)

Age
28    28
30    30
34    34
40    40
Name: Age, dtype: int64

In [36]:
df.groupby('Gender')['Age'].agg(np.mean)

Gender
F    30.0
M    35.5
Name: Age, dtype: float64

In [37]:
df.groupby('Gender')['Age'].aggregate('mean')

Gender
F    30.0
M    35.5
Name: Age, dtype: float64

In [25]:
df.groupby('Gender').groups

{'F': Int64Index([4], dtype='int64'),
 'M': Int64Index([0, 1, 2, 3], dtype='int64')}

In [26]:
df.groupby(['Gender','Age']).groups

{('F', 30): Int64Index([4], dtype='int64'),
 ('M', 28): Int64Index([2], dtype='int64'),
 ('M', 34): Int64Index([0], dtype='int64'),
 ('M', 40): Int64Index([1, 3], dtype='int64')}

## Transformation

In [39]:
group = df.groupby('Gender')
group.transform(lambda x: x.mean())

Unnamed: 0,Age
0,35.5
1,35.5
2,35.5
3,35.5
4,30.0


## Filtration

In [40]:
df.groupby('Age').groups

{28: Int64Index([2], dtype='int64'),
 30: Int64Index([4], dtype='int64'),
 34: Int64Index([0], dtype='int64'),
 40: Int64Index([1, 3], dtype='int64')}

In [49]:
df.groupby('Age').filter(lambda x : len(x)>1)

Unnamed: 0,name,Age,Gender
1,tim,40,M
3,Roy,40,M


In [50]:
df[df['Age']>30]

Unnamed: 0,name,Age,Gender
0,john,34,M
1,tim,40,M
3,Roy,40,M


## Sorting

In [51]:
df.sort_values('Age')

Unnamed: 0,name,Age,Gender
2,kent,28,M
4,Rose,30,F
0,john,34,M
1,tim,40,M
3,Roy,40,M


In [52]:
df.sort_values('Age',ascending= False)

Unnamed: 0,name,Age,Gender
1,tim,40,M
3,Roy,40,M
0,john,34,M
4,Rose,30,F
2,kent,28,M


In [53]:
df.sort_values(['Age','Name'],ascending=[True,False])

KeyError: 'Name'