In [302]:
# This slide emphasizes two things:
# 1. When doing groupby() + apply(), apply(foo) calls foo() twice on the first group (by design, see below).
# 2. After doing groupby() + apply(), I got annoyed that the group keys were duplicated in the final dataframe :-(
#    I kept experimenting, and it turns out this can be controlled with the groupby(..., group_keys=False) parameter.

In [303]:
import pandas as pd

In [304]:
# Small sample frame: five rows with integer 'maturity', boolean 'call'
# and integer 'strike' columns.
df = pd.DataFrame(
    data={
        'maturity': [20181211] * 3 + [20190115] * 2,
        'call': [True, True, False, False, True],
        'strike': [105] + [110] * 4,
    },
)

In [305]:
# Show the full five-row sample frame.
df

Unnamed: 0,call,maturity,strike
0,True,20181211,105
1,True,20181211,110
2,False,20181211,110
3,False,20190115,110
4,True,20190115,110


In [306]:
# Group the rows by the (maturity, call) pair. group_keys=False is the setting
# under test: it controls whether apply() adds the group keys to its result.
# Other parameter combinations tried while experimenting:
#   df.groupby(['maturity', 'call'], as_index=False, group_keys=False)
#   df.groupby(['maturity', 'call'], as_index=False)
gb = df.groupby(
    by=['maturity', 'call'],
    group_keys=False,
)

In [307]:
# Count the 'strike' entries per (maturity, call) group; the result has 4 groups.
gb.count()

Unnamed: 0_level_0,Unnamed: 1_level_0,strike
maturity,call,Unnamed: 2_level_1
20181211,False,1
20181211,True,2
20190115,False,1
20190115,True,1


In [308]:
# You would expect the message to be printed 4 times (as there are 4 groups),
# but the function is applied twice to the first group! (see doc)
# Note: pandas 0.25 and later evaluate the first group only once, so newer versions print 4 lines.

# http://pandas.pydata.org/pandas-docs/stable/groupby.html#flexible-apply

In [309]:
# Print the size of every group the callback receives, one line per call,
# to make the number of calls visible.
gb.apply(lambda x: print("- len =", len(x)))

- len = 1
- len = 1
- len = 2
- len = 1
- len = 1


In [310]:
# Identity callback: each group is returned as-is. The result below keeps the
# original row order and a plain 0..4 index.
gb.apply(lambda x: x)

Unnamed: 0,call,maturity,strike
0,True,20181211,105
1,True,20181211,110
2,False,20181211,110
3,False,20190115,110
4,True,20190115,110


In [311]:
# With groupby(group_keys=True), returning a copy of the group's data, or modifying it (even in place),
# duplicates the key columns as an extra index level in the result.
# The cells below use the group_keys=False groupby from above, so the keys are not duplicated.

In [312]:
# Return a copy of each group. The rows now come back ordered by group
# (index 2, 0, 1, 3, 4), without any extra index level.
gb.apply(lambda x: x.copy())

Unnamed: 0,call,maturity,strike
2,False,20181211,110
0,True,20181211,105
1,True,20181211,110
3,False,20190115,110
4,True,20190115,110


In [313]:
# Return a filtered selection of each group: only the rows whose strike is 110.
gb.apply(lambda x: x[x.strike == 110])

Unnamed: 0,call,maturity,strike
2,False,20181211,110
1,True,20181211,110
3,False,20190115,110
4,True,20190115,110


In [314]:
def inplaceDrop(x):
    """Remove, in place, every row of ``x`` whose strike is not 110.

    The frame passed in is itself mutated (``drop(..., inplace=True)``) and
    that same object is returned, so the function can be handed directly to
    ``groupby(...).apply``.
    """
    not_110 = x.strike != 110
    stale_labels = x[not_110].index
    x.drop(stale_labels, inplace=True)
    return x

In [315]:
# Same strike == 110 filter, but performed by mutating each group in place.
gb.apply(inplaceDrop)

Unnamed: 0,call,maturity,strike
2,False,20181211,110
1,True,20181211,110
3,False,20190115,110
4,True,20190115,110
