In [43]:
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last
# one, so cells with several bare expressions show all of their results.
InteractiveShell.ast_node_interactivity = 'all'

In [44]:
import pandas as pd
import numpy as np

In [45]:
# Small demo frame: two categorical key columns plus two columns of random
# normal noise. The noise comes from a locally seeded generator so that a
# fresh "Restart & Run All" reproduces exactly the same frame, without
# touching numpy's global random state.
SEED = 42
rng = np.random.RandomState(SEED)

df = pd.DataFrame(
    {
        'key1': ['a', 'a', 'b', 'b', 'a'],
        'key2': ['one', 'two', 'one', 'two', 'one'],
        # Dict values are evaluated in order: data1 is drawn before data2.
        'data1': rng.randn(5),
        'data2': rng.randn(5)
    }
)
df

Unnamed: 0,key1,key2,data1,data2
0,a,one,1.25231,-0.309902
1,a,two,-1.174941,-1.373583
2,b,one,-0.406398,0.999263
3,b,two,-1.077077,-2.423391
4,a,one,1.068924,0.24588


In [46]:
# Group the rows of df by the values in the 'key1' column.
gropued_i = df.groupby(by='key1')

In [47]:
# Iterating a GroupBy yields (group key, sub-frame) pairs; print the key and
# then its rows, each on its own line.
for grp_name, grp_data in gropued_i:
    print(grp_name, grp_data, sep='\n')

a
  key1 key2     data1     data2
0    a  one  1.252310 -0.309902
1    a  two -1.174941 -1.373583
4    a  one  1.068924  0.245880
b
  key1 key2     data1     data2
2    b  one -0.406398  0.999263
3    b  two -1.077077 -2.423391


In [48]:
# With several grouping keys, each iteration yields a (key tuple, sub-frame)
# pair; the key tuple holds one value per grouping column.
gropued_ii = df.groupby(['key1', 'key2'])
for grp_keys, grp_data in gropued_ii:
    grp_name_level_i, grp_name_level_ii = grp_keys
    print(grp_name_level_i, grp_name_level_ii, grp_data, '==' * 20, sep='\n')

a
one
  key1 key2     data1     data2
0    a  one  1.252310 -0.309902
4    a  one  1.068924  0.245880
a
two
  key1 key2     data1     data2
1    a  two -1.174941 -1.373583
b
one
  key1 key2     data1     data2
2    b  one -0.406398  0.999263
b
two
  key1 key2     data1     data2
3    b  two -1.077077 -2.423391


In [49]:
# Recipe: build a dict of data pieces, one per group.
# Step 1 - group the rows on 'key1'.
grp_ii = df.groupby(by='key1')
grp_ii

<pandas.core.groupby.groupby.DataFrameGroupBy object at 0x00000231AC3BAEB8>

In [50]:
# Step 2 - materialise the GroupBy as a list of (group key, sub-frame) tuples.
grp_ii_list = [(grp_key, grp_frame) for grp_key, grp_frame in grp_ii]
grp_ii_list

[('a',
    key1 key2     data1     data2
  0    a  one  1.252310 -0.309902
  1    a  two -1.174941 -1.373583
  4    a  one  1.068924  0.245880),
 ('b',
    key1 key2     data1     data2
  2    b  one -0.406398  0.999263
  3    b  two -1.077077 -2.423391)]

In [51]:
# Step 3 - turn the (group key, sub-frame) pairs into a key -> sub-frame mapping.
grp_ii_list_dict = {grp_key: grp_frame for grp_key, grp_frame in grp_ii_list}
grp_ii_list_dict

{'a':   key1 key2     data1     data2
 0    a  one  1.252310 -0.309902
 1    a  two -1.174941 -1.373583
 4    a  one  1.068924  0.245880,
 'b':   key1 key2     data1     data2
 2    b  one -0.406398  0.999263
 3    b  two -1.077077 -2.423391}

In [52]:
# Check what kind of object is stored under a group key.
type(grp_ii_list_dict['a'])

pandas.core.frame.DataFrame

In [53]:
# Look up the piece for group 'a' directly by its key.
grp_ii_list_dict['a']

Unnamed: 0,key1,key2,data1,data2
0,a,one,1.25231,-0.309902
1,a,two,-1.174941,-1.373583
4,a,one,1.068924,0.24588


> By default, `groupby` groups along `axis=0`, i.e. it splits the rows into groups.

In [54]:
# Columns can be grouped as well as rows - for example by their dtype.
# Inspect the dtype of each column first.
df.dtypes

key1      object
key2      object
data1    float64
data2    float64
dtype: object

In [55]:
# Group the columns (axis=1) so that columns sharing a dtype land together.
# NOTE(review): `axis=1` in DataFrame.groupby is deprecated in recent pandas
# (2.1+) - confirm the pandas version this notebook targets.
column_dtypes = df.dtypes
grp_iii = df.groupby(column_dtypes, axis=1)

In [56]:
# Each iteration yields (dtype, sub-frame holding the columns of that dtype);
# print both, followed by a separator line.
for grp_name, grp_data in grp_iii:
    print(grp_name, grp_data, '==' * 20, sep='\n')

float64
      data1     data2
0  1.252310 -0.309902
1 -1.174941 -1.373583
2 -0.406398  0.999263
3 -1.077077 -2.423391
4  1.068924  0.245880
object
  key1 key2
0    a  one
1    a  two
2    b  one
3    b  two
4    a  one
