In [1]:
from IPython.core.interactiveshell import InteractiveShell
# Ask IPython to display the value of every top-level expression in a cell,
# not only the last one (the default for this setting is 'last_expr').
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Seed NumPy's global generator so the random columns are identical on every
# Restart Kernel -> Run All; without this the frame changes on each run.
SEED = 42
np.random.seed(SEED)

# Toy frame used throughout: two string key columns to group on and two
# numeric columns filled with standard-normal noise.
df = pd.DataFrame(
    {
        'key1': ['a', 'a', 'b', 'b', 'a'],
        'key2': ['one', 'two', 'one', 'two', 'one'],
        'data1': np.random.randn(5),
        'data2': np.random.randn(5),
    }
)
df

Unnamed: 0,key1,key2,data1,data2
0,a,one,0.041765,1.263783
1,a,two,-1.093742,0.071787
2,b,one,-1.453398,0.058801
3,b,two,0.613571,-0.055852
4,a,one,0.052273,1.261518


In [4]:
# Split the rows of df into groups that share the same 'key1' value.
gropued_i = df.groupby(by='key1')

In [5]:
# Iterating a GroupBy yields (group key, sub-frame) pairs; print each pair
# followed by a separator line.
for grp_name, grp_data in gropued_i:
    print(grp_name, grp_data, '==' * 20, sep='\n')

a
  key1 key2     data1     data2
0    a  one  0.041765  1.263783
1    a  two -1.093742  0.071787
4    a  one  0.052273  1.261518
b
  key1 key2     data1     data2
2    b  one -1.453398  0.058801
3    b  two  0.613571 -0.055852


In [6]:
# Grouping on several keys: the first element of each yielded pair is itself
# a tuple with one value per key, so it is unpacked in the loop header.
gropued_ii = df.groupby(['key1', 'key2'])
for (grp_name_level_i, grp_name_level_ii), grp_data in gropued_ii:
    print(grp_name_level_i, grp_name_level_ii, grp_data, '==' * 20, sep='\n')

a
one
  key1 key2     data1     data2
0    a  one  0.041765  1.263783
4    a  one  0.052273  1.261518
a
two
  key1 key2     data1     data2
1    a  two -1.093742  0.071787
b
one
  key1 key2     data1     data2
2    b  one -1.453398  0.058801
b
two
  key1 key2     data1     data2
3    b  two  0.613571 -0.055852


In [7]:
# Recipe: build a dict that maps each group key to its piece of the data.
# Start by grouping the rows on 'key1'.
grp_ii = df.groupby(by='key1')
grp_ii

<pandas.core.groupby.groupby.DataFrameGroupBy object at 0x00000164E043C390>

In [8]:
# Materialise the GroupBy as a list of (group key, sub-frame) tuples.
grp_ii_list = [pair for pair in grp_ii]
grp_ii_list

[('a',
    key1 key2     data1     data2
  0    a  one  0.041765  1.263783
  1    a  two -1.093742  0.071787
  4    a  one  0.052273  1.261518),
 ('b',
    key1 key2     data1     data2
  2    b  one -1.453398  0.058801
  3    b  two  0.613571 -0.055852)]

In [9]:
# Turn the (group key, sub-frame) pairs into a {group key: sub-frame} mapping.
grp_ii_list_dict = {key: piece for key, piece in grp_ii_list}
grp_ii_list_dict

{'a':   key1 key2     data1     data2
 0    a  one  0.041765  1.263783
 1    a  two -1.093742  0.071787
 4    a  one  0.052273  1.261518,
 'b':   key1 key2     data1     data2
 2    b  one -1.453398  0.058801
 3    b  two  0.613571 -0.055852}

In [10]:
# Check what kind of object the dict stores under group key 'a'.
type(grp_ii_list_dict['a'])

pandas.core.frame.DataFrame

In [11]:
# Look up the piece of the data stored under group key 'a'.
grp_ii_list_dict['a']

Unnamed: 0,key1,key2,data1,data2
0,a,one,0.041765,1.263783
1,a,two,-1.093742,0.071787
4,a,one,0.052273,1.261518


> By default, `groupby` groups along `axis=0`, i.e. it splits the rows.

In [12]:
# The columns can also be grouped, e.g. by their dtype. First inspect the
# dtype of each column.
df.dtypes

key1      object
key2      object
data1    float64
data2    float64
dtype: object

In [13]:
# Group the columns (axis=1) using each column's dtype as the group key.
# NOTE(review): `axis=1` in groupby is deprecated in recent pandas releases;
# confirm the pandas version this notebook targets before upgrading.
grp_iii = df.groupby(df.dtypes, axis=1)

In [14]:
# Walk the column groups: print each group key, then its sub-frame, then a
# separator line.
for grp_name, grp_data in grp_iii:
    print(grp_name, grp_data, '==' * 20, sep='\n')

float64
      data1     data2
0  0.041765  1.263783
1 -1.093742  0.071787
2 -1.453398  0.058801
3  0.613571 -0.055852
4  0.052273  1.261518
object
  key1 key2
0    a  one
1    a  two
2    b  one
3    b  two
4    a  one
