In [4]:
import pandas as pd
import numpy as np

In [5]:
# Small demo frame: two label columns (key1, key2) plus two columns of
# standard-normal draws. The draws are unseeded, so values differ per run.
df = pd.DataFrame(
    {
        'key1': list('aabba'),
        'key2': ['one', 'two'] * 2 + ['one'],
        'data1': np.random.randn(5),
        'data2': np.random.randn(5),
    }
)
df

Unnamed: 0,key1,key2,data1,data2
0,a,one,0.750217,-0.169909
1,a,two,-0.013116,-0.331808
2,b,one,0.237127,0.646069
3,b,two,-0.564757,0.636529
4,a,one,-0.265576,0.759154


In [6]:
# Materialise every key1 group as its own sub-frame, keyed by the group label,
# then pull out the rows belonging to group 'b'.
pieces = {label: frame for label, frame in df.groupby('key1')}
pieces['b']

Unnamed: 0,key1,key2,data1,data2
2,b,one,0.237127,0.646069
3,b,two,-0.564757,0.636529


In [7]:
# Show the dtype of each column of `df` (a Series indexed by column name).
df.dtypes

key1      object
key2      object
data1    float64
data2    float64
dtype: object

In [8]:
# Group the *columns* of `df` by their dtype (axis = 1 groups along columns).
# NOTE(review): `axis=1` in DataFrame.groupby is deprecated in recent pandas
# (2.1+) - confirm the pinned version. The documented replacement,
# `df.T.groupby(...)`, yields transposed groups, so any cell iterating over
# `gp` would have to be updated together with this line.
gp = df.groupby(df.dtypes,axis = 1)

In [9]:
# Walk the groups held by `gp`: each iteration yields the group key (a dtype)
# and the sub-frame for that key; print the key, then the sub-frame.
for dtype, group in gp:
    print(dtype, group, sep='\n')

float64
      data1     data2
0  0.750217 -0.169909
1 -0.013116 -0.331808
2  0.237127  0.646069
3 -0.564757  0.636529
4 -0.265576  0.759154
object
  key1 key2
0    a  one
1    a  two
2    b  one
3    b  two
4    a  one


In [11]:
# Mean of data2 per (key1, key2) pair. Selecting with a list ([['data2']])
# keeps the result a DataFrame with a single 'data2' column.
df.groupby(['key1','key2'])[['data2']].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,data2
key1,key2,Unnamed: 2_level_1
a,one,0.294623
a,two,-0.331808
b,one,0.646069
b,two,0.636529


In [12]:
# Mean of data2 per (key1, key2) pair. Selecting with a plain label ('data2')
# returns a Series named 'data2' with a two-level (key1, key2) index.
df.groupby(['key1','key2'])['data2'].mean()

key1  key2
a     one     0.294623
      two    -0.331808
b     one     0.646069
      two     0.636529
Name: data2, dtype: float64

In [14]:
# 5x5 frame of standard-normal draws: one row per person, one column per letter.
# The draws are unseeded, so values differ per run.
people = pd.DataFrame(
    data=np.random.randn(5, 5),
    index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis'],
    columns=list('abcde'),
)
people

Unnamed: 0,a,b,c,d,e
Joe,-0.772256,-0.145411,1.473187,-1.258133,0.650516
Steve,1.428261,1.755178,-1.085632,-1.45902,-1.934098
Wes,0.182446,-0.240843,0.174708,-0.140903,1.372759
Jim,-0.408451,-0.14397,-2.593382,-0.737166,0.235211
Travis,1.380755,-0.15305,0.122616,-0.901029,-0.079383


In [15]:
# Blank out the cells at column positions 1 and 2 of the row at position 2.
# This mutates `people` in place.
people.iloc[2:3, 1:3] = np.nan

In [16]:
# Display `people` again to inspect it after the NaN assignment.
people

Unnamed: 0,a,b,c,d,e
Joe,-0.772256,-0.145411,1.473187,-1.258133,0.650516
Steve,1.428261,1.755178,-1.085632,-1.45902,-1.934098
Wes,0.182446,,,-0.140903,1.372759
Jim,-0.408451,-0.14397,-2.593382,-0.737166,0.235211
Travis,1.380755,-0.15305,0.122616,-0.901029,-0.079383


In [17]:
# Lookup from column label to colour name, used below as a grouping key.
mapping = dict(a='red', b='red', c='blue', d='blue', e='red', f='orange')
mapping

{'a': 'red', 'b': 'red', 'c': 'blue', 'd': 'blue', 'e': 'red', 'f': 'orange'}

In [18]:
# Sum the columns of `people` per colour group given by `mapping`.
# `groupby(..., axis=1)` is deprecated since pandas 2.1, so the frame is
# transposed, grouped on its (now row) labels, and the result transposed back
# to keep one row per person and one column per colour.
by_column = people.T.groupby(mapping)
by_column.sum().T

Unnamed: 0,blue,red
Joe,0.215054,-0.267151
Steve,-2.544652,1.249341
Wes,-0.140903,1.555205
Jim,-3.330549,-0.31721
Travis,-0.778413,1.148322


In [19]:
# Same label -> colour lookup as `mapping`, held as a Series
# (dict keys become the index, dict values become the data).
map_series = pd.Series(mapping)
map_series

a       red
b       red
c      blue
d      blue
e       red
f    orange
dtype: object

In [20]:
# Count the non-null values per colour group for every row of `people`.
# `groupby(..., axis=1)` is deprecated since pandas 2.1; transposing, grouping
# the row labels by `map_series`, and transposing back gives the same table.
people.T.groupby(map_series).count().T

Unnamed: 0,blue,red
Joe,2,3
Steve,2,3
Wes,1,2
Jim,2,3
Travis,2,3


In [21]:
# Sum the values per colour group for every row of `people`.
# `groupby(..., axis=1)` is deprecated since pandas 2.1; transposing, grouping
# the row labels by `map_series`, and transposing back gives the same table.
people.T.groupby(map_series).sum().T

Unnamed: 0,blue,red
Joe,0.215054,-0.267151
Steve,-2.544652,1.249341
Wes,-0.140903,1.555205
Jim,-3.330549,-0.31721
Travis,-0.778413,1.148322


In [22]:
# Passing a function as the key: it is called on each index label, so rows are
# grouped by the length of the person's name and summed within each group.
people.groupby(len).sum()


Unnamed: 0,a,b,c,d,e
3,-0.998261,-0.289381,-1.120195,-2.136202,2.258486
5,1.428261,1.755178,-1.085632,-1.45902,-1.934098
6,1.380755,-0.15305,0.122616,-0.901029,-0.079383


In [28]:
# Group rows by the character length of their index label, computed
# explicitly as a list of keys, then sum within each group.
people.groupby([len(str(label)) for label in people.index]).sum()

Unnamed: 0,a,b,c,d,e
3,-0.998261,-0.289381,-1.120195,-2.136202,2.258486
5,1.428261,1.755178,-1.085632,-1.45902,-1.934098
6,1.380755,-0.15305,0.122616,-0.901029,-0.079383


In [27]:
# Inspect the row index of `df`.
df.index

RangeIndex(start=0, stop=5, step=1)

In [29]:
# Group on the (key1, key2) pair and total the remaining columns per pair.
gped = df.groupby(by=['key1', 'key2'])
gped.sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,data1,data2
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,one,0.484641,0.589245
a,two,-0.013116,-0.331808
b,one,0.237127,0.646069
b,two,-0.564757,0.636529


In [30]:
# Display `df` again for reference.
df

Unnamed: 0,key1,key2,data1,data2
0,a,one,0.750217,-0.169909
1,a,two,-0.013116,-0.331808
2,b,one,0.237127,0.646069
3,b,two,-0.564757,0.636529
4,a,one,-0.265576,0.759154


In [33]:
# Work on a copy of `df`, then build a two-level index from its key columns;
# the level names are taken from the column names (key1, key2).
df1 = df.copy()
index = pd.MultiIndex.from_arrays([df1['key1'], df1['key2']])
index


MultiIndex([('a', 'one'),
            ('a', 'two'),
            ('b', 'one'),
            ('b', 'two'),
            ('a', 'one')],
           names=['key1', 'key2'])

In [37]:
# Rebind `df1` to fresh standard-normal data (5 rows x 2 columns) carried on
# the existing `index` object.
df1 = pd.DataFrame(
    data=np.random.randn(5, 2),
    columns=['data1', 'data2'],
    index=index,
)
df1

Unnamed: 0_level_0,Unnamed: 1_level_0,data1,data2
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,one,0.634347,0.706819
a,two,-1.009014,-0.376985
b,one,-0.415882,1.818327
b,two,0.366929,0.519143
a,one,0.651078,1.737732


In [40]:
# Group by the first index level (level = 0) and count non-null values per group.
df1.groupby(level = 0).count()

Unnamed: 0_level_0,data1,data2
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,3,3
b,2,2


In [41]:
# Group by the first index level (level = 0) and sum each column per group.
df1.groupby(level = 0).sum()

Unnamed: 0_level_0,data1,data2
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,0.276411,2.067567
b,-0.048953,2.33747


In [None]:
# Display the `gp` groupby object (this cell has not been executed yet).
gp