In [3]:
import pandas as pd
import numpy as np

### 7. grouping

In [4]:
# Demo frame for the grouping examples: two string key columns (A, B) and two
# numeric value columns (C, D).
# The values are drawn from a locally seeded generator so that a fresh
# Restart & Run All yields the same numbers every time. The global NumPy
# random state is left untouched. Saved cell outputs must be regenerated
# whenever the seed changes.
rng = np.random.RandomState(0)
df = pd.DataFrame(
    {
        'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
        'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
        'C': rng.randn(8),  # first 8 draws from the seeded stream
        'D': rng.randn(8),  # next 8 draws from the same stream
    })

In [7]:
# Display the whole frame; it has only eight rows, so no .head() is needed.
df

Unnamed: 0,A,B,C,D
0,foo,one,-0.30231,0.054691
1,bar,one,1.010874,-1.953055
2,foo,two,0.127743,1.377715
3,bar,three,0.363686,0.138757
4,foo,two,-0.65234,-0.601223
5,bar,two,-0.288872,0.847224
6,foo,one,-0.94988,-0.730096
7,foo,three,-1.150245,1.47553


In [11]:
# Sum the numeric columns for each value of B.
# The value columns are selected explicitly because pandas >= 2.0 no longer
# drops non-numeric columns during a group-wise sum: without the selection the
# string column A would be concatenated per group and appear in the result.
df.groupby('B')[['C', 'D']].sum()

Unnamed: 0_level_0,C,D
B,Unnamed: 1_level_1,Unnamed: 2_level_1
one,-0.241316,-2.62846
three,-0.78656,1.614287
two,-0.813469,1.623715


In [12]:
# Group on the (A, B) pair; the result carries a two-level row index with A as
# the outer level and B as the inner level.
df.groupby(by=['A', 'B']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,D
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,1.010874,-1.953055
bar,three,0.363686,0.138757
bar,two,-0.288872,0.847224
foo,one,-1.25219,-0.675405
foo,three,-1.150245,1.47553
foo,two,-0.524597,0.776491


In [13]:
# Same aggregation with the key order swapped: B becomes the outer index level
# and A the inner one, so only the row ordering/nesting changes.
df.groupby(by=['B', 'A']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,D
B,A,Unnamed: 2_level_1,Unnamed: 3_level_1
one,bar,1.010874,-1.953055
one,foo,-1.25219,-0.675405
three,bar,0.363686,0.138757
three,foo,-1.150245,1.47553
two,bar,-0.288872,0.847224
two,foo,-0.524597,0.776491


### 8. reshaping

In [14]:
# Every (first-level, second-level) label pair, ordered with the first-level
# label varying slowest: bar/one, bar/two, baz/one, ...
tuples = [
    (outer, inner)
    for outer in ('bar', 'baz', 'foo', 'qux')
    for inner in ('one', 'two')
]

In [15]:
# Inspect the list of (first-level, second-level) label pairs.
tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [16]:
# Turn the label pairs into a two-level index; the levels are named
# '1st' (outer) and '2nd' (inner).
index = pd.MultiIndex.from_tuples(
    tuples,
    names=['1st', '2nd'],
)

In [17]:
# Show the MultiIndex that was built from the tuples.
index

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['1st', '2nd'])

In [18]:
# 8x2 frame of standard-normal values, using the two-level `index` as row
# labels and 'A'/'B' as column labels.
# The values come from a locally seeded generator so that a fresh
# Restart & Run All reproduces the same frame. The global NumPy random state
# is left untouched. Saved cell outputs must be regenerated whenever the seed
# changes.
rng = np.random.RandomState(1)
df = pd.DataFrame(rng.randn(8, 2), index=index, columns=['A', 'B'])

In [19]:
# Display the 8x2 frame together with its two-level row index.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
1st,2nd,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.160324,1.786535
bar,two,-0.005162,1.155922
baz,one,-0.134352,0.502285
baz,two,-0.079072,0.09156
foo,one,-1.030142,-0.535125
foo,two,1.262896,-0.347525
qux,one,1.440906,1.603468
qux,two,-1.380828,0.974759


In [20]:
# Keep only the first four rows (selected by position, not by label).
df2 = df.iloc[:4]

In [21]:
# Display the four-row slice of df.
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
1st,2nd,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.160324,1.786535
bar,two,-0.005162,1.155922
baz,one,-0.134352,0.502285
baz,two,-0.079072,0.09156


In [22]:
# Move the column labels into the row index as a new innermost level, turning
# the frame into a longer Series.
stacked = df2.stack(level=-1)

In [23]:
stacked  # stack() moved the columns into the index, so there are now more rows

1st  2nd   
bar  one  A   -0.160324
          B    1.786535
     two  A   -0.005162
          B    1.155922
baz  one  A   -0.134352
          B    0.502285
     two  A   -0.079072
          B    0.091560
dtype: float64

In [28]:
# Pivot the outermost index level (position 0) back out into columns; the
# remaining levels stay on the rows.
stacked.unstack(level=0)

Unnamed: 0_level_0,1st,bar,baz
2nd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,-0.160324,-0.134352
one,B,1.786535,0.502285
two,A,-0.005162,-0.079072
two,B,1.155922,0.09156


### pivot table

In [29]:
# Demo frame for the pivot-table examples: three string key columns (A, B, C)
# built by repeating short label lists up to 12 rows, plus two numeric value
# columns (D, E).
# The values come from a locally seeded generator so that a fresh
# Restart & Run All reproduces the same frame. The global NumPy random state
# is left untouched. Saved cell outputs must be regenerated whenever the seed
# changes.
rng = np.random.RandomState(2)
df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 3,
                   'B': ['A', 'B', 'C'] * 4,
                   'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
                   'D': rng.randn(12),   # first 12 draws from the seeded stream
                   'E': rng.randn(12)})  # next 12 draws from the same stream

In [30]:
# Display the frame that feeds the pivot-table example.
df

Unnamed: 0,A,B,C,D,E
0,one,A,foo,0.357469,-0.409212
1,one,B,foo,0.371796,0.462735
2,two,C,foo,0.660894,0.912108
3,three,A,bar,1.345603,1.354333
4,one,B,bar,-0.068768,-0.025743
5,one,C,bar,-0.923342,0.997041
6,two,A,foo,-1.085876,-1.269232
7,three,B,foo,1.155175,-0.482582
8,one,C,foo,1.113235,0.817518
9,one,A,bar,0.61886,-0.386848


In [31]:
# Sum the numeric columns for each value of A.
# The value columns are selected explicitly because pandas >= 2.0 no longer
# drops non-numeric columns during a group-wise sum: without the selection the
# string columns B and C would be concatenated per group and appear in the
# result.
df.groupby('A')[['D', 'E']].sum()

Unnamed: 0_level_0,D,E
A,Unnamed: 1_level_1,Unnamed: 2_level_1
one,1.469249,1.455491
three,2.146744,1.964614
two,-1.04962,-0.924336


In [32]:
# Spread column D into a table: one row per (A, B) pair, one column per value
# of C. Combinations that do not occur in the data show up as NaN.
df.pivot_table(values='D', index=['A', 'B'], columns=['C'])

Unnamed: 0_level_0,C,bar,foo
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,0.61886,0.357469
one,B,-0.068768,0.371796
one,C,-0.923342,1.113235
three,A,1.345603,
three,B,,1.155175
three,C,-0.354034,
two,A,,-1.085876
two,B,-0.624639,
two,C,,0.660894
