# Pandas Reshaping

In [1]:
import pandas as pd, numpy as np

### Stack
- The `stack()` method "compresses" a level of the DataFrame's columns into the innermost level of its row index.

In [5]:
# Build the (first, second) label pairs: each outer label is paired with
# 'one' and then 'two', in that order.
tuples = [(outer, inner)
          for outer in ('bar', 'baz', 'foo', 'qux')
          for inner in ('one', 'two')]
tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [7]:
# Turn the label pairs into a two-level row index; the level names are
# attached afterwards instead of through the names= argument.
index = pd.MultiIndex.from_tuples(tuples).set_names(['first', 'second'])
index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [13]:
# Draw the sample values from a seeded local generator so that every
# Restart & Run All reproduces the same numbers (an unseeded
# np.random.randn call yields different values on each run).
rng = np.random.default_rng(0)
df = pd.DataFrame(rng.standard_normal((8, 2)), index=index, columns=['A', 'B'])
# Keep only the first four rows (the 'bar' and 'baz' groups) for the
# stack/unstack examples; .iloc makes the positional slice explicit.
df2 = df.iloc[:4]
df


Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.117071,-0.526528
bar,two,-0.128731,-0.342161
baz,one,-1.46591,2.694722
baz,two,0.873542,0.175771
foo,one,0.089678,-0.072269
foo,two,0.216068,-0.930282
qux,one,-1.019816,1.135911
qux,two,0.036613,0.980198


In [14]:
df2

# Next, stack() is used to "compress" the columns (A, B) into the row index.

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.117071,-0.526528
bar,two,-0.128731,-0.342161
baz,one,-1.46591,2.694722
baz,two,0.873542,0.175771


In [15]:
# Move the innermost (here: only) column level, A/B, into the row index.
stacked = df2.stack(level=-1)

In [17]:
# Show the stacked result: A/B now form the innermost level of the row index.
stacked

first  second   
bar    one     A    0.117071
               B   -0.526528
       two     A   -0.128731
               B   -0.342161
baz    one     A   -1.465910
               B    2.694722
       two     A    0.873542
               B    0.175771
dtype: float64

### Unstack
- The inverse operation of `stack()` is `unstack()`, which by default unstacks the last (innermost) index level:

In [19]:
# Round trip: unstacking the innermost level (the default) gives back the same layout as df2.
stacked.unstack(level=-1)

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.117071,-0.526528
bar,two,-0.128731,-0.342161
baz,one,-1.46591,2.694722
baz,two,0.873542,0.175771


In [20]:
# Pivot the 'second' level (position 1 of the row index) into the columns instead.
stacked.unstack(level='second')

Unnamed: 0_level_0,second,one,two
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,A,0.117071,-0.128731
bar,B,-0.526528,-0.342161
baz,A,-1.46591,0.873542
baz,B,2.694722,0.175771


In [22]:
# Pivot the outermost 'first' level (position 0 of the row index) into the columns.
stacked.unstack(level='first')

Unnamed: 0_level_0,first,bar,baz
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,0.117071,-1.46591
one,B,-0.526528,2.694722
two,A,-0.128731,0.873542
two,B,-0.342161,0.175771


### Pivot Tables

In [24]:
# Sample data for the pivot-table examples: three categorical key columns
# (A, B, C) and two numeric value columns (D, E), twelve rows in total.
# NOTE: this rebinds `df`, replacing the frame used in the stack/unstack cells.
# The numeric columns come from a seeded local generator so that every
# Restart & Run All reproduces the same values (unseeded np.random.randn
# yields different numbers on each run).
rng = np.random.default_rng(0)
df = pd.DataFrame({
    'A': ['one', 'one', 'two', 'three'] * 3,
    'B': ['A', 'B', 'C'] * 4,
    'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
    'D': rng.standard_normal(12),
    'E': rng.standard_normal(12),
})

In [25]:
# Inspect the long-format frame that the pivot table below is built from.
df

Unnamed: 0,A,B,C,D,E
0,one,A,foo,-0.501594,0.146318
1,one,B,foo,0.712424,-0.384372
2,two,C,foo,-0.440551,-0.162417
3,three,A,bar,-1.112508,0.708032
4,one,B,bar,0.091361,-0.436871
5,one,C,bar,-0.129742,0.409215
6,two,A,foo,0.860685,1.293326
7,three,B,foo,1.770378,-0.169101
8,one,C,foo,1.003843,1.783656
9,one,A,bar,-0.013305,-1.140718


In [26]:
# Spread the D values into a grid: one row per (A, B) pair, one column per
# C value. Duplicate cells would be averaged (pivot_table's default aggfunc),
# and combinations that never occur in the data are left as NaN.
df.pivot_table(values='D', index=['A', 'B'], columns=['C'])

Unnamed: 0_level_0,C,bar,foo
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,-0.013305,-0.501594
one,B,0.091361,0.712424
one,C,-0.129742,1.003843
three,A,-1.112508,
three,B,,1.770378
three,C,-0.732157,
two,A,,0.860685
two,B,0.203167,
two,C,,-0.440551
