# Pivot Tables & Reshaping

Vamos a ver algunas técnicas para manejar DataFrames

In [65]:
import pandas as pd
import numpy as np

In [66]:
def unpivot(frame):
    """Reshape a wide DataFrame into long (date, variable, value) format.

    Every cell of ``frame`` becomes one output row: ``date`` holds the
    original index label, ``variable`` the original column label and
    ``value`` the cell content. Rows are emitted column by column.
    """
    n_rows, n_cols = frame.shape
    data = {
        # Column-major ('F') flattening keeps the values aligned with the
        # repeated column labels and the tiled index labels below.
        'value': frame.values.ravel('F'),
        'variable': np.asarray(frame.columns).repeat(n_rows),
        'date': np.tile(np.asarray(frame.index), n_cols),
    }
    return pd.DataFrame(data, columns=['date', 'variable', 'value'])


# Build the 3x4 sample frame directly (3 business days, columns A-D);
# ``pandas.util.testing`` and its ``makeTimeDataFrame`` helper are not
# available in current pandas releases.
df = unpivot(
    pd.DataFrame(
        np.random.randn(3, 4),
        index=pd.date_range('2000-01-03', periods=3, freq='B'),
        columns=list('ABCD'),
    )
)

In [67]:
df.head(10)

Unnamed: 0,date,variable,value
0,2000-01-03,A,0.585463
1,2000-01-04,A,-0.22539
2,2000-01-05,A,0.187171
3,2000-01-03,B,0.203887
4,2000-01-04,B,-1.532304
5,2000-01-05,B,0.172215
6,2000-01-03,C,-1.123566
7,2000-01-04,C,-0.329053
8,2000-01-05,C,-0.455719
9,2000-01-03,D,-1.261929


In [68]:
df.pivot(index='date',columns='variable',values='value')

variable,A,B,C,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03,0.585463,0.203887,-1.123566,-1.261929
2000-01-04,-0.22539,-1.532304,-0.329053,-0.398633
2000-01-05,0.187171,0.172215,-0.455719,-0.563963


Si tenemos más de una columna de valores y omitimos el parámetro 'values', entonces pandas creará columnas jerárquicas por nosotros. Ejemplo:

Creamos una columna adicional 'value2'

In [69]:
# Second value column (twice `value`) so that a pivot without `values` has more than one column to spread.
df['value2']= df['value']*2

In [70]:
df.head()

Unnamed: 0,date,variable,value,value2
0,2000-01-03,A,0.585463,1.170926
1,2000-01-04,A,-0.22539,-0.45078
2,2000-01-05,A,0.187171,0.374343
3,2000-01-03,B,0.203887,0.407775
4,2000-01-04,B,-1.532304,-3.064608


In [71]:
# DataFrame.pivot takes keyword-only arguments in pandas >= 2.0. Leaving out
# `values` keeps every remaining column under a hierarchical column index.
pivoted = df.pivot(index='date', columns='variable')
pivoted

Unnamed: 0_level_0,value,value,value,value,value2,value2,value2,value2
variable,A,B,C,D,A,B,C,D
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
2000-01-03,0.585463,0.203887,-1.123566,-1.261929,1.170926,0.407775,-2.247131,-2.523858
2000-01-04,-0.22539,-1.532304,-0.329053,-0.398633,-0.45078,-3.064608,-0.658105,-0.797265
2000-01-05,0.187171,0.172215,-0.455719,-0.563963,0.374343,0.34443,-0.911438,-1.127925


In [72]:
type(pivoted)

pandas.core.frame.DataFrame

El resultado es un DataFrame con columnas jerárquicas, a las que podemos acceder escribiendo, por ejemplo:

In [73]:
pivoted['value2']

variable,A,B,C,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03,1.170926,0.407775,-2.247131,-2.523858
2000-01-04,-0.45078,-3.064608,-0.658105,-0.797265
2000-01-05,0.374343,0.34443,-0.911438,-1.127925


## Reshaping by stacking and unstacking

In [74]:
# Pair each first-level label with its second-level label, position by position.
tuples = list(zip(
    ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
    ['one', 'two'] * 4,
))

In [75]:
# Plain-Python equivalent of IPython's `?zip`: prints the built-in documentation.
help(zip)

In [76]:
tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [77]:
# Two-level index built from the label pairs in `tuples`; the levels are named 'first' and 'second'.
index = pd.MultiIndex.from_tuples(tuples,names=['first','second'])
index

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])

In [78]:
# Random 8x2 frame: one row per entry of the two-level `index`, columns A and B.
df2 = pd.DataFrame(np.random.randn(8,2),index=index, columns=['A','B'])
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.51468,0.43198
bar,two,0.928087,-1.081042
baz,one,0.402941,-0.530293
baz,two,-1.017023,-1.65774
foo,one,0.230068,1.270548
foo,two,0.063703,-0.27133
qux,one,1.943334,0.627181
qux,two,0.496295,1.381683


La función **stack** comprime un nivel de las columnas del DataFrame para producir ya sea:
<ul>
<li> Una Serie, en el caso de columnas con un índice simple (un solo nivel) </li>
<li> Un DataFrame, en el caso de columnas con un índice múltiple (MultiIndex) </li>
</ul>

In [79]:
# Move the column labels (A, B) into a new innermost index level; with single-level columns the result is a Series.
stacked = df2.stack()
stacked

first  second   
bar    one     A   -0.514680
               B    0.431980
       two     A    0.928087
               B   -1.081042
baz    one     A    0.402941
               B   -0.530293
       two     A   -1.017023
               B   -1.657740
foo    one     A    0.230068
               B    1.270548
       two     A    0.063703
               B   -0.271330
qux    one     A    1.943334
               B    0.627181
       two     A    0.496295
               B    1.381683
dtype: float64

La operación inversa de stack es **unstack** que por default descomprime el último nivel

In [80]:
stacked.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.51468,0.43198
bar,two,0.928087,-1.081042
baz,one,0.402941,-0.530293
baz,two,-1.017023,-1.65774
foo,one,0.230068,1.270548
foo,two,0.063703,-0.27133
qux,one,1.943334,0.627181
qux,two,0.496295,1.381683


In [81]:
stacked.unstack(0)

Unnamed: 0_level_0,first,bar,baz,foo,qux
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,A,-0.51468,0.402941,0.230068,1.943334
one,B,0.43198,-0.530293,1.270548,0.627181
two,A,0.928087,-1.017023,0.063703,0.496295
two,B,-1.081042,-1.65774,-0.27133,1.381683


In [82]:
stacked.unstack(1)

Unnamed: 0_level_0,second,one,two
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,A,-0.51468,0.928087
bar,B,0.43198,-1.081042
baz,A,0.402941,-1.017023
baz,B,-0.530293,-1.65774
foo,A,0.230068,0.063703
foo,B,1.270548,-0.27133
qux,A,1.943334,0.496295
qux,B,0.627181,1.381683


Si los niveles tienen nombres como es el caso, entonces podemos referirnos a ellos por nombre

In [83]:
stacked.unstack('second')

Unnamed: 0_level_0,second,one,two
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,A,-0.51468,0.928087
bar,B,0.43198,-1.081042
baz,A,0.402941,-1.017023
baz,B,-0.530293,-1.65774
foo,A,0.230068,0.063703
foo,B,1.270548,-0.27133
qux,A,1.943334,0.496295
qux,B,0.627181,1.381683


Nota que los métodos stack y unstack ordenan implícitamente los niveles del índice involucrados, por lo que aplicar unstack y luego stack nos regresa una copia ordenada del DataFrame original

Generemos un nuevo DataFrame para trabajar y probar nuestra teoría

In [84]:
# The first level is listed out of order (2 before 1), so a frame built on this index starts unsorted.
index = pd.MultiIndex.from_product([[2,1], ['a', 'b']])

In [85]:
# Single-column frame of random values, one per entry of the unsorted `index`.
df = pd.DataFrame(np.random.randn(4), index=index, columns=['A'])
df

Unnamed: 0,Unnamed: 1,A
2,a,0.867635
2,b,1.669287
1,a,0.453452
1,b,-0.708299


In [86]:
# Compare element-wise and reduce over the whole frame. The builtin all()
# would only iterate over the column labels of the comparison result, which
# is truthy no matter what the data looks like.
bool((df.unstack().stack() == df.sort_index()).all(axis=None))

True

¡Comprobada!

### Trabajando con múltiples levels (niveles)

Primero generemos el DataFrame

In [87]:
# One list per column level; position i across the three lists describes column i.
columns = pd.MultiIndex.from_arrays(
    [
        ['A', 'B', 'A', 'B'],
        ['cat', 'cat', 'dog', 'dog'],
        ['long', 'long', 'short', 'short'],
    ],
    names=['exp', 'animal', 'hair_length'],
)

In [88]:
# Random 4x4 frame whose columns carry the three-level (exp, animal, hair_length) index.
df = pd.DataFrame(np.random.randn(4, 4), columns=columns)
df

exp,A,B,A,B
animal,cat,cat,dog,dog
hair_length,long,long,short,short
0,-0.586704,-1.113524,0.505546,-0.7855
1,-1.618846,1.102545,0.071818,0.146051
2,-1.830518,-0.436956,0.011143,0.671305
3,-0.187962,1.288162,-0.057921,1.210106


La lista de niveles puede contener, sin problema, los nombres de los niveles o sus posiciones (índices numéricos). Entonces:

In [89]:
df.stack(level=['animal', 'hair_length'])

Unnamed: 0_level_0,Unnamed: 1_level_0,exp,A,B
Unnamed: 0_level_1,animal,hair_length,Unnamed: 3_level_1,Unnamed: 4_level_1
0,cat,long,-0.586704,-1.113524
0,dog,short,0.505546,-0.7855
1,cat,long,-1.618846,1.102545
1,dog,short,0.071818,0.146051
2,cat,long,-1.830518,-0.436956
2,dog,short,0.011143,0.671305
3,cat,long,-0.187962,1.288162
3,dog,short,-0.057921,1.210106


alternativamente

In [90]:
df.stack(level=[1, 2])

Unnamed: 0_level_0,Unnamed: 1_level_0,exp,A,B
Unnamed: 0_level_1,animal,hair_length,Unnamed: 3_level_1,Unnamed: 4_level_1
0,cat,long,-0.586704,-1.113524
0,dog,short,0.505546,-0.7855
1,cat,long,-1.618846,1.102545
1,dog,short,0.071818,0.146051
2,cat,long,-1.830518,-0.436956
2,dog,short,0.011143,0.671305
3,cat,long,-0.187962,1.288162
3,dog,short,-0.057921,1.210106


### Valores Faltantes (Missing Values)

Generemos primero el DataFrame

In [91]:
# One list per column level; position i across both lists describes column i.
columns = pd.MultiIndex.from_arrays(
    [
        ['A', 'B', 'B', 'A'],
        ['cat', 'dog', 'cat', 'dog'],
    ],
    names=['exp', 'animal'],
)
    

In [92]:
# Cartesian product of the two label sets: every 'first' label paired with 'one' and 'two'.
index = pd.MultiIndex.from_product(
    [['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
    names=['first', 'second'],
)
    

In [93]:
# Random 8x4 frame with the two-level row `index` and the two-level (exp, animal) `columns`.
df = pd.DataFrame(np.random.randn(8, 4), index=index, columns=columns)
df

Unnamed: 0_level_0,exp,A,B,B,A
Unnamed: 0_level_1,animal,cat,dog,cat,dog
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
bar,one,-0.573444,-1.886642,-0.296534,-0.362406
bar,two,-2.567233,0.427774,0.605328,-0.592754
baz,one,-0.055543,-0.042737,-0.232439,0.633023
baz,two,0.403394,-0.203807,-1.6078,-0.565935
foo,one,-1.992386,0.098007,-1.053219,-1.724144
foo,two,-1.609317,-1.172114,-0.286725,-0.061338
qux,one,-0.268931,-0.051576,0.532569,-1.133503
qux,two,-0.071953,-1.514956,-0.513448,0.931387


In [94]:
# Keep rows 0, 1, 2, 4, 5 and 7 by position, which leaves out ('baz', 'two') and ('qux', 'one').
df2 = df.iloc[[0, 1, 2, 4, 5, 7]]
df2

Unnamed: 0_level_0,exp,A,B,B,A
Unnamed: 0_level_1,animal,cat,dog,cat,dog
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
bar,one,-0.573444,-1.886642,-0.296534,-0.362406
bar,two,-2.567233,0.427774,0.605328,-0.592754
baz,one,-0.055543,-0.042737,-0.232439,0.633023
foo,one,-1.992386,0.098007,-1.053219,-1.724144
foo,two,-1.609317,-1.172114,-0.286725,-0.061338
qux,two,-0.071953,-1.514956,-0.513448,0.931387


Con el método stack podemos elegir qué nivel de las columnas queremos apilar

In [95]:
df2.stack('exp')

Unnamed: 0_level_0,Unnamed: 1_level_0,animal,cat,dog
first,second,exp,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,A,-0.573444,-0.362406
bar,one,B,-0.296534,-1.886642
bar,two,A,-2.567233,-0.592754
bar,two,B,0.605328,0.427774
baz,one,A,-0.055543,0.633023
baz,one,B,-0.232439,-0.042737
foo,one,A,-1.992386,-1.724144
foo,one,B,-1.053219,0.098007
foo,two,A,-1.609317,-0.061338
foo,two,B,-0.286725,-1.172114


In [96]:
df2.stack('animal')

Unnamed: 0_level_0,Unnamed: 1_level_0,exp,A,B
first,second,animal,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,cat,-0.573444,-0.296534
bar,one,dog,-0.362406,-1.886642
bar,two,cat,-2.567233,0.605328
bar,two,dog,-0.592754,0.427774
baz,one,cat,-0.055543,-0.232439
baz,one,dog,0.633023,-0.042737
foo,one,cat,-1.992386,-1.053219
foo,one,dog,-1.724144,0.098007
foo,two,cat,-1.609317,-0.286725
foo,two,dog,-0.061338,-1.172114


Ahora veamos que hacer unstacking puede resultar en valores faltantes si los subgrupos no tienen el mismo conjunto de etiquetas

In [97]:
# Positional subset: rows 0, 1, 4, 7 and columns 1, 2, so the 'first' groups no longer share the same 'second' labels.
df3 = df.iloc[[0, 1, 4, 7], [1, 2]]
df3

Unnamed: 0_level_0,exp,B,B
Unnamed: 0_level_1,animal,dog,cat
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2
bar,one,-1.886642,-0.296534
bar,two,0.427774,0.605328
foo,one,0.098007,-1.053219
qux,two,-1.514956,-0.513448


In [98]:
df3.unstack()

exp,B,B,B,B
animal,dog,dog,cat,cat
second,one,two,one,two
first,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3
bar,-1.886642,0.427774,-0.296534,0.605328
foo,0.098007,,-1.053219,
qux,,-1.514956,,-0.513448


Para subsanar este problema, unstack toma el argumento 'fill_value' para especificar el tratamiento que le queremos dar a esos valores faltantes (missing values)

In [99]:
df3.unstack(fill_value=-1e9)

exp,B,B,B,B
animal,dog,dog,cat,cat
second,one,two,one,two
first,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3
bar,-1.886642,0.4277739,-0.2965341,0.6053285
foo,0.09800737,-1000000000.0,-1.053219,-1000000000.0
qux,-1000000000.0,-1.514956,-1000000000.0,-0.513448


### Valores faltantes con multi-indexing

In [100]:
df[:3]

Unnamed: 0_level_0,exp,A,B,B,A
Unnamed: 0_level_1,animal,cat,dog,cat,dog
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
bar,one,-0.573444,-1.886642,-0.296534,-0.362406
bar,two,-2.567233,0.427774,0.605328,-0.592754
baz,one,-0.055543,-0.042737,-0.232439,0.633023


In [35]:
df[:3].unstack(0)

exp,A,A,B,B,B,B,A,A
animal,cat,cat,dog,dog,cat,cat,dog,dog
first,bar,baz,bar,baz,bar,baz,bar,baz
second,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
one,-0.263316,1.004609,0.265991,-0.196877,-0.421131,0.541896,-1.284754,-0.509182
two,-1.791886,,1.01191,,-0.659064,,0.991658,


In [36]:
df2.unstack(1)

exp,A,A,B,B,B,B,A,A
animal,cat,cat,dog,dog,cat,cat,dog,dog
second,one,two,one,two,one,two,one,two
first,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
bar,-0.263316,-1.791886,0.265991,1.01191,-0.421131,-0.659064,-1.284754,0.991658
baz,1.004609,,-0.196877,,0.541896,,-0.509182,
foo,0.020466,-0.7432,0.795574,0.154623,-0.106643,-0.968228,-1.420256,1.516166
qux,,1.373119,,-0.621956,,0.670785,,-0.5743


### Masajeando un DataFrame con melt

In [37]:
# Two people, one row each: identifier columns (first, last) plus two measurements.
cheese = pd.DataFrame(
    dict(
        first=['John', 'Mary'],
        last=['Doe', 'Bo'],
        height=[5.5, 6.0],
        weight=[130, 150],
    )
)
cheese

Unnamed: 0,first,height,last,weight
0,John,5.5,Doe,130
1,Mary,6.0,Bo,150


In [38]:
cheese.melt(id_vars=['first', 'last'])

Unnamed: 0,first,last,variable,value
0,John,Doe,height,5.5
1,Mary,Bo,height,6.0
2,John,Doe,weight,130.0
3,Mary,Bo,weight,150.0


In [39]:
cheese.melt(id_vars=['first', 'last'], var_name='quantity')

Unnamed: 0,first,last,quantity,value
0,John,Doe,height,5.5
1,Mary,Bo,height,6.0
2,John,Doe,weight,130.0
3,Mary,Bo,weight,150.0


### Combinando estadísticas y GroupBy

In [101]:
df

Unnamed: 0_level_0,exp,A,B,B,A
Unnamed: 0_level_1,animal,cat,dog,cat,dog
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
bar,one,-0.573444,-1.886642,-0.296534,-0.362406
bar,two,-2.567233,0.427774,0.605328,-0.592754
baz,one,-0.055543,-0.042737,-0.232439,0.633023
baz,two,0.403394,-0.203807,-1.6078,-0.565935
foo,one,-1.992386,0.098007,-1.053219,-1.724144
foo,two,-1.609317,-1.172114,-0.286725,-0.061338
qux,one,-0.268931,-0.051576,0.532569,-1.133503
qux,two,-0.071953,-1.514956,-0.513448,0.931387


In [102]:
df.stack()

Unnamed: 0_level_0,Unnamed: 1_level_0,exp,A,B
first,second,animal,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,cat,-0.573444,-0.296534
bar,one,dog,-0.362406,-1.886642
bar,two,cat,-2.567233,0.605328
bar,two,dog,-0.592754,0.427774
baz,one,cat,-0.055543,-0.232439
baz,one,dog,0.633023,-0.042737
baz,two,cat,0.403394,-1.6078
baz,two,dog,-0.565935,-0.203807
foo,one,cat,-1.992386,-1.053219
foo,one,dog,-1.724144,0.098007


In [103]:
df.stack().mean(1)

first  second  animal
bar    one     cat      -0.434989
               dog      -1.124524
       two     cat      -0.980952
               dog      -0.082490
baz    one     cat      -0.143991
               dog       0.295143
       two     cat      -0.602203
               dog      -0.384871
foo    one     cat      -1.522802
               dog      -0.813068
       two     cat      -0.948021
               dog      -0.616726
qux    one     cat       0.131819
               dog      -0.592539
       two     cat      -0.292701
               dog      -0.291784
dtype: float64

In [104]:
df.stack().mean(1).unstack()

Unnamed: 0_level_0,animal,cat,dog
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.434989,-1.124524
bar,two,-0.980952,-0.08249
baz,one,-0.143991,0.295143
baz,two,-0.602203,-0.384871
foo,one,-1.522802,-0.813068
foo,two,-0.948021,-0.616726
qux,one,0.131819,-0.592539
qux,two,-0.292701,-0.291784


Alternativamente

In [42]:
# Grouping along the columns with `axis=1` is deprecated in pandas;
# transpose, group the (former) column level 1, and transpose back.
df.T.groupby(level=1).mean().T

Unnamed: 0_level_0,animal,cat,dog
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.342224,-0.509381
bar,two,-1.225475,1.001784
baz,one,0.773252,-0.35303
baz,two,-0.771028,2.16135
foo,one,-0.043089,-0.312341
foo,two,-0.855714,0.835395
qux,one,0.418577,0.298551
qux,two,1.021952,-0.598128


In [43]:
df.stack().groupby(level=1).mean()

exp,A,B
second,Unnamed: 1_level_1,Unnamed: 2_level_1
one,-0.337322,0.319901
two,0.389773,0.002761


In [44]:
df.mean().unstack(0)

exp,A,B
animal,Unnamed: 1_level_1,Unnamed: 2_level_1
cat,-0.258862,0.002925
dog,0.311313,0.319737


### Por fin.... Tablas dinámicas!!

In [45]:
import datetime

# 24-row sample: three categorical keys (A, B, C), two random measures (D, E)
# and a date column F covering every month of 2013 on the 1st, then on the 15th.
df = pd.DataFrame({
    'A': ['one', 'one', 'two', 'three'] * 6,
    'B': ['A', 'B', 'C'] * 8,
    'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 4,
    'D': np.random.randn(24),
    'E': np.random.randn(24),
    'F': [
        datetime.datetime(2013, month, day)
        for day in (1, 15)
        for month in range(1, 13)
    ],
})
df

Unnamed: 0,A,B,C,D,E,F
0,one,A,foo,-0.835172,1.799015,2013-01-01
1,one,B,foo,1.833729,2.070393,2013-02-01
2,two,C,foo,0.257224,-1.367759,2013-03-01
3,three,A,bar,0.307289,2.030035,2013-04-01
4,one,B,bar,-0.423991,-0.265204,2013-05-01
5,one,C,bar,0.81063,1.739166,2013-06-01
6,two,A,foo,0.76377,0.974195,2013-07-01
7,three,B,foo,0.296675,-0.574531,2013-08-01
8,one,C,foo,-0.41477,-0.59002,2013-09-01
9,one,A,bar,0.849094,0.350371,2013-10-01


Podemos pedirle que nos haga una tabla dinámica de esta información de la siguiente forma

In [46]:
pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C'])

Unnamed: 0_level_0,C,bar,foo
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,0.53712,-1.230993
one,B,-1.14289,0.91777
one,C,0.08952,0.66268
three,A,0.941718,
three,B,,-0.979393
three,C,0.948707,
two,A,,0.047028
two,B,-0.314629,
two,C,,-0.429362


In [47]:
# Use the string alias 'sum': passing NumPy callables such as np.sum as
# `aggfunc` is deprecated in pandas, while 'sum' keeps the same result.
pd.pivot_table(df, values='D', index=['B'], columns=['A', 'C'], aggfunc='sum')

A,one,one,three,three,two,two
C,bar,foo,bar,foo,bar,foo
B,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
A,1.074239,-2.461986,1.883436,,,0.094057
B,-2.28578,1.835541,,-1.958787,-0.629257,
C,0.17904,1.32536,1.897414,,,-0.858725


In [48]:
# Use the string alias 'sum': passing NumPy callables such as np.sum as
# `aggfunc` is deprecated in pandas, while 'sum' keeps the same result.
pd.pivot_table(df, values=['D', 'E'], index=['B'], columns=['A', 'C'], aggfunc='sum')

Unnamed: 0_level_0,D,D,D,D,D,D,E,E,E,E,E,E
A,one,one,three,three,two,two,one,one,three,three,two,two
C,bar,foo,bar,foo,bar,foo,bar,foo,bar,foo,bar,foo
B,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
A,1.074239,-2.461986,1.883436,,,0.094057,-0.35663,0.855697,1.771187,,,-0.064347
B,-2.28578,1.835541,,-1.958787,-0.629257,,-0.426053,1.902528,,-0.64368,-2.309317,
C,0.17904,1.32536,1.897414,,,-0.858725,3.441882,-0.779012,-2.756801,,,-1.933443


El objeto resultante es un DataFrame con índices jerárquicos en filas y columnas. Si no le pasamos el argumento 'values', la tabla dinámica incluirá todos los datos que puedan agregarse, en un nivel jerárquico adicional de las columnas, como por ejemplo:

In [49]:
pd.pivot_table(df, index=['A', 'B'], columns=['C'])

Unnamed: 0_level_0,Unnamed: 1_level_0,D,D,E,E
Unnamed: 0_level_1,C,bar,foo,bar,foo
A,B,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
one,A,0.53712,-1.230993,-0.178315,0.427849
one,B,-1.14289,0.91777,-0.213027,0.951264
one,C,0.08952,0.66268,1.720941,-0.389506
three,A,0.941718,,0.885594,
three,B,,-0.979393,,-0.32184
three,C,0.948707,,-1.378401,
two,A,,0.047028,,-0.032174
two,B,-0.314629,,-1.154659,
two,C,,-0.429362,,-0.966722


Para mejorar el output de una pivot table se puede usar el método .to_string() de la siguiente forma

In [50]:
# Same pivot table as the previous cell, kept in a variable so it can be rendered with to_string().
table = pd.pivot_table(df, index=['A', 'B'], columns=['C'])

In [51]:
print(table.to_string(na_rep=''))

                D                   E          
C             bar       foo       bar       foo
A     B                                        
one   A  0.537120 -1.230993 -0.178315  0.427849
      B -1.142890  0.917770 -0.213027  0.951264
      C  0.089520  0.662680  1.720941 -0.389506
three A  0.941718            0.885594          
      B           -0.979393           -0.321840
      C  0.948707           -1.378401          
two   A            0.047028           -0.032174
      B -0.314629           -1.154659          
      C           -0.429362           -0.966722


Si quieres la agregación de filas y columnas en los márgenes de la tabla puedes adicionarlos sin problema usando el argumento 'margins=True' de la siguiente forma:

In [52]:
# Use the string alias 'std': passing np.std as `aggfunc` is deprecated in
# pandas, and 'std' keeps the sample standard deviation that pandas computes
# today (np.std applied directly would use ddof=0 instead).
df.pivot_table(index=['A', 'B'], columns='C', margins=True, aggfunc='std')

Unnamed: 0_level_0,Unnamed: 1_level_0,D,D,D,E,E,E
Unnamed: 0_level_1,C,bar,foo,All,bar,foo,All
A,B,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
one,A,0.441199,0.559776,1.10064,0.747674,1.939123,1.249886
one,B,1.016677,1.295361,1.522926,0.073791,1.582687,1.135182
one,C,1.019803,1.523745,1.1091,0.025774,0.28357,1.229507
three,A,0.897218,,0.897218,1.618485,,1.618485
three,B,,1.804634,1.804634,,0.357359,0.357359
three,C,0.650295,,0.650295,0.604312,,0.604312
two,A,,1.013625,1.013625,,1.42322,1.42322
two,B,1.965972,,1.965972,1.164925,,1.164925
two,C,,0.97098,0.97098,,0.567152,0.567152
All,,1.126707,1.246217,1.15042,1.313767,1.101303,1.160593
