In [1]:
import pandas as pd
import numpy as np

In [None]:
pandas.DataFrame.pivot_table

DataFrame.pivot_table(
    values=None,
    index=None,
    columns=None,
    aggfunc='mean',
    fill_value=None,
    margins=False,
    dropna=True,
    margins_name='All',
    observed=<no_default>,
    sort=True
)

Create a spreadsheet-style pivot table as a DataFrame. The levels in the pivot table will be stored in MultiIndex objects (hierarchical indexes) on the index and columns of the result DataFrame.

values : list-like or scalar, optional
          Column or columns to aggregate.

index   : column, Grouper, array, or list of the previous
          Keys to group by on the pivot table index. If a list is passed, it can contain any of the other types (except list).
          If an array is passed, it must be the same length as the data and will be used in the same manner as column values.

columns : column, Grouper, array, or list of the previous
          Keys to group by on the pivot table column. If a list is passed, it can contain any of the other types (except list).
          If an array is passed, it must be the same length as the data and will be used in the same manner as column values.

aggfunc : function, list of functions, dict, default 'mean'
         If a list of functions is passed, the resulting pivot table will have hierarchical columns whose top level are the function names (inferred from the function objects themselves).
         If a dict is passed, the key is the column to aggregate and the value is a function or list of functions.
         If margins=True, aggfunc will be used to calculate the partial aggregates.

fill_value:  scalar, default None
           Value to replace missing values with (in the resulting pivot table, after aggregation).

margins:   bool, default False
          If margins=True, special All columns and rows will be added with partial group aggregates across the categories on the rows and columns.

dropna : bool, default True
         Do not include columns whose entries are all NaN. If True, rows with a NaN value in any column will be omitted before computing margins.

margins_name: str, default ‘All’
            Name of the row / column that will contain the totals when margins is True.

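observed:     bool, default False
            This only applies if any of the groupers are Categoricals. If True: only show observed values for categorical groupers. If False: show all values for categorical groupers.
            Note: the signature above shows observed=<no_default> because, as of pandas 2.2, relying on the implicit default of False is deprecated; the default is slated to change to True in a future version, so pass observed explicitly when grouping on Categoricals.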

sort:         bool, default True
            Specifies if the result should be sorted.

In [2]:
# Toy dataset for the pivot_table examples: three string key columns
# (A, B, C) and two integer measure columns (D, E). Written one tuple per
# row so the literal reads like the rendered table.
df = pd.DataFrame(
    [
        ("foo", "one", "small", 1, 2),
        ("foo", "one", "large", 2, 4),
        ("foo", "one", "large", 2, 5),
        ("foo", "two", "small", 3, 5),
        ("foo", "two", "small", 3, 6),
        ("bar", "one", "large", 4, 6),
        ("bar", "one", "small", 5, 8),
        ("bar", "two", "small", 6, 9),
        ("bar", "two", "large", 7, 9),
    ],
    columns=["A", "B", "C", "D", "E"],
)
df

Unnamed: 0,A,B,C,D,E
0,foo,one,small,1,2
1,foo,one,large,2,4
2,foo,one,large,2,5
3,foo,two,small,3,5
4,foo,two,small,3,6
5,bar,one,large,4,6
6,bar,one,small,5,8
7,bar,two,small,6,9
8,bar,two,large,7,9


In [4]:
# Sum D for each (A, B) pair, with one output column per category of C.
# No fill_value is passed, so an (A, B, C) combination that never occurs
# in df is left as NaN in the result.
table = df.pivot_table(
    values="D",
    index=["A", "B"],
    columns=["C"],
    aggfunc="sum",
)
table

Unnamed: 0_level_0,C,large,small
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,4.0,5.0
bar,two,7.0,6.0
foo,one,4.0,1.0
foo,two,,6.0


In [5]:
# Same sum-of-D pivot over (A, B) x C, but fill_value=0 replaces the cells
# that have no matching rows in df with 0 instead of leaving them missing.
table = df.pivot_table(
    values="D",
    index=["A", "B"],
    columns=["C"],
    aggfunc="sum",
    fill_value=0,
)
table

Unnamed: 0_level_0,C,large,small
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,4,5
bar,two,7,6
foo,one,4,1
foo,two,0,6


In [6]:
# Group by (A, C) and aggregate two value columns at once. The dict form of
# aggfunc maps each value column to its own aggregation; here both D and E
# use the mean.
table = df.pivot_table(
    values=["D", "E"],
    index=["A", "C"],
    aggfunc={"D": "mean", "E": "mean"},
)
table

Unnamed: 0_level_0,Unnamed: 1_level_0,D,E
A,C,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,large,5.5,7.5
bar,small,5.5,8.5
foo,large,2.0,4.5
foo,small,2.333333,4.333333


In [7]:
# Group by (A, C) with a different aggregation per value column: D gets its
# mean only, while E gets min, max and mean. Because E maps to a list of
# functions, the result columns form a two-level header (column, function).
table = df.pivot_table(
    values=["D", "E"],
    index=["A", "C"],
    aggfunc={
        "D": "mean",
        "E": ["min", "max", "mean"],
    },
)
table

Unnamed: 0_level_0,Unnamed: 1_level_0,D,E,E,E
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,max,mean,min
A,C,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
bar,large,5.5,9,7.5,6
bar,small,5.5,9,8.5,8
foo,large,2.0,5,4.5,4
foo,small,2.333333,6,4.333333,2
