Skip to content

BUG: Pandas pivot_table fails with columns and margin #38691

Open
@davidmakovoz

Description

@davidmakovoz
  • [x ] I have checked that this issue has not already been reported.

  • [ x] I have confirmed this bug exists on the latest version of pandas.

  • (optional) I have confirmed this bug exists on the master branch of pandas.


Note: Please read this guide detailing how to provide the necessary information for us to reproduce your bug.

Code Sample, a copy-pastable example

arrays = [['bar', 'bar', 'foo', 'foo'],
          ['one', 'two', 'one', 'two'], ['A','A','B','B']]
tuples = list(zip(*arrays))
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second', 'third'])
values = np.array([[1,2,3,4],[5,6,7,8]])
df = pd.DataFrame(values.T, index=index, columns = ['0', '1'])

# here comes the pivot_table, this one works, it has 'colums'
df.pivot_table(index = ['first', 'second'],  columns = 'third', aggfunc = 'sum')

#this one works, it has 'margins'
df.pivot_table(index = ['first', 'second'],  aggfunc = 'sum', margins=True)

#this one fails, it has both 'columns' and 'margins'
df.pivot_table(index = ['first', 'second'],  columns = 'third', aggfunc = 'sum', margins=True)

Problem description


KeyError                                  Traceback (most recent call last)
<ipython-input-5-a9c12a5808db> in <module>
     13 
     14 #this one fails, it has both 'columns' and 'margins'
---> 15 df.pivot_table(index = ['first', 'second'],  columns = 'third', aggfunc = 'sum', margins=True)

~\Anaconda3\lib\site-packages\pandas\core\frame.py in pivot_table(self, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name, observed)
   6068         from pandas.core.reshape.pivot import pivot_table
   6069 
-> 6070         return pivot_table(
   6071             self,
   6072             values=values,

~\Anaconda3\lib\site-packages\pandas\core\reshape\pivot.py in pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name, observed)
    156         if dropna:
    157             data = data[data.notna().all(axis=1)]
--> 158         table = _add_margins(
    159             table,
    160             data,

~\Anaconda3\lib\site-packages\pandas\core\reshape\pivot.py in _add_margins(table, data, values, rows, cols, aggfunc, observed, margins_name, fill_value)
    226 
    227     elif values:
--> 228         marginal_result_set = _generate_marginal_results(
    229             table,
    230             data,

~\Anaconda3\lib\site-packages\pandas\core\reshape\pivot.py in _generate_marginal_results(table, data, values, rows, cols, aggfunc, observed, grand_margin, margins_name)
    323 
    324         if len(rows) > 0:
--> 325             margin = data[rows + values].groupby(rows, observed=observed).agg(aggfunc)
    326             cat_axis = 1
    327 

~\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   2804             if is_iterator(key):
   2805                 key = list(key)
-> 2806             indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1]
   2807 
   2808         # take() does not accept boolean indexers

~\Anaconda3\lib\site-packages\pandas\core\indexing.py in _get_listlike_indexer(self, key, axis, raise_missing)
   1550             keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
   1551 
-> 1552         self._validate_read_indexer(
   1553             keyarr, indexer, o._get_axis_number(axis), raise_missing=raise_missing
   1554         )

~\Anaconda3\lib\site-packages\pandas\core\indexing.py in _validate_read_indexer(self, key, indexer, axis, raise_missing)
   1644             if not (self.name == "loc" and not raise_missing):
   1645                 not_found = list(set(key) - set(ax))
-> 1646                 raise KeyError(f"{not_found} not in index")
   1647 
   1648             # we skip the warning on Categorical/Interval

KeyError: "['first', 'second'] not in index"

Expected Output

No error thrown.

Output of pd.show_versions()

[paste the output of pd.show_versions() here leaving a blank line after the details tag]
INSTALLED VERSIONS

commit : None
python : 3.8.3.final.0
python-bits : 64
OS : Windows
OS-release : 10
machine : AMD64
processor : Intel64 Family 6 Model 158 Stepping 10, GenuineIntel
byteorder : little
LC_ALL : None
LANG : None
LOCALE : English_United States.1252

pandas : 1.0.5
numpy : 1.18.5
pytz : 2020.1
dateutil : 2.8.1
pip : 20.1.1
setuptools : 49.2.0.post20200714
Cython : 0.29.21
pytest : 5.4.3
hypothesis : None
sphinx : 3.1.2
blosc : None
feather : None
xlsxwriter : 1.2.9
lxml.etree : 4.5.2
html5lib : 1.1
pymysql : None
psycopg2 : None
jinja2 : 2.11.2
IPython : 7.16.1
pandas_datareader: None
bs4 : 4.9.1
bottleneck : 1.3.2
fastparquet : None
gcsfs : None
lxml.etree : 4.5.2
matplotlib : 3.2.2
numexpr : 2.7.1
odfpy : None
openpyxl : 3.0.4
pandas_gbq : None
pyarrow : None
pytables : None
pytest : 5.4.3
pyxlsb : None
s3fs : None
scipy : 1.5.0
sqlalchemy : 1.3.18
tables : 3.6.1
tabulate : None
xarray : None
xlrd : 1.2.0
xlwt : 1.3.0
xlsxwriter : 1.2.9
numba : 0.50.1

Metadata

Metadata

Assignees

No one assigned

    Labels

    BugReshapingConcat, Merge/Join, Stack/Unstack, Explode

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions