Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Possible error supermarket.xlsx pivot_table()-method part #3

Open
GiantDutchViking opened this issue Aug 8, 2023 · 2 comments
Open

Comments

@GiantDutchViking
Copy link

Hope this is the correct way of asking a question even though the tutorial has been posted quite some years ago:

@3h:23m in the tutorial you show the following code:
df_sales.pivot_table(index="Gender", aggfunc="sum")

but when I use that on the downloaded supermarket_sales.xlsx I get below stated error. Seeing the most bottom part it kinda looks like it makes sense but this tutorial being my 1st experience with Python I'm not sure if that's the case. Especially since your tutorial clearly shows you also have a datetime looking column
"TypeError: datetime64 type does not support sum operations"

Am I correct that the datetime column is being the culprit and any suggestions how to counter this?
Thanks for the tutorial btw, learning loads and having quite some fun!

`---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[31], line 2
1 # make a pivot table and add an aggegregate function
----> 2 df_sales.pivot_table(index="Gender", aggfunc="sum")

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\frame.py:8579, in DataFrame.pivot_table(self, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name, observed, sort)
8562 @substitution("")
8563 @appender(_shared_docs["pivot_table"])
8564 def pivot_table(
(...)
8575 sort: bool = True,
8576 ) -> DataFrame:
8577 from pandas.core.reshape.pivot import pivot_table
-> 8579 return pivot_table(
8580 self,
8581 values=values,
8582 index=index,
8583 columns=columns,
8584 aggfunc=aggfunc,
8585 fill_value=fill_value,
8586 margins=margins,
8587 dropna=dropna,
8588 margins_name=margins_name,
8589 observed=observed,
8590 sort=sort,
8591 )

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\reshape\pivot.py:97, in pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name, observed, sort)
94 table = concat(pieces, keys=keys, axis=1)
95 return table.finalize(data, method="pivot_table")
---> 97 table = __internal_pivot_table(
98 data,
99 values,
100 index,
101 columns,
102 aggfunc,
103 fill_value,
104 margins,
105 dropna,
106 margins_name,
107 observed,
108 sort,
109 )
110 return table.finalize(data, method="pivot_table")

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\reshape\pivot.py:167, in __internal_pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name, observed, sort)
164 values = list(values)
166 grouped = data.groupby(keys, observed=observed, sort=sort)
--> 167 agged = grouped.agg(aggfunc)
169 if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
170 agged = agged.dropna(how="all")

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\groupby\generic.py:1269, in DataFrameGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
1266 func = maybe_mangle_lambdas(func)
1268 op = GroupByApply(self, func, args, kwargs)
-> 1269 result = op.agg()
1270 if not is_dict_like(func) and result is not None:
1271 return result

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\apply.py:160, in Apply.agg(self)
157 kwargs = self.kwargs
159 if isinstance(arg, str):
--> 160 return self.apply_str()
162 if is_dict_like(arg):
163 return self.agg_dict_like()

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\apply.py:496, in Apply.apply_str(self)
494 if "axis" in arg_names:
495 self.kwargs["axis"] = self.axis
--> 496 return self._try_aggregate_string_function(obj, f, *self.args, **self.kwargs)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\apply.py:565, in Apply._try_aggregate_string_function(self, obj, arg, *args, **kwargs)
563 if f is not None:
564 if callable(f):
--> 565 return f(*args, **kwargs)
567 # people may try to aggregate on a non-callable attribute
568 # but don't let them think they can pass args to it
569 assert len(args) == 0

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\groupby\groupby.py:2263, in GroupBy.sum(self, numeric_only, min_count, engine, engine_kwargs)
2258 else:
2259 # If we are grouping on categoricals we want unobserved categories to
2260 # return zero, rather than the default of NaN which the reindexing in
2261 # _agg_general() returns. GH #31422
2262 with com.temp_setattr(self, "observed", True):
-> 2263 result = self._agg_general(
2264 numeric_only=numeric_only,
2265 min_count=min_count,
2266 alias="sum",
2267 npfunc=np.sum,
2268 )
2270 return self._reindex_output(result, fill_value=0)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\groupby\groupby.py:1422, in GroupBy._agg_general(self, numeric_only, min_count, alias, npfunc)
1413 @Final
1414 def _agg_general(
1415 self,
(...)
1420 npfunc: Callable,
1421 ):
-> 1422 result = self._cython_agg_general(
1423 how=alias,
1424 alt=npfunc,
1425 numeric_only=numeric_only,
1426 min_count=min_count,
1427 )
1428 return result.finalize(self.obj, method="groupby")

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\groupby\groupby.py:1507, in GroupBy._cython_agg_general(self, how, alt, numeric_only, min_count, **kwargs)
1503 result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)
1505 return result
-> 1507 new_mgr = data.grouped_reduce(array_func)
1508 res = self._wrap_agged_manager(new_mgr)
1509 out = self._wrap_aggregated_output(res)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\managers.py:1506, in BlockManager.grouped_reduce(self, func)
1504 result_blocks = extend_blocks(applied, result_blocks)
1505 else:
-> 1506 applied = blk.apply(func)
1507 result_blocks = extend_blocks(applied, result_blocks)
1509 if len(result_blocks) == 0:

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\blocks.py:329, in Block.apply(self, func, **kwargs)
323 @Final
324 def apply(self, func, **kwargs) -> list[Block]:
325 """
326 apply the function to my values; return a block if we are not
327 one
328 """
--> 329 result = func(self.values, **kwargs)
331 return self._split_op_result(result)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\groupby\groupby.py:1490, in GroupBy._cython_agg_general..array_func(values)
1488 def array_func(values: ArrayLike) -> ArrayLike:
1489 try:
-> 1490 result = self.grouper._cython_operation(
1491 "aggregate",
1492 values,
1493 how,
1494 axis=data.ndim - 1,
1495 min_count=min_count,
1496 **kwargs,
1497 )
1498 except NotImplementedError:
1499 # generally if we have numeric_only=False
1500 # and non-applicable functions
1501 # try to python agg
1502 # TODO: shouldn't min_count matter?
1503 result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\groupby\ops.py:959, in BaseGrouper._cython_operation(self, kind, values, how, axis, min_count, **kwargs)
957 ids, _, _ = self.group_info
958 ngroups = self.ngroups
--> 959 return cy_op.cython_operation(
960 values=values,
961 axis=axis,
962 min_count=min_count,
963 comp_ids=ids,
964 ngroups=ngroups,
965 **kwargs,
966 )

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\groupby\ops.py:645, in WrappedCythonOp.cython_operation(self, values, axis, min_count, comp_ids, ngroups, **kwargs)
641 is_numeric = is_numeric_dtype(dtype)
643 # can we do this operation with our cython functions
644 # if not raise NotImplementedError
--> 645 self._disallow_invalid_ops(dtype, is_numeric)
647 if not isinstance(values, np.ndarray):
648 # i.e. ExtensionArray
649 return self._ea_wrap_cython_operation(
650 values,
651 min_count=min_count,
(...)
654 **kwargs,
655 )

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\groupby\ops.py:257, in WrappedCythonOp._disallow_invalid_ops(self, dtype, is_numeric)
254 elif is_datetime64_any_dtype(dtype):
255 # Adding/multiplying datetimes is not valid
256 if how in ["sum", "prod", "cumsum", "cumprod"]:
--> 257 raise TypeError(f"datetime64 type does not support {how} operations")
258 elif is_period_dtype(dtype):
259 # Adding/multiplying Periods is not valid
260 if how in ["sum", "prod", "cumsum", "cumprod"]:

TypeError: datetime64 type does not support sum operations`

@HelenaSF95
Copy link

I am currently experiencing the same issue!

@HelenaSF95
Copy link

I found that specifying the values worked :) So, I'm just selecting the columns with numeric values leaving out other formats.

Here is my code:

pd.pivot_table(df_sales,index='Gender', values=['Quantity', 'Tax 5%', 'Total', 'gross margin percentage', 'gross income', 'Rating'],
aggfunc="sum", margins=False).round(1)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants