Skip to content

Commit

Permalink
Put the Awkward-in-Pandas feature up for a vote, citing #350.
Browse files Browse the repository at this point in the history
  • Loading branch information
jpivarski committed Jul 23, 2020
1 parent 61f0ab5 commit 284c319
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 19 deletions.
54 changes: 35 additions & 19 deletions src/awkward1/_connect/_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,24 @@ def register():
PandasMixin.__bases__ = (pandas.api.extensions.ExtensionArray,)


def vote():
    """Raise an explanatory error if ``AwkwardDtype`` is ``None``.

    Reads the module-level ``AwkwardDtype``. When it is ``None``, a
    ``RuntimeError`` is raised whose text tells the user to call
    ``ak.pandas.register()`` first, links to the deprecation discussion
    (scikit-hep/awkward-1.0 issue 350), and mentions ``ak.pandas.df`` and
    ``ak.pandas.dfs`` as a distinct alternative.  Otherwise the function
    returns ``None`` without doing anything.

    Raises:
        RuntimeError: if ``AwkwardDtype`` is ``None``.
    """
    # AwkwardDtype is only read here, never rebound, so a plain global
    # lookup is sufficient.
    if AwkwardDtype is not None:
        return

    message_parts = (
        "You seem to be trying to use an Awkward Array as a Pandas Series ",
        "or DataFrame column. This is currently allowed if you first call",
        "\n\n ak.pandas.register()\n\nbut it is being considered for ",
        "deprecation. See",
        "\n\n https://github.com/scikit-hep/awkward-1.0/issues/350\n\n",
        "for reasons why it may be removed and explain your use-case there ",
        "if you don't want it to be removed. Note that this is distinct from",
        "\n\n ak.pandas.df(array)\n ak.pandas.dfs(array)\n\n",
        "which may work better for you anyway, depending on what you're ",
        "trying to accomplish.",
    )
    raise RuntimeError("".join(message_parts))


checked_version = False


Expand Down Expand Up @@ -106,7 +124,7 @@ def __hash__(self):
class PandasMixin(PandasNotImportedYet):
@property
def _typ(self):
register()
vote()
return "dataframe"

@property
Expand All @@ -117,7 +135,7 @@ def columns(self):
return [NoFields()]

def _ixs(self, i, axis):
register()
vote()
if self.layout.numfields >= 0:
return get_pandas().Series(self[str(i)])
else:
Expand All @@ -128,7 +146,7 @@ def _ixs(self, i, axis):
@classmethod
def _from_sequence(cls, scalars, *args, **kwargs):
# https://pandas.pydata.org/pandas-docs/version/1.0.0/reference/api/pandas.api.extensions.ExtensionArray._from_sequence.html
register()
vote()
dtype, copy = awkward1._util.extra(
args, kwargs, [("dtype", None), ("copy", False)]
)
Expand All @@ -137,7 +155,7 @@ def _from_sequence(cls, scalars, *args, **kwargs):
@classmethod
def _from_factorized(cls, values, original):
# https://pandas.pydata.org/pandas-docs/version/1.0.0/reference/api/pandas.api.extensions.ExtensionArray._from_factorized.html
register()
vote()
raise NotImplementedError("_from_factorized")

# __getitem__(self)
Expand All @@ -149,7 +167,7 @@ def dtype(self):
if awkward1._util.called_by_module(
"pandas"
) and not awkward1._util.called_by_module("dask"):
register()
vote()
if isinstance(self.layout, awkward1.partition.PartitionedArray):
raise ValueError(
"partitioned arrays cannot be Pandas columns; "
Expand Down Expand Up @@ -178,15 +196,15 @@ def shape(self):

def isna(self):
# https://pandas.pydata.org/pandas-docs/version/1.0.0/reference/api/pandas.api.extensions.ExtensionArray.isna.html
register()
vote()
return numpy.array(awkward1.operations.structure.is_none(self))

def take(self, indices, *args, **kwargs):
# https://pandas.pydata.org/pandas-docs/version/1.0.0/reference/api/pandas.api.extensions.ExtensionArray.take.html
allow_fill, fill_value = awkward1._util.extra(
args, kwargs, [("allow_fill", False), ("fill_value", None)]
)
register()
vote()

if allow_fill:
content1 = self.layout
Expand Down Expand Up @@ -230,7 +248,7 @@ def copy(self):
@classmethod
def _concat_same_type(cls, to_concat):
# https://pandas.pydata.org/pandas-docs/version/1.0.0/reference/api/pandas.api.extensions.ExtensionArray._concat_same_type.html
register()
vote()
return awkward1.operations.structure.concatenate(to_concat)

# RECOMMENDED for performance:
Expand All @@ -241,60 +259,59 @@ def _concat_same_type(cls, to_concat):
# ("value", None),
# ("method", None),
# ("limit", None)])
# register()
# vote()
# raise NotImplementedError
#
# def dropna(self):
# # https://pandas.pydata.org/pandas-docs/version/1.0.0/reference/api/pandas.api.extensions.ExtensionArray.dropna.html
# register()
# vote()
# raise NotImplementedError
#
# def unique(self):
# # https://pandas.pydata.org/pandas-docs/version/1.0.0/reference/api/pandas.api.extensions.ExtensionArray.unique.html
# register()
# vote()
# raise NotImplementedError
#
# def factorize(self, na_sentinel):
# # https://pandas.pydata.org/pandas-docs/version/1.0.0/reference/api/pandas.api.extensions.ExtensionArray.factorize.html
# register()
# vote()
# raise NotImplementedError
#
# def _values_for_factorize(self):
# # https://pandas.pydata.org/pandas-docs/version/1.0.0/reference/api/pandas.api.extensions.ExtensionArray._values_for_factorize.html
# register()
# vote()
# raise NotImplementedError
#
# def argsort(self, *args, **kwargs):
# # https://pandas.pydata.org/pandas-docs/version/1.0.0/reference/api/pandas.api.extensions.ExtensionArray.argsort.html
# ascending, kind = awkward1._util.extra(args, kwargs, [
# ("ascending", True),
# ("kind", "quicksort")]) # "quicksort", "mergesort", "heapsort"
# register()
# vote()
# raise NotImplementedError
#
# def _values_for_argsort(self):
# # https://pandas.pydata.org/pandas-docs/version/1.0.0/reference/api/pandas.api.extensions.ExtensionArray._values_for_argsort.html
# register()
# vote()
# raise NotImplementedError
#
# def searchsorted(self, value, *args, **kwargs):
# # https://pandas.pydata.org/pandas-docs/version/1.0.0/reference/api/pandas.api.extensions.ExtensionArray.searchsorted.html
# side, sorter = awkward1._util.extra(args, kwargs, [
# ("side", "left"),
# ("sorter", None)])
# register()
# vote()
# raise NotImplementedError
#
# def _reduce(self, name, *args, **kwargs):
# # https://pandas.pydata.org/pandas-docs/version/1.0.0/reference/api/pandas.api.extensions.ExtensionArray._reduce.html
# skipna, = awkward1._util.extra(args, kwargs, [
# ("skipna", True)])
# register()
# vote()
# raise NotImplementedError


def df(array, how="inner", levelname=lambda i: "sub" * i + "entry", anonymous="values"):
register()
pandas = get_pandas()
out = None
for df in dfs(array, levelname=levelname, anonymous=anonymous):
Expand All @@ -306,7 +323,6 @@ def df(array, how="inner", levelname=lambda i: "sub" * i + "entry", anonymous="v


def dfs(array, levelname=lambda i: "sub" * i + "entry", anonymous="values"):
register()
pandas = get_pandas()

def recurse(layout, row_arrays, col_names):
Expand Down
2 changes: 2 additions & 0 deletions tests/test_0090-as-pandas-extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

# Skip every test in this module when pandas is not installed.
pandas = pytest.importorskip("pandas")

# Explicit opt-in at import time: according to the error text raised by
# vote() in awkward1/_connect/_pandas.py, using an Awkward Array as a Pandas
# Series/DataFrame column is allowed only after ak.pandas.register() is called.
awkward1.pandas.register()

def test_numpy_structured_arrays_cant_be_pandas_printed():
a = awkward1.Array([{"a": 1}, {"a": 2}, {"a": 3}, {"a": 4}, {"a": 5}])
df = pandas.DataFrame({"column": a})
Expand Down

0 comments on commit 284c319

Please sign in to comment.