Skip to content

Commit

Permalink
apacheGH-41389: [Python] Expose byte_width and bit_width of Extension…
Browse files Browse the repository at this point in the history
…Type in terms of the storage type (apache#41413)

### Rationale for this change

This update aligns the Python API with Arrow C++ by exposing the actual byte and bit widths of extension types from their storage type.

### What changes are included in this PR?

- Expose byte_width and bit_width properties for ExtensionType in Python, reflecting the underlying storage type.
- Add unit tests to verify these properties.

### Are these changes tested?

Yes

### Are there any user-facing changes?

Yes

* GitHub Issue: apache#41389

Lead-authored-by: Hyunseok Seo <hsseo0501@gmail.com>
Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
  • Loading branch information
llama90 and jorisvandenbossche committed May 21, 2024
1 parent 1f07404 commit e254c43
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 2 deletions.
2 changes: 2 additions & 0 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -2801,6 +2801,8 @@ cdef extern from "arrow/extension_type.h" namespace "arrow":
cdef cppclass CExtensionType" arrow::ExtensionType"(CDataType):
c_string extension_name()
shared_ptr[CDataType] storage_type()
int byte_width()
int bit_width()

@staticmethod
shared_ptr[CArray] WrapArray(shared_ptr[CDataType] ext_type,
Expand Down
30 changes: 28 additions & 2 deletions python/pyarrow/tests/test_extension_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,14 +251,14 @@ def test_ext_type_repr():
assert repr(ty) == "IntegerType(DataType(int64))"


def test_ext_type__lifetime():
def test_ext_type_lifetime():
ty = UuidType()
wr = weakref.ref(ty)
del ty
assert wr() is None


def test_ext_type__storage_type():
def test_ext_type_storage_type():
ty = UuidType()
assert ty.storage_type == pa.binary(16)
assert ty.__class__ is UuidType
Expand All @@ -267,6 +267,32 @@ def test_ext_type__storage_type():
assert ty.__class__ is ParamExtType


def test_ext_type_byte_width():
    """Check ``byte_width`` on fixed-width and non-fixed-width extension types."""
    # Extension types with fixed-size binary storage expose a byte width.
    uuid_ty = UuidType()
    assert uuid_ty.byte_width == 16
    param_ty = ParamExtType(5)
    assert param_ty.byte_width == 5

    # An extension type without fixed-size storage must refuse the lookup.
    label_ty = LabelType()
    with pytest.raises(ValueError, match="Non-fixed width type"):
        _ = label_ty.byte_width


def test_ext_type_bit_width():
    """Check ``bit_width`` on fixed-width and non-fixed-width extension types."""
    # Extension types with fixed-size binary storage expose a bit width.
    uuid_ty = UuidType()
    assert uuid_ty.bit_width == 128
    param_ty = ParamExtType(5)
    assert param_ty.bit_width == 40

    # An extension type without fixed-size storage must refuse the lookup.
    label_ty = LabelType()
    with pytest.raises(ValueError, match="Non-fixed width type"):
        _ = label_ty.bit_width


def test_ext_type_as_py():
ty = UuidType()
expected = uuid4()
Expand Down
18 changes: 18 additions & 0 deletions python/pyarrow/types.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -1519,6 +1519,24 @@ cdef class BaseExtensionType(DataType):
"""
return pyarrow_wrap_data_type(self.ext_type.storage_type())

@property
def byte_width(self):
    """
    The byte width of the extension type.

    Raises
    ------
    ValueError
        If the wrapped C++ extension type reports a byte width of -1,
        which this property treats as "not a fixed-width type".
    """
    # Query the C++ object once and reuse the answer for both the
    # sentinel check and the return value.
    width = self.ext_type.byte_width()
    if width == -1:
        raise ValueError("Non-fixed width type")
    return width

@property
def bit_width(self):
    """
    The bit width of the extension type.

    Raises
    ------
    ValueError
        If the wrapped C++ extension type reports a bit width of -1,
        which this property treats as "not a fixed-width type".
    """
    # Query the C++ object once and reuse the answer for both the
    # sentinel check and the return value.
    width = self.ext_type.bit_width()
    if width == -1:
        raise ValueError("Non-fixed width type")
    return width

def wrap_array(self, storage):
"""
Wrap the given storage array as an extension array.
Expand Down

0 comments on commit e254c43

Please sign in to comment.