# QuantityArray

## Review of a numpy series
A typical numpy series actually wraps a kind of ExtensionArray (here called NumpyExtensionArray), which itself wraps an actual numpy array.

In [5]:
import pandas as pd
import numpy as np

# A plain integer Series built from a numpy range.
s = pd.Series(np.arange(10))

# The Series itself.
print("s:", s, sep="\n")
# .array is the actual array backing a Series: always an ExtensionArray, i.e. a
# thin wrapper around one or more concrete arrays such as a numpy.ndarray.
print("s.array:", s.array, sep="\n")
# .values returns the Series as ndarray or ndarray-like, depending on the dtype.
print("s.values:", s.values, sep="\n")
# .to_numpy() output is printed last, for comparison with the two views above.
print("s.to_numpy:", s.to_numpy(), sep="\n")

s:
0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64
s.array:
<NumpyExtensionArray>
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Length: 10, dtype: int64
s.values:
[0 1 2 3 4 5 6 7 8 9]
s.to_numpy:
[0 1 2 3 4 5 6 7 8 9]


## QuantityArray
Physipandas defines a QuantityArray to represent quantities in a series. As with numpy data, a series that contains quantities wraps an ExtensionArray, here a QuantityArray.

In [7]:
# Series of lengths; with the 'physipy[m]' dtype the backing array is a QuantityArray.
s = pd.Series(np.arange(10) * m, dtype="physipy[m]")

# The Series itself.
print("s:", s, sep="\n")
# .array is the actual array backing a Series: always an ExtensionArray, i.e. a
# thin wrapper around one or more concrete arrays such as a numpy.ndarray.
print("s.array:", s.array, sep="\n")
# .values returns the Series as ndarray or ndarray-like, depending on the dtype.
print("s.values:", s.values, sep="\n")
# .to_numpy() output is printed last, for comparison with the two views above.
print("s.to_numpy:", s.to_numpy(), sep="\n")

s:
0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: physipy[m]
s.array:
INFO: default __repr__ handled by ExtensionArray:\\n<QuantityArray>
[0 m, 1 m, 2 m, 3 m, 4 m, 5 m, 6 m, 7 m, 8 m, 9 m]
Length: 10, dtype: physipy[m]
s.values:
INFO: default __repr__ handled by ExtensionArray:\\n<QuantityArray>
[0 m, 1 m, 2 m, 3 m, 4 m, 5 m, 6 m, 7 m, 8 m, 9 m]
Length: 10, dtype: physipy[m]
s.to_numpy:
[0 1 2 3 4 5 6 7 8 9]


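If a favunit (preferred display unit, here km) is set on the quantity, the array repr shows the values in that unit, while the dtype stays `physipy[m]` and `to_numpy()` still returns the same underlying values: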

In [8]:
from physipy import units

km = units["km"]
# The quantity goes through iinto(km) before being wrapped in the Series.
s = pd.Series((np.arange(10) * m).iinto(km), dtype="physipy[m]")

# The Series itself.
print("s:", s, sep="\n")
# .array is the actual array backing a Series: always an ExtensionArray, i.e. a
# thin wrapper around one or more concrete arrays such as a numpy.ndarray.
print("s.array:", s.array, sep="\n")
# .values returns the Series as ndarray or ndarray-like, depending on the dtype.
print("s.values:", s.values, sep="\n")
# .to_numpy() output is printed last, for comparison with the two views above.
print("s.to_numpy:", s.to_numpy(), sep="\n")

s:
0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: physipy[m]
s.array:
INFO: default __repr__ handled by ExtensionArray:\\n<QuantityArray>
[  0.0 km, 0.001 km, 0.002 km, 0.003 km, 0.004 km, 0.005 km, 0.006 km,
 0.007 km, 0.008 km, 0.009 km]
Length: 10, dtype: physipy[m]
s.values:
INFO: default __repr__ handled by ExtensionArray:\\n<QuantityArray>
[  0.0 km, 0.001 km, 0.002 km, 0.003 km, 0.004 km, 0.005 km, 0.006 km,
 0.007 km, 0.008 km, 0.009 km]
Length: 10, dtype: physipy[m]
s.to_numpy:
[0 1 2 3 4 5 6 7 8 9]


# Numpy support

In [1]:
import pandas as pd
import numpy as np
from physipandas import QuantityArray
from physipy import m, s

In [2]:
# Plain integer ndarray used as the raw data.
arr = np.arange(10)
# The same data multiplied by the physipy unit `m`.
arrq = arr * m
# QuantityArray built without an explicit dtype.
a = QuantityArray(arrq)
# QuantityArray built with the unit `m` passed explicitly as dtype.
b = QuantityArray(arrq, dtype=m)
# Passing dtype=s for this array raises, which is the desired behaviour:
# c = QuantityArray(arrq, dtype=s)

All of these are working properly

In [3]:
# Use the public entry point: pandas.api.extensions.take is the supported
# name for the helper that lives in the private pandas.core.algorithms module.
from pandas.api.extensions import take

# Pick positions 1, 2 and 3 from the data held in `b._data`.
take(b._data, [1,2,3])

  take(b._data, [1,2,3])


array([1, 2, 3])

In [4]:
# NumPy functions applied directly to the QuantityArray.
np.sqrt(a)
np.abs(a)
np.prod(a)
np.hypot(a, a)
np.trapz(a)
np.mean(a)
np.sort(a)
# Addition with a scalar quantity, an array quantity and another QuantityArray.
a+1*m
a+np.arange(10)*m
a+a
# Multiplication, division and powers with plain numbers.
2*a
a*2
a**2
a/2
2/a
a**0.5
# Indexing: a single position and a slice.
a[0]
a[0:2]
#print(a.iloc[0:3])
# Builtin truth reductions; all(a) is the last expression, so its value is
# what the cell displays.
any(a)
all(a)
# Builtin sum() is left disabled: it fails with a dimension error on the
# start/initial value (presumably the plain int 0 - TODO confirm).
# print(sum(a)) # start init value dimension error

  return type(self)(self.value / y.value,


False

# QuantityArray performance

In [5]:
import pandas as pd
import numpy as np
from physipandas import QuantityArray
from physipy import m

In [6]:
# Five containers built from the same ten values, used by the timing cell.
# Plain ndarray.
arr = np.arange(10)
# ndarray multiplied by the physipy unit `m`.
arrq = arr * m
# QuantityArray wrapping the quantity.
arrqar = QuantityArray(arrq)
# pandas Series over the plain ndarray.
arrnps = pd.Series(arr)
# pandas Series over a QuantityArray, with the 'physipy[m]' dtype.
arrs = pd.Series(QuantityArray(arrq), dtype="physipy[m]")

In [7]:
# Time np.sqrt on each container; results are printed in the same order.
# arr: plain ndarray
%timeit np.sqrt(arr)
# arrq: arr * m
%timeit np.sqrt(arrq)
# arrqar: QuantityArray
%timeit np.sqrt(arrqar)
# arrnps: Series over the plain ndarray
%timeit np.sqrt(arrnps)
# arrs: Series with the 'physipy[m]' dtype
%timeit np.sqrt(arrs)

1.19 µs ± 31.6 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
9.24 µs ± 28.6 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
401 µs ± 45.6 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
63.6 µs ± 329 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
486 µs ± 15.3 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


- PintArray : https://github.com/hgrecco/pint-pandas/blob/cf527e48557a1e028c6f2d4e628aa7a6cd1b30d4/pint_pandas/pint_array.py#L181

- Pandas ExtensionArray : https://pandas.pydata.org/docs/reference/api/pandas.api.extensions.ExtensionArray.html

# Tutorial

Attributes
 - dtype : An instance of 'ExtensionDtype', to define as a property
 - nbytes : The number of bytes needed to store this object in memory. Return an int
 - ndim : 
 - shape : 
 
 
Methods : 
 - _from_sequence : Construct a new ExtensionArray from a sequence of scalars.
 - __getitem__ : For scalar ``item``, return a scalar value suitable for the array's type. This should be an instance of ``self.dtype.type``. For slice ``key``, return an instance of ``ExtensionArray``, even if the slice is length 0 or 1. For a boolean mask, return an instance of ``ExtensionArray``, filtered to the values where ``item`` is True.
 - __len__ : len of this array
 - __eq__ : this should return a boolean numpy ndarray or a boolean ExtensionArray. When `other` is one of Series, Index, or DataFrame, this method should return NotImplemented (to ensure that those objects are responsible for first unpacking the arrays, and then dispatch the operation to the underlying arrays)
 - isna : A 1-D array indicating if each value is missing. In most cases, this should return a NumPy ndarray
 - take : called by ``Series.__getitem__``, ``.loc``, ``iloc``, when `indices` is a sequence of values. Should return another ExtensionArray instance, with dtype of corresponding Dtype. if allow_fill and fill_value is None: fill_value = self.dtype.na_value (here, self.dtype.na_value is the nan value of the corresponding Dtype)
 - copy : such that self.copy() returns a copy of the ExtensionArray. Should use a copy of the base data, and same Dtype
 - _concat_same_type : Concatenate multiple arrays of this dtype. Basically convert a list of ExtensionArray of the same Dtype to a single concatenated ExtensionArray of the same Dtype
 
Second : 
 - _reduce 
 - dropna : 
        

In [None]:
@property
def dtype(self) -> ExtensionDtype:
    """
    An instance of 'ExtensionDtype'.

    Interface stub: it unconditionally raises ``AbstractMethodError``, so a
    concrete array class has to define its own ``dtype`` property.
    """
    raise AbstractMethodError(self)
    
@property
def nbytes(self) -> int:
    """
    The number of bytes needed to store this object in memory.

    Interface stub: it unconditionally raises ``AbstractMethodError``, so a
    concrete array class has to define its own ``nbytes`` property.
    """
    # If this is expensive to compute, return an approximate lower bound
    # on the number of bytes needed.
    raise AbstractMethodError(self)
    
def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll:
    """
    A 1-D array indicating if each value is missing.

    Returns
    -------
    na_values : Union[np.ndarray, ExtensionArray]
        In most cases, this should return a NumPy ndarray. For
        exceptional cases like ``SparseArray``, where returning
        an ndarray would be expensive, an ExtensionArray may be
        returned.

    Notes
    -----
    If returning an ExtensionArray, then

    * ``na_values._is_boolean`` should be True
    * `na_values` should implement :func:`ExtensionArray._reduce`
    * ``na_values.any`` and ``na_values.all`` should be implemented
    """
    # Interface stub: always raises until a concrete class overrides it.
    raise AbstractMethodError(self)

def take(
    self: ExtensionArrayT,
    indices: Sequence[int],
    *,
    allow_fill: bool = False,
    fill_value: Any = None,
) -> ExtensionArrayT:
    """
    Take elements from an array.

    Parameters
    ----------
    indices : sequence of int
        Indices to be taken.
    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.
        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any
          other negative values raise a ``ValueError``.
    fill_value : any, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type, ``self.dtype.na_value``, is used.
        For many ExtensionArrays, there will be two representations of
        `fill_value`: a user-facing "boxed" scalar, and a low-level
        physical NA value. `fill_value` should be the user-facing version,
        and the implementation should handle translating that to the
        physical version for processing the take if necessary.

    Returns
    -------
    ExtensionArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True.

    See Also
    --------
    numpy.take : Take elements from an array along an axis.
    api.extensions.take : Take elements from an array.

    Notes
    -----
    ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
    ``iloc``, when `indices` is a sequence of values. Additionally,
    it's called by :meth:`Series.reindex`, or any other method
    that causes realignment, with a `fill_value`.

    Examples
    --------
    Here's an example implementation, which relies on casting the
    extension array to object dtype. This uses the helper method
    :func:`pandas.api.extensions.take`.

    .. code-block:: python

       def take(self, indices, allow_fill=False, fill_value=None):
           from pandas.core.algorithms import take
           # If the ExtensionArray is backed by an ndarray, then
           # just pass that here instead of coercing to object.
           data = self.astype(object)
           if allow_fill and fill_value is None:
               fill_value = self.dtype.na_value
           # fill value should always be translated from the scalar
           # type for the array, to the physical storage type for
           # the data, before passing to take.
           result = take(data, indices, fill_value=fill_value,
                         allow_fill=allow_fill)
           return self._from_sequence(result, dtype=self.dtype)
    """
    # Implementer note: The `fill_value` parameter should be a user-facing
    # value, an instance of self.dtype.type. When passed `fill_value=None`,
    # the default of `self.dtype.na_value` should be used.
    # This may differ from the physical storage type your ExtensionArray
    # uses. In this case, your implementation is responsible for casting
    # the user-facing type to the storage type, before using
    # pandas.api.extensions.take
    raise AbstractMethodError(self)

    # Base Pandas implementation example
    # from pandas.core.algorithms import take
    # # If the ExtensionArray is backed by an ndarray, then
    # # just pass that here instead of coercing to object.
    # data = self.astype(object)
    # if allow_fill and fill_value is None:
    #     fill_value = self.dtype.na_value
    # # fill value should always be translated from the scalar
    # # type for the array, to the physical storage type for
    # # the data, before passing to take.
    # result = take(data, indices, fill_value=fill_value,
    #               allow_fill=allow_fill)
    # return self._from_sequence(result, dtype=self.dtype)
    #
    # Pintpandas implementation
    # data = self._data
    # if allow_fill and fill_value is None:
    #     fill_value = self.dtype.na_value
    # if isinstance(fill_value, _Quantity):
    #     fill_value = fill_value.to(self.units).magnitude
    # result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill)
    # return PintArray(result, dtype=self.dtype)

    def copy(self: ExtensionArrayT) -> ExtensionArrayT:
        """
        Return a copy of the array.

        Returns
        -------
        ExtensionArray
        """
        # Interface stub: always raises until a concrete class overrides it.
        raise AbstractMethodError(self)
        
    # Pintpandas implementation
    # def copy(self, deep=False):
    # data = self._data
    # if deep:
    #     data = copy.deepcopy(data)
    # else:
    #     data = data.copy()
    # return type(self)(data, dtype=self.dtype)
    
    
    @classmethod
    def _concat_same_type(
        cls: type[ExtensionArrayT], to_concat: Sequence[ExtensionArrayT]
    ) -> ExtensionArrayT:
        """
        Concatenate multiple arrays of this dtype.

        Parameters
        ----------
        to_concat : sequence of this type

        Returns
        -------
        ExtensionArray
        """
        # Implementer note: this method will only be called with a sequence of
        # ExtensionArrays of this class and with the same dtype as self. This
        # should allow "easy" concatenation (no upcasting needed), and result
        # in a new ExtensionArray of the same dtype.
        # Note: this strict behaviour is only guaranteed starting with pandas 1.1
        raise AbstractMethodError(cls)
        
    # Pintpandas implementation
    # @classmethod
    # def _concat_same_type(cls, to_concat):
    #     output_units = to_concat[0].units

    #     data = []
    #     for a in to_concat:
    #         converted_values = a.quantity.to(output_units).magnitude
    #         data.append(np.atleast_1d(converted_values))

    #     return cls(np.concatenate(data), output_units)
    
def __eq__(self, other: Any) -> ArrayLike:  # type: ignore[override]
        """
        Return for `self == other` (element-wise equality).

        Interface stub: it unconditionally raises ``AbstractMethodError``
        until a concrete array class overrides it.
        """
        # Implementer note: this should return a boolean numpy ndarray or
        # a boolean ExtensionArray.
        # When `other` is one of Series, Index, or DataFrame, this method should
        # return NotImplemented (to ensure that those objects are responsible for
        # first unpacking the arrays, and then dispatch the operation to the
        # underlying arrays)
        raise AbstractMethodError(self)
        
def __len__(self) -> int:
    """
    Length of this array.

    Returns
    -------
    length : int
    """
    # Interface stub: always raises until a concrete class overrides it.
    raise AbstractMethodError(self)

# pintpandas implementation
# def __len__(self):
# # type: () -> int
# """Length of this array
# Returns
# -------
# length : int
# """
# return len(self._data)

    def __getitem__(self, item: PositionalIndexer) -> ExtensionArray | Any:
        """
        Select a subset of self.

        Parameters
        ----------
        item : int, slice, or ndarray
            * int: The position in 'self' to get.
            * slice: A slice object, where 'start', 'stop', and 'step' are
              integers or None
            * ndarray: A 1-d boolean NumPy ndarray the same length as 'self'

        Returns
        -------
        item : scalar or ExtensionArray

        Notes
        -----
        For scalar ``item``, return a scalar value suitable for the array's
        type. This should be an instance of ``self.dtype.type``.

        For slice ``item``, return an instance of ``ExtensionArray``, even
        if the slice is length 0 or 1.

        For a boolean mask, return an instance of ``ExtensionArray``, filtered
        to the values where ``item`` is True.
        """
        # Interface stub: always raises until a concrete class overrides it.
        raise AbstractMethodError(self)
        
# if is_integer(item):
#     return self._data[item] * self.units
# 
# item = check_array_indexer(self, item)
# 
# return self.__class__(self._data[item], self.dtype)

# @classmethod
# def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False):
#     """
#     Construct a new ExtensionArray from a sequence of scalars.
#     Parameters
#     ----------
#     scalars : Sequence
#         Each element will be an instance of the scalar type for this
#         array, ``cls.dtype.type`` or be converted into this type in this method.
#     dtype : dtype, optional
#         Construct for this particular dtype. This should be a Dtype
#         compatible with the ExtensionArray.
#     copy : bool, default False
#         If True, copy the underlying data.
#     Returns
#     -------
#     ExtensionArray
#     """
#     raise AbstractMethodError(cls)

# @classmethod
# def _from_sequence(cls, scalars, dtype=None, copy=False):
#     """
#     Initialises a PintArray from a list like of quantity scalars or a list like of floats and dtype
#     -----
#     Usage
#     PintArray._from_sequence([Q_(1,"m"),Q_(2,"m")])
#     """
#     master_scalar = None
#     try:
#         master_scalar = next(i for i in scalars if hasattr(i, "units"))
#     except StopIteration:
#         if isinstance(scalars, PintArray):
#             dtype = scalars._dtype
#         if dtype is None:
#             raise ValueError(
#                 "Cannot infer dtype. No dtype specified and empty array"
#             )
#     if dtype is None and not isinstance(master_scalar, _Quantity):
#         raise ValueError("No dtype specified and not a sequence of quantities")
#     if dtype is None and isinstance(master_scalar, _Quantity):
#         dtype = PintType(master_scalar.units)

#     def quantify_nan(item):
#         if type(item) is float:
#             return item * dtype.units
#         return item

#     if isinstance(master_scalar, _Quantity):
#         scalars = [quantify_nan(item) for item in scalars]
#         scalars = [item.to(dtype.units).magnitude for item in scalars]
#     return cls(scalars, dtype=dtype, copy=copy)

    def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
        """
        Perform the reduction called ``name`` and return its scalar result.

        This base version implements no reduction at all: whatever ``name``
        is requested, it raises ``TypeError``.

        Parameters
        ----------
        name : str
            Name of the function, supported values are:
            { any, all, min, max, sum, mean, median, prod,
            std, var, sem, kurt, skew }.
        skipna : bool, default True
            If True, skip NaN values.
        **kwargs
            Additional keyword arguments passed to the reduction function.
            Currently, `ddof` is the only supported kwarg.

        Returns
        -------
        scalar

        Raises
        ------
        TypeError : subclass does not define reductions
        """
        # The message names both the requested reduction and the array dtype.
        message = f"cannot perform {name} with type {self.dtype}"
        raise TypeError(message)

    def _reduce(self, name, skipna=True, **kwds):
        """
        Return a scalar result of performing the reduction operation.

        The reduction is looked up by ``name`` in a dispatch table of builtin
        and NumPy functions and applied to ``self.quantity``.

        Parameters
        ----------
        name : str
            Name of the function. This implementation supports:
            { all, any, min, max, sum, mean, median }.
        skipna : bool, default True
            If True, missing values are removed with ``self.dropna()``
            before the reduction is applied.
        **kwds
            Accepted for interface compatibility; not used by this
            implementation.

        Returns
        -------
        scalar

        Raises
        ------
        TypeError
            If ``name`` is not one of the supported reductions.
        """
        functions = {
            "all": all,
            "any": any,
            "min": min,
            "max": max,
            "sum": sum,
            "mean": np.mean,
            "median": np.median,
        }
        if name not in functions:
            raise TypeError(f"cannot perform {name} with type {self.dtype}")

        if skipna:
            quantity = self.dropna().quantity
        else:
            quantity = self.quantity

        return functions[name](quantity)

    # Defined as a sibling of ``_reduce`` (not nested inside it after the
    # ``return``) so that the ``self.dropna()`` call above can resolve.
    def dropna(self):
        """
        Return ExtensionArray without NA values.

        Returns
        -------
        valid : ExtensionArray
        """
        # error: Unsupported operand type for ~ ("ExtensionArray")
        return self[~self.isna()]  # type: ignore[operator]