In [95]:
import numbers
import pandas as pd
from collections.abc import Iterable
import attr
import enum
import itertools as it
import functools
import io

import pandas.core.series

import numpy as np

from pandas.api import extensions as pdext # import ExtensionArray, ExtensionDtype, register_extension_dtype

# notes
# https://github.com/tomharvey/pandas-extension-dtype/blob/master/decimal_array.py
# https://github.com/0phoff/pygeos-pandas/blob/master/pgpd/_array.py
# https://pandas.pydata.org/pandas-docs/stable/development/extending.html#extension-types

In [136]:
class Objective(enum.Enum):
    MIN = -1
    MAX = 1
    
    #: Another way to name the maximization criteria.
    _MAX_ALIASES = [MAX, max, np.max, np.nanmax, np.amax, "max", "maximize", "+", ">"]
    
    #: Another ways to name the minimization criteria.
    _MIN_ALIASES = [MIN, min, np.min, np.nanmin, np.amin, "min", "minimize", "-", "<"]
    
    def __str__(self):
        return self.name
    
    @classmethod
    def construct_from_alias(cls, alias):
        if alias in [cls.MIN, cls.MAX]:
            return alias
        if isinstance(alias, str):
            alias = alias.lower()
        if alias in cls._MAX_ALIASES.value:
            return cls.MAX
        if alias in cls._MIN_ALIASES.value:
            return cls.MIN
        raise ValueError(f"Invalid criteria objective {alias}")


@pdext.register_extension_dtype
@attr.s(repr=False, hash=True)
class CriteriaDtype(pdext.ExtensionDtype):
    """A custom data type, to be paired with an ExtensionArray."""
    
    # dtype
    type = np.number
    name = "criteria"
    na_value = pd.NA
    
    _is_numeric = True
    _metadata = ("sense", "weight")
    
    # data itself
    objective = attr.ib(converter=Objective.construct_from_alias)
    weight = attr.ib(converter=float)
    
    def __repr__(self):
        return str(self)
    
    def __str__(self):
        return f"{type(self).__name__}({self.objective}, {self.weight})"
    
    @classmethod
    def construct_from_string(cls, string):
        """
        Construct this type from a string (ic. :attr:`~CriteriaDtype.name`).
        Args:
            string (str): The name of the type.
        Returns:
            CriteriaDtype: instance of the dtype.
        Raises:
            TypeError: string is not equal to "criteria".
        """
        if string == cls.name:
            return cls()
        else:
            raise TypeError(f'Cannot construct a "{cls.__name__}" from "{string}"')

    @classmethod
    def construct_array_type(cls):
        """Return the array type associated with this dtype."""
        return CriteriaArray
    

@attr.s(repr=False)
class CriteriaArray(pdext.ExtensionArray):
    
    
    data = attr.ib(converter=lambda v: np.asarray(v, dtype=float))
    _dtype = attr.ib(validator=attr.validators.instance_of(CriteriaDtype))

    # INTERNAL
    
    def __repr__(self):
        """x.__repr__() <==> repr(x)"""
        cls_name = type(self).__name__
        return f"{cls_name}({self.data}, {self.dtype})"
    
    @property
    def dtype(self):
        return self._dtype
    
    # =========================================================================
    # ExtensionArray Specific
    # =========================================================================
    
    @classmethod
    def _from_sequence(cls, scalars, dtype, copy=False):
        
        values = np.asarray(scalars, dtype=float)
        if copy:
            values = values.copy()
        return cls(data=values, dtype=dtype)

    @classmethod
    def _from_factorized(cls, values, original):
        """Reconstruct an ExtensionArray after factorization."""
        return cls(values)
    
    def __getitem__(self, key):
        if isinstance(key, numbers.Integral):
            return self.data[key]

        key = pd.api.indexers.check_array_indexer(self, key)
        if isinstance(key, (Iterable, slice)):
            return CriteriaArray(self.data[key], dtype=self.dtype)
        else:
            raise TypeError('Index type not supported', key)

    def __setitem__(self, key, value):
        self.data.__setitem__(key, value)

    def __len__(self):
        return len(self.data)

    def __eq__(self, other):
        return isinstance(other, self.__class__) and self.data == other.data and self.dtype == other.dtype

    @property
    def nbytes(self):
        return self.data.nbytes

    def isna(self):
        return pd.isna(self.data)

    def take(self, indices, allow_fill=False, fill_value=None):
        from pandas.core.algorithms import take

        if allow_fill:
            if fill_value is None or pd.isna(fill_value):
                fill_value = None
            elif not isinstance(fill_value, self.dtype.type):
                raise TypeError('Provide float or None as fill value')

        result = take(self.data, indices, allow_fill=allow_fill, fill_value=fill_value)

        if allow_fill and fill_value is None:
            result[pd.isna(result)] = None

        return self.__class__(result)

    def copy(self, order='C'):
        cdata = self.data.copy(order)
        return CriteriaArray(cdata, dtype=self.dtype)

    @classmethod
    def _concat_same_type(cls, to_concat):
        data = np.concatenate([c.data for c in to_concat], dtype=float)
        dtype = {c.dtype for c in to_concat}
        if len(dtype) > 1:
            raise ValueError("Multiple criteria dtype detected")
        return cls(data, dtype=dtype.pop())


In [142]:
cdtype = CriteriaDtype("+", 1)
crit = CriteriaArray(np.arange(10000), dtype=cdtype)
pd.Series(np.arange(10000)).astype("criteria[+, 1]")


TypeError: data type 'criteria[+, 1]' not understood

In [93]:

crit.dtype

CriteriaDtype(MAX, 1.0)

In [84]:
CriteriaArray.__init__

<function __main__.CriteriaArray.__init__(self, data, dtype) -> None>

In [102]:
CriteriaDtype("+", 1) == CriteriaDtype("+", 1)

True

In [117]:
np.array_str(np.arange(10000))

'[   0    1    2 ... 9997 9998 9999]'

'[   0,   1,   2,...,9997,9998,9999]'