In [None]:
#| default_exp multi_enum

# MultiEnum
> A powerful solution for mapping multiple aliases to a single canonical value in Python.

One of the most common, and most frustrating, issues I’ve encountered in real-world data work is the proliferation of different names that refer to the same thing. For example, I was once tasked with aggregating cash balances across different business lines, and various teams would use slightly different names to refer to the same bank (e.g. “JP Morgan Chase” vs “Chase Bank”). This issue kept cropping up in different projects, so I created this custom object as a generic solution to a common problem.

The MultiEnum class demonstrated here uses several advanced concepts such as metaclass programming, custom enum schemas, and integration with Pydantic for seamless validation and serialization. It allows multiple values (aliases) to map to a single canonical value, ensuring that data normalization is handled gracefully across diverse datasets. By leveraging Python’s Enum type system, this class ensures that business logic remains clean and efficient, while maintaining flexibility for real-world data complexities.

In [None]:
#| exporti 

from collections import defaultdict
from enum import Enum,EnumMeta
from typing import *
from pydantic import GetCoreSchemaHandler, GetJsonSchemaHandler
from pydantic_core import core_schema
from pydantic.json_schema import JsonSchemaValue

In [None]:
#| hide 
from nbdev.showdoc import show_doc

## Definitions

In [None]:
#| exporti

def merge_dicts_of_tuples(*dicts,allow_duplicate_keys=True):
    """Merge dictionaries of tuples by concatenating the tuples stored under equal keys.

    Args:
        *dicts: Any number of mappings whose values are tuples.
        allow_duplicate_keys: When False, a key that occurs in more than one
            input mapping raises instead of having its tuples concatenated.

    Returns:
        A plain ``dict`` mapping every key to the concatenation of its tuples,
        in the order the mappings were passed.

    Raises:
        ValueError: If ``allow_duplicate_keys`` is False and a key repeats.
    """
    out = defaultdict(tuple)
    for d in dicts:
        for k, v in d.items():
            # Membership test rather than truthiness: a key first seen with an
            # empty tuple is still a duplicate the second time it shows up.
            if not allow_duplicate_keys and k in out:
                raise ValueError(f"{k} is duplicated")
            out[k] += v

    return dict(out)

In [None]:
show_doc(merge_dicts_of_tuples)

---

### merge_dicts_of_tuples

>      merge_dicts_of_tuples (*dicts, allow_duplicate_keys=True)

*merges an arbitrary number of dictionaries with tuple values by concatenating values with the same keys.*

In [None]:
# Three inputs with overlapping keys: 'color' occurs in all of them, 'height' in two.
dict1 = {'color': ('red',)}
dict2 = {'color':('blue','yellow'), 'height':(10,) , 'weight':(5,)}
dict3 = {'color': ('orange',), 'height':(20,30,40)}

# Tuples stored under the same key are concatenated in argument order.
merged = merge_dicts_of_tuples(dict1,dict2,dict3)
assert merged['color']==('red', 'blue', 'yellow', 'orange')

In [None]:
#| exporti 

class MultiEnumMeta(EnumMeta):

    """
    Metaclass for MultiEnum, adding support for merging multiple MultiEnum objects and managing multiple aliases 
    for a single canonical value.

    Every merge operator returns a plain `dict` that maps each member name to a tuple of values whose first
    element is the canonical value. The operand that is not `cls` may be another MultiEnum class or a dict of
    that same shape.

    Methods:
    - `__add__(cls, other)`: Combine two MultiEnum objects, merging their member mappings.  
    - `__iadd__(cls, other)`: In-place addition of MultiEnum objects, ensuring no duplicate aliases while preserving the canonical value.  
    - `__radd__(cls, other)`: Right-hand addition for merging multiple MultiEnum objects.  
    - `to_dict(cls)`: Convert the MultiEnum members into a dictionary, where each key is mapped to a tuple of canonical value and its aliases.  
    """

    @staticmethod
    def _as_mapping(other):
        """Return `other.to_dict()` when `other` provides it, otherwise `other` itself (assumed to be a mapping)."""
        to_dict = getattr(other, "to_dict", None)
        return to_dict() if callable(to_dict) else other

    def __add__(cls, other):
        """Return the combined member mapping of `cls` and `other`.

        Raises:
            ValueError: If a member name is present on both sides.
        """
        return merge_dicts_of_tuples(
            cls.to_dict(), cls._as_mapping(other), allow_duplicate_keys=False
        )

    def __iadd__(cls, other):
        """Merge `other` into the mapping of `cls`, letting shared member names combine their values.

        For every member name defined on `cls`, the canonical value of `cls` stays first;
        repeated values are dropped while the order of first occurrence is kept.
        Because a dict is returned, `A += B` rebinds `A` to that dict.
        """
        combined = merge_dicts_of_tuples(
            cls.to_dict(), cls._as_mapping(other), allow_duplicate_keys=True
        )

        merged = {}
        for key, values in combined.items():
            # Keys are member names, so resolve them through `_member_map_`.
            # `_value2member_map_` also holds aliases, and an alias of an
            # unrelated member could otherwise supply the wrong canonical value.
            member = cls._member_map_.get(key)
            ordered = values if member is None else (member._value_, *values)
            # dict.fromkeys removes repeats and keeps first-occurrence order.
            merged[key] = tuple(dict.fromkeys(ordered))

        return merged

    def __radd__(cls, other):
        """Support `mapping + cls`, which allows chaining merges of multiple MultiEnums.

        The members of `cls` come first in the returned dict.

        Raises:
            ValueError: If a member name is present on both sides.
        """
        return merge_dicts_of_tuples(
            cls.to_dict(), cls._as_mapping(other), allow_duplicate_keys=False
        )

    def to_dict(cls) -> Dict[str,Tuple]:
        """Return `{member name: member._all_values}` for every entry of `_member_map_`."""
        return {k: v._all_values for k, v in cls._member_map_.items()}
    

In [None]:
show_doc(MultiEnumMeta)

---

### MultiEnumMeta

>      MultiEnumMeta (cls, bases, classdict, boundary=None, _simple=False,
>                     **kwds)

*Metaclass for MultiEnum, adding support for merging multiple MultiEnum objects and managing multiple aliases 
for a single canonical value.

Methods:
- `__add__(cls, other)`: Combine two MultiEnum objects, merging their member mappings.
- `__iadd__(cls, other)`: In-place addition of MultiEnum objects, ensuring no duplicate aliases while preserving the canonical value.
- `__radd__(cls, other)`: Right-hand addition for merging multiple MultiEnum objects.
- `to_dict(cls)`: Convert the MultiEnum members into a dictionary, where each key is mapped to a tuple of canonical value and its aliases.*

In [None]:
#| export 

class MultiEnum(Enum,metaclass=MultiEnumMeta):
    """
    A Many-to-One mapping in which none of the possible options can be mapped to more than one value. 

    Each member is declared as `(canonical value, *aliases)`; the member name is accepted as a lookup value too.
    """

    def __new__(cls, *values):
        """Create a member from `(canonical value, *aliases)` and register its aliases for lookup.

        Raises:
            ValueError: If the canonical value or any alias is already mapped to a member.
        """
        obj = object.__new__(cls)
        # first value is canonical value
        canonical = values[0]
        obj._value_ = canonical
        registry = cls._value2member_map_

        # Ordered de-duplication makes the reported clash deterministic. An alias
        # that merely repeats the canonical value needs no registry entry of its own.
        aliases = [v for v in dict.fromkeys(values[1:]) if v != canonical]

        # Validate everything before touching the registry. The canonical value is
        # checked as well, so a value claimed by an earlier member (as alias or as
        # member name) cannot end up pointing at two different members.
        for candidate in (canonical, *aliases):
            try:
                existing_map = registry.get(candidate)
            except TypeError:
                # Only an unhashable canonical value can get here (aliases were
                # hashed above); it cannot be a registry key, so it cannot clash.
                existing_map = None
            if existing_map is not None:
                # if the any of the acceptable values for the new obj already have a mapped instance,throw an error
                raise ValueError(f"{candidate} is trying to be mapped to {obj._value_}, but it has already been mapped to instance {existing_map}. You can't have the same value point to different objects.")

        for alias in aliases:
            registry[alias] = obj
        obj._all_values = tuple(x for x in values if x != ...)

        return obj

    def __init__(self,*args,**kwargs):
        """Make the member name an accepted lookup value unless it is already covered."""
        registry = self.__class__._value2member_map_
        name = self.name
        # A name equal to the canonical value is already covered by the value
        # itself, so it is neither registered here nor appended a second time.
        if name != self._value_ and registry.get(name) is None:
            registry[name] = self
            self._all_values = self._all_values + (name,)
        super().__init__()

    def __repr__(self):
        """Return `<ClassName.member_name: value, alias, ...>` listing every accepted value."""
        rendered = ', '.join(repr(v) for v in self._all_values)
        return f'<{self.__class__.__name__}.{self._name_}: {rendered}>'

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: Any, handler: GetCoreSchemaHandler
    ) -> core_schema.CoreSchema:
        """
        Generates the Pydantic core schema for validation.

        The schema is a chain: a literal check against every key of
        `_value2member_map_`, followed by a function that returns the member
        registered for the value.
        """
        def validate(value: Any) -> MultiEnum:
            if value not in cls._value2member_map_:
                raise ValueError(f"Invalid value: {value}. Must be one of {list(cls._value2member_map_.keys())}.")
            return cls._value2member_map_[value]

        valid_values = list(cls._value2member_map_.keys())

        return core_schema.chain_schema([
            core_schema.literal_schema(valid_values),  # Use literal schema instead of enum
            core_schema.no_info_plain_validator_function(validate),
        ])

    @classmethod
    def __get_pydantic_json_schema__(
        cls, _core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler
    ) -> JsonSchemaValue:
        """
        Defines the JSON schema for the MultiEnum type, with detailed info about each member.

        On top of the literal schema produced by `handler`, the result carries
        `enum` (all accepted values), `description` (the class docstring or a
        fallback text) and `details` (name, canonical value and aliases per member).
        """
        valid_values = list(cls._value2member_map_.keys())

        # Create a descriptive schema for each enum member
        enum_description: List[Dict[str, Any]] = [
            {
                'name': member.name,
                'canonical_value': member._value_,
                'aliases': [alias for alias in member._all_values if alias != member._value_],
            }
            for member in cls
        ]

        # Generate the base JSON schema using a literal schema
        json_schema = handler(core_schema.literal_schema(valid_values))

        # Add the detailed description and the class docstring
        json_schema.update({
            'enum': valid_values,  # Ensure enum values include all canonical and alias values
            'description': cls.__doc__ or 'A custom MultiEnum type',  # Use the docstring as description
            'details': enum_description  # Include a detailed breakdown of each enum value
        })
        return json_schema

## Examples

### 'Flavor Profile'

The first value of each attribute is its "canonical value". In other words, that is what will be displayed if any of the choices are given. 

In [None]:
class FlavorProfile(MultiEnum):

    black_fruit = (
        'Black Fruit', # the first value is the 'canonical value'
        'plum','jam','black cherry','blackberry','blackcurrant'
    )
    red_fruit = (
        'Red Fruit',
        'raspberry','red cherry','strawberries','cranberry'
    )

# Looking a member up by one of its aliases exposes the canonical value.
assert FlavorProfile('plum').value == 'Black Fruit'
# Two aliases of the same member resolve to the same canonical value.
assert (
    FlavorProfile('black cherry').value 
    == FlavorProfile('blackberry').value
)

If you try to include the same choice for multiple attributes, you'll get a ValueError

In [None]:
# Creating the class fails because 'cherry' is listed under both members.
try:
    class FlavorProfile(MultiEnum):
        black_fruit = (
            'Black Fruit', # the first value is the 'canonical value'
            'plum','jam','cherry','blackberry','blackcurrant'
        )
        red_fruit = (
            'Red Fruit',
            'raspberry','cherry','strawberries','cranberry'
        )
except ValueError as e:
    print(e) # you can't include 'cherry' in both choices

cherry is trying to be mapped to Red Fruit, but it has already been mapped to instance FlavorProfile.black_fruit. You can't have the same value point to different objects.


#### MultiEnum from Dict

In [None]:
# Member definitions as a plain mapping: each key is a member name,
# each tuple is (canonical value, *aliases).
D = dict(    
    black_fruit = (
        'Black Fruit', # the first value is the 'canonical value'
        'plum','jam','black cherry','blackberry','blackcurrant'
    ),
    red_fruit = (
        'Red Fruit',
        'raspberry','red cherry','strawberries','cranberry'
    )
)

In [None]:
# Functional form: a class name plus a mapping of member names to value tuples.
FlavorProfile = MultiEnum('FlavorProfile',D)

# Lookup by alias returns the member that carries the canonical value.
assert FlavorProfile('plum').value == 'Black Fruit'

### Adding and Merging MultiEnums

In [None]:
# No explicit aliases here: the lookups below use the member name,
# which MultiEnum registers as an accepted value.
class Beatles(MultiEnum):
    john = "John Lennon"
    paul = "Paul McCartney"
    george = "George Harrison"
    ringo = "Ringo Starr"

assert Beatles('john').value == "John Lennon"

# Both enums define a member named `john`.
class LedZepplin(MultiEnum):
    robert = "Robert Plant"
    jimmy = "Jimmy Page"
    john = "John Bonham"
    john_paul = "John Paul Jones"

assert LedZepplin("john").value=="John Bonham"

This won't work because there are two Johns

In [None]:
# `Beatles + LedZepplin` rejects member names present on both sides with a ValueError.
try:
    EnglishMusicians = MultiEnum('EnglishMusicians',Beatles + LedZepplin)
except ValueError as e:
    print(e)

john is duplicated


In [None]:
# Member names are now distinct, but both enums declare the alias 'john'.
class Beatles(MultiEnum):
    john_lennon = "John Lennon",'john'
    paul = "Paul McCartney"
    george = "George Harrison"
    ringo = "Ringo Starr"

assert Beatles('john').value == "John Lennon"

class LedZepplin(MultiEnum):
    robert = "Robert Plant"
    jimmy = "Jimmy Page"
    john_bonham = "John Bonham",'john'
    john_paul = "John Paul Jones"

assert LedZepplin("john").value=="John Bonham"

This won't work because 'john' is mapped to two members

In [None]:
# The addition succeeds (member names differ), but building the enum raises a
# ValueError because the alias 'john' would point at two members.
try:
    EnglishMusicians = MultiEnum('EnglishMusicians',Beatles + LedZepplin)
except ValueError as e:
    print(e)

john is trying to be mapped to John Bonham, but it has already been mapped to instance EnglishMusicians.john_lennon. You can't have the same value point to different objects.


In [None]:
# Every alias is unique across both bands in this version.
class Beatles(MultiEnum):
    john_lennon = "John Lennon",'john lennon',
    paul_mccartney = "Paul McCartney",'Macca'
    george_harrison = "George Harrison"
    ringo_starr = "Ringo Starr","Richard Starkey"

assert Beatles('john lennon').value == "John Lennon"

class LedZepplin(MultiEnum):
    robert_plant = "Robert Plant","Percy"
    jimmy_page = "Jimmy Page","Led Wallet"
    john_bonham = "John Bonham","Bonzo"
    john_paul_jones = "John Paul Jones","Jonesy"

assert LedZepplin("Bonzo").value=="John Bonham"

# Uses the same member names as `Beatles`, each with a different canonical value.
class BeatlesNickname(MultiEnum):
    john_lennon = "The Clever One"
    paul_mccartney = "The Cute One"
    george_harrison = "The Quiet One"
    ringo_starr = "The Funny One"

This won't work because we're trying to overwrite the Beatles with their nicknames

In [None]:
# `+` refuses member names present on both sides, so the first addition raises a ValueError.
try:
    Beatles+BeatlesNickname+LedZepplin
except ValueError as e:
    print(e)

john_lennon is duplicated


But this will work, because we can merge the Beatles with their nicknames thanks to the `__iadd__` method

In [None]:
# `+=` rebinds `Beatles` to the dict returned by MultiEnumMeta.__iadd__,
# so from here on `Beatles` is a mapping rather than an Enum class.
Beatles+=BeatlesNickname # <-- merge two multi-enums that share keys
# dict + MultiEnum is handled by MultiEnumMeta.__radd__, which also returns a dict.
EnglishMusicians = MultiEnum(
    'EnglishMusicians',Beatles+LedZepplin # <-- Add two multi-enums that have different keys
)

The result is a combined MultiEnum that retains the original canonical value.

In [None]:
# A nickname merged in via `+=` resolves to the canonical value that came from `Beatles`.
assert EnglishMusicians('The Cute One').value=='Paul McCartney'
# Aliases contributed by `LedZepplin` still resolve as before.
assert EnglishMusicians('Bonzo').value=="John Bonham"

### As part of a Pydantic model

In [None]:
from pydantic import BaseModel

In [None]:
# Pydantic model whose `name` field is validated through the MultiEnum schema hooks.
class EnglishMusicianBio(BaseModel):
    name: EnglishMusicians  # accepts any registered value: canonical value, alias or member name
    instrument: str
    born: int

If you use `MultiEnum` in a Pydantic model, the schema will reflect the valid choices one can use to populate the model.

In [None]:
# `model_json_schema()` is the Pydantic v2 name for the deprecated `schema()`.
field_schema = EnglishMusicianBio.model_json_schema()['properties']['name']
field_schema['details'][0]

{'aliases': ['Percy', 'robert_plant'],
 'canonical_value': 'Robert Plant',
 'name': 'robert_plant'}

In [None]:
# The alias given for `name` is validated into the member, which serializes as its canonical value.
paul_bio = EnglishMusicianBio(name='The Cute One',instrument='bass',born=1942)
# `model_dump_json()` is the Pydantic v2 name for the deprecated `json()`.
paul_bio.model_dump_json()

'{"name":"Paul McCartney","instrument":"bass","born":1942}'

### A Fun Example
> Using `MultiEnum` to solve a classic beginner coding problem.

In [None]:
# Multiples of 3 and of 5 below 100 (0 is included in both).
fizz = [x for x in range(0,100) if x%3==0]
buzz = [x for x in range(0,100) if x%5==0]

# Numbers divisible by both go to `fizzbuzz`; they are then removed from the
# other two groups so that no number belongs to more than one member.
fizzbuzz = set(fizz).intersection(buzz)
fizz = (set(fizz)-set(buzz))-fizzbuzz
buzz = (set(buzz)-set(fizz))-fizzbuzz

# Each member: canonical label first, then every number that maps to it as an alias.
class TestEnum(MultiEnum):
    fizz = 'Fizz',*fizz
    buzz = 'Buzz',*buzz
    fizzbuzz = 'FizzBuzz',*fizzbuzz

assert TestEnum(3).value=='Fizz'
assert TestEnum(5).value=='Buzz'
assert TestEnum(15).value=='FizzBuzz'