In [265]:
# Sample input in the format Solver expects: one food per line, written as
# space-separated ingredients followed by a "(contains ...)" allergen list.
DATA = "\n".join((
    "mxmxvkd kfcds sqjhc nhms (contains dairy, fish)",
    "trh fvjkl sbzzf mxmxvkd (contains dairy)",
    "sqjhc fvjkl (contains soy)",
    "sqjhc mxmxvkd sbzzf (contains fish)",
))

In [266]:
import numpy as np
import six
import re
from itertools import product

In [267]:
class Solver:

    """Utility to solve AoC's 2020 21st task."""

    def __init__(self, data):

        """
        Initialises the utility given the task's textual input;
        it should be in the format

            ingredient1 ingredient2 ... ingredientN (contains allergen1, allergen2, ..., allergenM)
            ...

        Ingredients are separated by whitespace, allergens by commas.
        Blank lines are skipped, and a line without a "(contains ...)"
        suffix is stored as a food with no listed allergens.

        :param data: str
        """

        # Store the ingredient allergen information as tuples for each line;

        items = []
        for line in data.splitlines():
            line = line.strip()
            if not line:
                continue
            ingredients, marker, allergens = line.partition(' (contains ')
            if marker:
                # Drop the closing parenthesis of the allergen list;
                if allergens.endswith(')'):
                    allergens = allergens[:-1]
                allergens = {name.strip() for name in allergens.split(',') if name.strip()}
            else:
                allergens = set()
            items.append((set(ingredients.split()), allergens))
        self.items = items

    def solve(self):

        """
        Solves both tasks by first finding the unique mapping between
        ingredients and allergens.

        :return: occurrencesOfSafeIngredients:int, dangerousIngredients:str
        :raises ValueError: if the input does not pin every allergen
            to exactly one distinct ingredient
        """

        allergenToIngredients, occurrences = self._collectCandidates()
        allergenToIngredient = self._resolveMapping(allergenToIngredients)

        # A set gives constant-time membership checks while summing;

        dangerousIngredients = set(allergenToIngredient.values())
        occurrencesOfSafeIngredients = sum(
            count for ingredient, count in occurrences.items()
            if ingredient not in dangerousIngredients
        )

        # The dangerous ingredients are listed in the alphabetical
        # order of the allergen they contain;

        dangerous = ','.join(
            allergenToIngredient[allergen] for allergen in sorted(allergenToIngredient)
        )

        return occurrencesOfSafeIngredients, dangerous

    def _collectCandidates(self):

        """
        Counts in how many foods each ingredient appears and builds, for
        each allergen, the intersection of the ingredient sets of all the
        foods which list that allergen.

        :return: allergenToIngredients:dict, occurrences:dict
        """

        allergenToIngredients = {}
        occurrences = {}

        for ingredients, allergens in self.items:
            for ingredient in ingredients:
                occurrences[ingredient] = occurrences.get(ingredient, 0) + 1
            for allergen in allergens:
                if allergen in allergenToIngredients:
                    allergenToIngredients[allergen] &= ingredients
                else:
                    # Copy, so that narrowing never alters self.items;
                    allergenToIngredients[allergen] = set(ingredients)

        return allergenToIngredients, occurrences

    @staticmethod
    def _resolveMapping(allergenToIngredients):

        """
        Narrows the one-to-many candidate mapping down to a one-to-one
        mapping: an allergen with a single candidate is fixed and that
        ingredient is removed from every other allergen, repeatedly.

        :param allergenToIngredients: dict of allergen -> set of ingredients
        :return: dict of allergen -> ingredient
        :raises ValueError: if no allergen with exactly one candidate
            is left while some allergens are still unresolved
        """

        unresolved = {
            allergen: set(ingredients)
            for allergen, ingredients in allergenToIngredients.items()
        }
        resolved = {}

        while unresolved:
            # Sorted, so that the allergen picked on each pass (and any
            # error message) does not depend on dict ordering;
            allergen = next(
                (a for a in sorted(unresolved) if len(unresolved[a]) == 1),
                None,
            )
            if allergen is None:
                # Without this check the elimination would never finish
                # on ambiguous or contradictory input;
                raise ValueError(
                    'cannot determine a unique ingredient for allergen(s): {}'.format(
                        ', '.join(sorted(unresolved))
                    )
                )
            (ingredient,) = unresolved.pop(allergen)
            resolved[allergen] = ingredient
            for ingredients in unresolved.values():
                ingredients.discard(ingredient)

        return resolved

In [268]:
# Parse the sample input held in DATA into a Solver instance.
s = Solver(DATA)

In [269]:
# Solve both parts; the result is a tuple of (occurrences of safe
# ingredients, comma-joined dangerous ingredients).
s.solve()

(5, 'mxmxvkd,sqjhc,fvjkl')