In [70]:
# Sample input from the puzzle statement: one food per line, written as
# space-separated ingredients followed by "(contains <allergen>, ...)".
example = """
mxmxvkd kfcds sqjhc nhms (contains dairy, fish)
trh fvjkl sbzzf mxmxvkd (contains dairy)
sqjhc fvjkl (contains soy)
sqjhc mxmxvkd sbzzf (contains fish)
""".strip().splitlines()
example

['mxmxvkd kfcds sqjhc nhms (contains dairy, fish)',
 'trh fvjkl sbzzf mxmxvkd (contains dairy)',
 'sqjhc fvjkl (contains soy)',
 'sqjhc mxmxvkd sbzzf (contains fish)']

In [71]:
import re

# Parse the first example line into its ingredient list and allergen list.
# The pattern is a raw string so that `\(` reaches the regex engine as an
# escaped parenthesis instead of being an invalid Python string escape.
line = example[0]
ingredients, allergens = re.search(r'(.+?) \(contains (.+?)\)', line).groups()
ingredients = ingredients.split(' ')  # ingredients are separated by single spaces
allergens = allergens.split(', ')     # allergens are separated by ", "
print(ingredients)
print(allergens)

['mxmxvkd', 'kfcds', 'sqjhc', 'nhms']
['dairy', 'fish']


In [72]:
# each ingredient may contain at most one allergen; each allergen maps to some ingredient

from collections import defaultdict

def puzzle(example, verbose=True):
    """Match every allergen to the single ingredient that contains it.

    Each non-blank line must look like
    ``"aaa bbb ccc (contains dairy, fish)"``: space-separated ingredients
    followed by a ", "-separated list of allergens.

    The ingredient holding an allergen must appear in every food that lists
    that allergen, so the candidates for an allergen are the intersection of
    the ingredient sets of those foods. Allergens with exactly one remaining
    candidate are fixed, their ingredient is ruled out for all other
    allergens, and the process repeats until every allergen is assigned.

    Also prints the number of ingredient occurrences (counted across all
    foods) that are not assigned to any allergen.

    Parameters
    ----------
    example : iterable of str
        The food lines. Blank lines are skipped.
    verbose : bool, optional
        When true (the default), print the allergen map after each
        elimination round.

    Returns
    -------
    dict
        Mapping of allergen name to the ingredient that contains it.

    Raises
    ------
    ValueError
        If a non-blank line does not match the expected format, or if the
        remaining allergens cannot be narrowed down to a single ingredient.
    """
    line_pattern = re.compile(r'(.+?) \(contains (.+?)\)')

    # Parse every food once; the result is reused for the final count.
    foods = []
    for line in example:
        if not line.strip():
            continue  # e.g. the empty string left behind by a trailing newline
        match = line_pattern.search(line)
        if match is None:
            raise ValueError('Cannot parse food line: %r' % (line,))
        ingredients, allergens = match.groups()
        foods.append((ingredients.split(' '), allergens.split(', ')))

    # For each allergen, keep only the ingredients present in *every* food
    # that lists it. Fresh sets are built here, so no parsed list is mutated.
    candidates = {}
    for ingredients, allergens in foods:
        for allergen in allergens:
            if allergen in candidates:
                candidates[allergen] &= set(ingredients)
            else:
                candidates[allergen] = set(ingredients)

    allergen_map = {}
    while len(allergen_map) < len(candidates):
        # Ingredients fixed in earlier rounds are ruled out for everyone else.
        taken = set(allergen_map.values())
        newly_matched = {}
        for allergen, possible in candidates.items():
            if allergen in allergen_map:
                continue
            remaining = possible - taken
            if len(remaining) == 1:
                newly_matched[allergen] = next(iter(remaining))

        if not newly_matched:
            # No allergen could be pinned down this round: stop instead of
            # looping forever.
            unresolved = sorted(set(candidates) - set(allergen_map))
            raise ValueError('Cannot resolve allergens: %s' % ', '.join(unresolved))

        allergen_map.update(newly_matched)
        if verbose:
            print('Allergen map: %s' % allergen_map)

    # Count every occurrence of an ingredient that carries no allergen.
    ingredients_with_known_allergens = set(allergen_map.values())
    count_no_allergen = sum(
        1
        for ingredients, _ in foods
        for ingredient in ingredients
        if ingredient not in ingredients_with_known_allergens
    )

    print(count_no_allergen)
    return allergen_map
    
puzzle(example)

Allergen map: {'dairy': 'mxmxvkd'}
Allergen map: {'dairy': 'mxmxvkd', 'fish': 'sqjhc'}
Allergen map: {'dairy': 'mxmxvkd', 'fish': 'sqjhc', 'soy': 'fvjkl'}
5


{'dairy': 'mxmxvkd', 'fish': 'sqjhc', 'soy': 'fvjkl'}

In [73]:
# Read the real puzzle input, one food per line. Blank lines (for example the
# empty entry a trailing newline would otherwise produce) are dropped so that
# only real food lines are passed to puzzle().
with open('inputs/input21.txt') as f:
    input21 = [line for line in f.read().splitlines() if line.strip()]

allergens = puzzle(input21)

Allergen map: {'sesame': 'xzfj'}
Allergen map: {'sesame': 'xzfj', 'fish': 'ndfb', 'soy': 'fkcmf'}
Allergen map: {'sesame': 'xzfj', 'fish': 'ndfb', 'soy': 'fkcmf', 'dairy': 'ktpbgdn', 'wheat': 'hdqkqhh'}
Allergen map: {'sesame': 'xzfj', 'fish': 'ndfb', 'soy': 'fkcmf', 'dairy': 'ktpbgdn', 'wheat': 'hdqkqhh', 'shellfish': 'bfgcms', 'eggs': 'pnpfjb'}
Allergen map: {'sesame': 'xzfj', 'fish': 'ndfb', 'soy': 'fkcmf', 'dairy': 'ktpbgdn', 'wheat': 'hdqkqhh', 'shellfish': 'bfgcms', 'eggs': 'pnpfjb', 'peanuts': 'rdhljms'}
2380


In [74]:
# puzzle 2: the dangerous ingredients, ordered alphabetically by the allergen
# each one contains, joined with commas
','.join(ingredient for _, ingredient in sorted(allergens.items()))

'ktpbgdn,pnpfjb,ndfb,rdhljms,xzfj,bfgcms,fkcmf,hdqkqhh'