# Day 21
## Part 1
This is one of those puzzles where understanding the question is the main challenge.

In [1]:
import parse
from collections import namedtuple, Counter
import itertools


# Record for one input line: the set of ingredient names and the set of
# allergen names listed on that line.
Ingredients = namedtuple('Ingredients', ['ingredients', 'allergens'])


def parse_data(s):
    """Parse the puzzle text into a list of `Ingredients` records.

    Every line is expected to look like
    ``name name ... (contains allergen, allergen)``.

    Args:
        s: the full input text, one food per line. Leading and trailing
            whitespace is stripped before the text is split into lines.

    Returns:
        A list with one `Ingredients` tuple per line, holding the set of
        ingredient names and the set of allergen names found on that line.

    Raises:
        ValueError: if a line does not match the expected format.
    """
    foods = []
    for line in s.strip().splitlines():
        p = parse.parse('{ingredients} (contains {allergens})', line)
        if p is None:
            # parse.parse() returns None on a non-match; report the offending
            # line instead of failing with an opaque TypeError on p[...].
            raise ValueError('Unrecognised food line: {!r}'.format(line))
        foods.append(
            Ingredients(set(p['ingredients'].split()),
                        set(p['allergens'].split(', ')))
        )
    return foods

In [2]:
# Small sample input, used in the cells below to check each function
# before it is run on the data loaded from the `input` file.
test_data = parse_data('''mxmxvkd kfcds sqjhc nhms (contains dairy, fish)
trh fvjkl sbzzf mxmxvkd (contains dairy)
sqjhc fvjkl (contains soy)
sqjhc mxmxvkd sbzzf (contains fish)
''')

In [3]:
# Display the parsed sample so the parser's output can be checked by eye.
test_data

[Ingredients(ingredients={'mxmxvkd', 'sqjhc', 'kfcds', 'nhms'}, allergens={'dairy', 'fish'}),
 Ingredients(ingredients={'fvjkl', 'trh', 'mxmxvkd', 'sbzzf'}, allergens={'dairy'}),
 Ingredients(ingredients={'fvjkl', 'sqjhc'}, allergens={'soy'}),
 Ingredients(ingredients={'mxmxvkd', 'sqjhc', 'sbzzf'}, allergens={'fish'})]

In [4]:
def allergen_candidates(ingredients):
    """Narrow down which ingredients could contain each allergen.

    For every allergen, the candidates are the ingredients common to all
    records that list that allergen.

    Args:
        ingredients: iterable of records exposing `.ingredients` and
            `.allergens` set attributes.

    Returns:
        A dict mapping each allergen name to the set of candidate ingredient
        names. Every set is newly built, so mutating the result never
        changes the input records.
    """
    candidates = {}
    for food in ingredients:
        for allergen in food.allergens:
            if allergen in candidates:
                # Seen before: keep only ingredients present in this food too.
                candidates[allergen] = candidates[allergen] & food.ingredients
            else:
                # First sighting: copy, so the result does not alias the
                # food's own ingredient set.
                candidates[allergen] = set(food.ingredients)
    return candidates

In [5]:
# Candidate ingredients per allergen for the sample input.
allergen_candidates(test_data)

{'dairy': {'mxmxvkd'}, 'fish': {'mxmxvkd', 'sqjhc'}, 'soy': {'fvjkl', 'sqjhc'}}

In [6]:
def part_1(ingredients):
    """Count appearances of ingredients that cannot contain any allergen.

    An ingredient is counted once for every record it appears in, provided
    it is not a candidate for any allergen.

    Args:
        ingredients: list of records exposing `.ingredients` and
            `.allergens` set attributes.

    Returns:
        The total number of appearances of allergen-free ingredients;
        0 when there are no records.
    """
    candidate_sets = allergen_candidates(ingredients).values()
    # set().union(...) also works with zero candidate sets, whereas the
    # unbound set.union(*[]) raises TypeError.
    possible_allergens = set().union(*candidate_sets)
    appearances = Counter(itertools.chain.from_iterable(
        food.ingredients for food in ingredients))
    return sum(count for name, count in appearances.items()
               if name not in possible_allergens)


In [7]:
# Part 1 on the sample input.
part_1(test_data)

5

In [8]:
# Read the `input` file inside a context manager so the handle is closed
# as soon as the text has been read.
with open('input') as f:
    ingredients = parse_data(f.read())

In [9]:
# Part 1 on the data loaded from the `input` file.
part_1(ingredients)

2072

## Part 2

This follows the method Ben Lavelle used for day 16: bipartite graph matching, here between allergens and their candidate ingredients.

In [10]:
import networkx as nx

def part_2(ingredients):
    """Return the allergen-carrying ingredients as a comma-separated string.

    Each allergen is paired with one of its candidate ingredients through a
    maximum bipartite matching. The matched ingredients are then listed in
    the sorted order of their allergen names.

    Args:
        ingredients: list of records exposing `.ingredients` and
            `.allergens` set attributes.

    Returns:
        The matched ingredient names joined with ','.

    Raises:
        KeyError: if the matching leaves some allergen without an ingredient.
    """
    possible_allergens = allergen_candidates(ingredients)
    # set().union(...) also copes with having no candidate sets at all.
    candidate_ingredients = set().union(*possible_allergens.values())

    # Matching is defined on an undirected bipartite graph: allergens on one
    # side, candidate ingredients on the other.
    graph = nx.Graph()
    graph.add_nodes_from(possible_allergens.keys(), bipartite=0)
    graph.add_nodes_from(candidate_ingredients, bipartite=1)
    graph.add_edges_from(
        (allergen, ingredient)
        for allergen, candidates in possible_allergens.items()
        for ingredient in candidates
    )
    # Name one side explicitly: without top_nodes, networkx has to infer the
    # two sides and raises AmbiguousSolution when the graph is disconnected.
    matching = nx.bipartite.maximum_matching(
        graph, top_nodes=possible_allergens.keys())
    return ','.join(matching[allergen] for allergen in sorted(possible_allergens))

In [11]:
# Part 2 on the sample input.
part_2(test_data)

'mxmxvkd,sqjhc,fvjkl'

In [12]:
# Part 2 on the data loaded from the `input` file.
part_2(ingredients)

'fdsfpg,jmvxx,lkv,cbzcgvc,kfgln,pqqks,pqrvc,lclnj'