In [1]:
# Sample input: each line holds ten patterns, a '|' separator, then four
# output values, all written as space-separated groups of the letters a-g.
test_input = """
be cfbegad cbdgef fgaecd cgeb fdcge agebfd fecdb fabcd edb | fdgacbe cefdb cefbgd gcbe
edbfga begcd cbg gc gcadebf fbgde acbgfd abcde gfcbed gfec | fcgedb cgb dgebacf gc
fgaebd cg bdaec gdafb agbcfd gdcbef bgcad gfac gcb cdgabef | cg cg fdcagb cbg
fbegcd cbd adcefb dageb afcb bc aefdc ecdab fgdeca fcdbega | efabcd cedba gadfec cb
aecbfdg fbg gf bafeg dbefa fcge gcbea fcaegb dgceab fcbdga | gecf egdcabf bgf bfgea
fgeab ca afcebg bdacfeg cfaedg gcfdb baec bfadeg bafgc acf | gebdcfa ecba ca fadegcb
dbcfg fgd bdegcaf fgec aegbdf ecdfab fbedc dacgb gdcebf gf | cefg dcbef fcge gbcadfe
bdfegc cbegaf gecbf dfcage bdacg ed bedf ced adcbefg gebcd | ed bcgafe cdgba cbgef
egadfb cdbfeg cegd fecab cgb gbdefca cg fgcdab egfdb bfceg | gbdfcae bgc cg cgb
gcafb gcf dcaebfg ecagb gf abcdeg gaef cafbge fdbac fegbdc | fgae cfgab fg bagce
"""

In [7]:
# Parse every "patterns | outputs" line of the sample into a
# (patterns, outputs) pair of string lists.
# str.split() with no argument splits on any run of whitespace, so doubled
# spaces or tabs can never yield empty tokens (split(' ') would).
data = []
for line in test_input.strip().splitlines():
    examples_txt, values_txt = line.split('|')
    data.append((examples_txt.split(), values_txt.split()))

In [10]:
# Peek at the first parsed entry: the tokens left of '|' and those right of it.
examples, values = data[0]
examples, values

(['be',
  'cfbegad',
  'cbdgef',
  'fgaecd',
  'cgeb',
  'fdcge',
  'agebfd',
  'fecdb',
  'fabcd',
  'edb'],
 ['fdgacbe', 'cefdb', 'cefbgd', 'gcbe'])

In [13]:
# Digits whose pattern length is unique, mapped to that length.
digit_to_len = {1: 2, 4: 4, 7: 3, 8: 7}
lengths = set(digit_to_len.values())

# Tally the output values whose length matches one of those digits.
tot_1478 = 0
for examples, values in data:
    tot_1478 += sum(1 for v in values if len(v) in lengths)

In [14]:
# Show the number of output values whose length matched 1, 4, 7 or 8.
tot_1478

26

In [15]:
# Read the whole of input.txt into a single string.
with open('input.txt') as input_file:
    input_ = input_file.read()

In [16]:
# Parse every "patterns | outputs" line of the file contents into a
# (patterns, outputs) pair of string lists.
# str.split() with no argument splits on any run of whitespace, so doubled
# spaces or tabs can never yield empty tokens (split(' ') would).
data = []
for line in input_.strip().splitlines():
    examples_txt, values_txt = line.split('|')
    data.append((examples_txt.split(), values_txt.split()))

In [17]:
# Count, across every entry in `data`, the output values whose length is
# one of the unique lengths collected in `lengths`.
tot_1478 = sum(
    len(v) in lengths
    for examples, values in data
    for v in values
)
tot_1478

530

# Part 2

In [126]:
# Signature of each digit that cannot be identified by length alone:
# the number of segments it shares with 1, 4, 7 and 8, in that order.
digit_to_overlap = dict([
    (0, (2, 3, 3, 6)),
    (2, (1, 2, 2, 5)),
    (3, (2, 3, 3, 5)),
    (5, (1, 3, 2, 5)),
    (6, (1, 3, 2, 6)),
    (9, (2, 4, 3, 6)),
])

# Reverse lookup: signature tuple -> digit.
overlap_to_digit = dict(zip(digit_to_overlap.values(), digit_to_overlap.keys()))
overlap_to_digit

{(1, 2, 2, 5): 2,
 (1, 3, 2, 5): 5,
 (1, 3, 2, 6): 6,
 (2, 3, 3, 5): 3,
 (2, 3, 3, 6): 0,
 (2, 4, 3, 6): 9}

In [21]:
# Re-parse the sample so the Part 2 cells below run against it.
# str.split() with no argument splits on any run of whitespace, so doubled
# spaces or tabs can never yield empty tokens (split(' ') would).
data = []
for line in test_input.strip().splitlines():
    examples_txt, values_txt = line.split('|')
    data.append((examples_txt.split(), values_txt.split()))

In [178]:
def overlap(s1, s2):
    """Return the number of distinct elements that s1 and s2 have in common."""
    common = set(s1) & set(s2)
    return len(common)


def sorted_str(s):
    """Return the elements of s joined into one string in sorted order."""
    chars = list(s)
    chars.sort()
    return ''.join(chars)


def build_rosetta_stone(examples, signature_to_digit=None):
    """Map each digit to the pattern in `examples` that represents it.

    Patterns of length 2, 4, 3 and 7 are taken to be the digits 1, 4, 7
    and 8 respectively. Every other pattern is identified by its
    signature: the tuple of how many letters it shares with the 1, 4, 7
    and 8 patterns, in that order.

    Parameters
    ----------
    examples : iterable of str
        The observed patterns. Order does not matter and repeated
        patterns are tolerated.
    signature_to_digit : mapping of tuple -> int, optional
        Lookup from signature to digit. Defaults to the notebook-level
        ``overlap_to_digit`` table.

    Returns
    -------
    dict
        ``{digit: pattern}``, with each pattern spelled as it appears in
        `examples`.

    Raises
    ------
    ValueError
        If no pattern exists for 1, 4, 7 or 8, or if two patterns with
        different letters compete for one of those digits.
    KeyError
        If a remaining pattern's signature is not in the lookup table.
    """
    if signature_to_digit is None:
        # Resolved at call time so the notebook-level table is still honoured.
        signature_to_digit = overlap_to_digit

    # Select 1, 4, 7, 8 by length rather than by position in a sorted list:
    # positional indexing silently picks the wrong patterns as soon as the
    # input contains a stray, duplicated or missing token.
    unique_len_to_digit = {2: 1, 4: 4, 3: 7, 7: 8}

    rosetta_stone = {}
    undecided = []
    for pattern in examples:
        digit = unique_len_to_digit.get(len(pattern))
        if digit is None:
            undecided.append(pattern)
            continue
        known = rosetta_stone.setdefault(digit, pattern)
        if set(known) != set(pattern):
            raise ValueError(
                'conflicting patterns for digit {}: {!r} and {!r}'.format(
                    digit, known, pattern))

    missing = sorted(set(unique_len_to_digit.values()) - set(rosetta_stone))
    if missing:
        raise ValueError('no pattern found for digit(s) {}'.format(missing))

    # Build the four anchor letter sets once instead of once per comparison.
    anchors = [set(rosetta_stone[d]) for d in (1, 4, 7, 8)]
    for pattern in undecided:
        letters = set(pattern)
        signature = tuple(len(letters & anchor) for anchor in anchors)
        rosetta_stone[signature_to_digit[signature]] = pattern
    return rosetta_stone


def translate_values(values, rosetta_stone):
    """Decode `values` into the integer formed by their digits.

    `rosetta_stone` maps digit -> pattern. Patterns are compared by their
    sorted spelling, so the letter order inside a pattern is irrelevant.
    The decoded digits are concatenated in order and returned as an int.
    """
    # Invert the mapping, keyed by the sorted spelling of each pattern.
    pattern_to_char = {}
    for digit, pattern in rosetta_stone.items():
        pattern_to_char[sorted_str(pattern)] = str(digit)

    decoded = ''.join(pattern_to_char[sorted_str(v)] for v in values)
    return int(decoded)

In [180]:
# Decode every entry in `data`, print each value, and verify the whole run
# against `expected` instead of leaving the comparison to the reader's eye.
expected = [8394, 9781, 1197, 9361, 4873, 8418, 4548, 1625, 8717, 4315]
decoded = []
for examples, values in data:
    rosetta_stone = build_rosetta_stone(examples)
    decoded.append(translate_values(values, rosetta_stone))
    print(decoded[-1])
assert decoded == expected, 'decoded values differ from the expected ones'

8394
9781
1197
9361
4873
8418
4548
1625
8717
4315


In [184]:
# Load input.txt and parse it into (patterns, outputs) pairs.
with open('input.txt', 'r') as f:
    input_ = f.read()

# str.split() with no argument splits on any run of whitespace, so doubled
# spaces or tabs can never yield empty tokens (split(' ') would).
data = []
for line in input_.strip().splitlines():
    examples_txt, values_txt = line.split('|')
    data.append((examples_txt.split(), values_txt.split()))

In [185]:
# Sanity check: number of entries parsed from the file.
len(data)

200

In [186]:
# Decode each entry in `data` and add up the resulting integers.
sum_ = sum(
    translate_values(values, build_rosetta_stone(examples))
    for examples, values in data
)

In [187]:
# Show the total of all decoded output values.
sum_

1051087