In [100]:
import numpy as np
from collections import Counter

In [94]:
def load_data(datafile):
    """Read a puzzle file into parallel lists of whitespace-split tokens.

    Every non-blank line must have the form ``<left words> | <right words>``.
    Blank lines (for example a trailing newline at the end of the file) are
    skipped instead of aborting the load.

    Args:
        datafile: Path of the text file to read.

    Returns:
        A tuple ``(inputs, outputs)``. ``inputs[i]`` is the list of words to
        the left of ``' | '`` on the i-th non-blank line and ``outputs[i]``
        is the list of words to its right.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            parts on ``' | '``.
    """
    inputs = []
    outputs = []
    with open(datafile, 'r') as fp:
        # Iterate the file lazily rather than materialising it with readlines().
        for line in fp:
            line = line.strip()
            if not line:
                # An empty line has no ' | ' separator and would break the unpack below.
                continue
            # Local names chosen so the builtin `input` is not shadowed.
            patterns, digits = line.split(' | ')
            inputs.append(patterns.split())
            outputs.append(digits.split())
    return inputs, outputs

In [98]:
# Canonical seven-segment patterns; the list index is the digit each pattern
# displays (parse_num returns `reference.index(...)`).
reference = ['abcefg', 'cf', 'acdeg', 'acdfg', 'bcdf', 'abdfg', 'abdefg', 'acf', 'abcdefg', 'abcdfg']
# Load the small sample file and show its left-hand (pattern) side.
# NOTE(review): `input` shadows the Python builtin; the names are kept because
# later cells read the globals `input` and `output`.
input, output = load_data("test.txt")
input

[['acedgfb',
  'cdfbe',
  'gcdfa',
  'fbcad',
  'dab',
  'cefabd',
  'cdfgeb',
  'eafb',
  'cagedb',
  'ab']]

In [82]:
# Word lengths that identify a digit on their own:
# 1 uses 2 segments, 7 uses 3, 4 uses 4, and 8 uses all 7.
lens = [2, 3, 4, 7]

In [83]:
# Count the output words whose length equals one of the lengths in `lens`.
# The result gets its own name so the builtin `sum` is not rebound to an int
# for the rest of the kernel session, and the generator keeps the loop
# variables out of the notebook's global namespace.
unique_len_digit_count = sum(
    1
    for target_len in lens
    for display in output
    for word in display
    if len(word) == target_len
)
unique_len_digit_count

0

In [84]:
# Tally how many of the reference digit patterns light each segment, then pick
# out the segments with the highest, lowest and second-lowest tallies.
ref_cnt = Counter(segment for pattern in reference for segment in pattern)
ref_ranked = ref_cnt.most_common()  # sorted from most to least frequent
ref_most_common = ref_ranked[0][0]
ref_next_least_common, ref_least_common = (segment for segment, _ in ref_ranked[-2:])
ref_cnt, ref_most_common, ref_least_common, ref_next_least_common

(Counter({'a': 8, 'b': 6, 'c': 8, 'e': 4, 'f': 9, 'g': 7, 'd': 7}),
 'f',
 'e',
 'b')

In [113]:
def make_mapping(data):
    """Deduce which scrambled wire letter drives each real display segment.

    The function is self-contained: the three frequency-derived targets
    ('f', 'e', 'b') are written out here alongside the other four, so it no
    longer depends on globals computed in a different notebook cell.

    Args:
        data: The ten scrambled patterns of one display, in any order, one
            string per digit 0-9.

    Returns:
        A dict mapping each scrambled letter to the canonical segment letter
        ('a'-'g') it stands for.

    Raises:
        IndexError: If ``data`` is empty or holds too few patterns/letters
            for the positional lookups below.
    """
    patterns = sorted(data, key=len)

    # Rank the letters once, from most to least frequent across all patterns.
    ranked = Counter(''.join(patterns)).most_common()
    most_common = ranked[0][0]
    next_least_common = ranked[-2][0]
    least_common = ranked[-1][0]

    # Across the ten canonical digits, segment f is lit 9 times (the most),
    # e 4 times (the least) and b 6 times (the second least); those counts are
    # unique, so frequency alone identifies these three wires.
    mapping = {most_common: 'f', least_common: 'e', next_least_common: 'b'}
    known = {most_common, least_common, next_least_common}

    # Shortest pattern is digit 1 ("cf"); f is already known, so the other wire is c.
    for wire in patterns[0]:
        if wire != most_common:
            mapping[wire] = 'c'
            known.add(wire)

    # Next is digit 7 ("acf"); c and f are known, so the new wire is a.
    for wire in patterns[1]:
        if wire not in known:
            mapping[wire] = 'a'
            known.add(wire)

    # Next is digit 4 ("bcdf"); b, c and f are known, so the new wire is d.
    for wire in patterns[2]:
        if wire not in known:
            mapping[wire] = 'd'
            known.add(wire)

    # Longest pattern is digit 8 (all segments); the only wire left is g.
    for wire in patterns[-1]:
        if wire not in known:
            mapping[wire] = 'g'
    return mapping

# Canonical seven-segment patterns, indexed by the digit they display. Kept
# next to parse_num so the function does not rely on the notebook global
# `reference` having been defined by an earlier cell.
_DIGIT_SEGMENTS = ('abcefg', 'cf', 'acdeg', 'acdfg', 'bcdf', 'abdfg', 'abdefg', 'acf', 'abcdefg', 'abcdfg')
_SEGMENTS_TO_DIGIT = {segments: digit for digit, segments in enumerate(_DIGIT_SEGMENTS)}


def parse_num(word, mapping):
    """Decode one scrambled output word into the digit it displays.

    Args:
        word: Scrambled pattern, one wire letter per lit segment.
        mapping: Dict from wire letter to canonical segment letter, as
            returned by ``make_mapping``.

    Returns:
        The digit (0-9) whose canonical segment set matches the translated word.

    Raises:
        KeyError: If ``word`` contains a letter that is not a key of ``mapping``.
        ValueError: If the translated segments do not form any digit.
    """
    # Translate each wire, then sort so the result matches the alphabetical
    # canonical patterns regardless of the order letters appear in `word`.
    parsed = ''.join(sorted(mapping[c] for c in word))
    try:
        return _SEGMENTS_TO_DIGIT[parsed]
    except KeyError:
        # Keep the exception type the list.index() lookup used to raise.
        raise ValueError(f"{parsed!r} is not a valid digit pattern") from None

def process(datafile):
    """Decode every entry of a puzzle file into the number its display shows.

    For each line, the wiring is deduced from the left-hand patterns with
    ``make_mapping``; each right-hand word is then decoded with ``parse_num``
    and the digits are concatenated into a single integer.

    Args:
        datafile: Path passed through to ``load_data``.

    Returns:
        A list with one integer per entry, in file order.
    """
    signal_sets, display_sets = load_data(datafile)
    numbers = []
    for signals, display in zip(signal_sets, display_sets):
        wiring = make_mapping(signals)
        # Join the decoded digits as text so their positions are preserved.
        digits = ''.join(str(parse_num(word, wiring)) for word in display)
        numbers.append(int(digits))
    return numbers

In [109]:
# Spot check: deduce the wiring from the first row of `input`, then decode a
# single scrambled word with it.
mapping = make_mapping(input[0])
parse_num('cdfeb', mapping)

5

In [118]:
# Decode every entry in input.txt and add up the resulting numbers.
arr = process("input.txt")
np.sum(arr)

1011785