In [1]:
import numpy as np

def bitize(num):
    """Expand a small integer into its 7 lowest bits, least-significant bit first.

    Powers of two therefore turn into one-hot vectors:
    1  -> 1000000
    2  -> 0100000
    .
    .
    .
    32 -> 0000010
    64 -> 0000001
    """
    # A single unsigned byte holds the value; only its first 7 bits are kept.
    as_byte = np.array([num], dtype=np.uint8)
    lowest_seven = np.unpackbits(as_byte, bitorder="little", count=7)
    return lowest_seven

# The seven single-letter labels a-g.
letters = list("abcdefg")

It is somewhat confusing (to me at least) to have the same letters meaning different things.  
I'll therefore introduce new intermediary notation for the segments:
```
    . tt .
    t    t
    l    r
    . mm .
    b    b
    l    r
    . bb .
```
But that alone won't get us very far: I need a format with which I can do some algebra.  
So let's encode each display as a list of 0s and 1s of length 7 (one element for each segment on the display).  
Every digit can then be "drawn" by turning the display's segments on or off (1 or 0).  

We can now use the intermediary notation to encode the segments, and thus also encode the digits. 


In [2]:
# Short names for the seven segments: top, top-left, top-right, middle,
# bottom-left, bottom-right, bottom.
segment_positions = ["tt", "tl", "tr", "mm", "bl", "br", "bb"]

# Basic mapping for each segment: the segment at position k is encoded as the
# vector that has a single 1 at index k.
seg_bins = {name: bitize(1 << idx) for idx, name in enumerate(segment_positions)}
seg_bins

{'tt': array([1, 0, 0, 0, 0, 0, 0], dtype=uint8),
 'tl': array([0, 1, 0, 0, 0, 0, 0], dtype=uint8),
 'tr': array([0, 0, 1, 0, 0, 0, 0], dtype=uint8),
 'mm': array([0, 0, 0, 1, 0, 0, 0], dtype=uint8),
 'bl': array([0, 0, 0, 0, 1, 0, 0], dtype=uint8),
 'br': array([0, 0, 0, 0, 0, 1, 0], dtype=uint8),
 'bb': array([0, 0, 0, 0, 0, 0, 1], dtype=uint8)}

In [3]:
# For every digit, the space-separated names of the segments that are lit.
digit_drawings = {
    0: "tt tl tr bl br bb",
    1: "tr br",
    2: "tt tr mm bl bb",
    3: "tt tr mm br bb",
    4: "tl tr mm br",
    5: "tt tl mm br bb",
    6: "tt tl mm bl br bb",
    7: "tt tr br",
    8: "tt tl tr mm bl br bb",
    9: "tt tl tr mm br bb",
}

In [4]:
# Matrix of the segment vectors: row k is the encoding of segment_positions[k].
seg_arr = np.array([seg_bins[name] for name in segment_positions])

In [5]:
# Encode every digit as the sum of the vectors of its lit segments.
digit_encodings = {}
for digit, drawing in digit_drawings.items():
    # Row numbers (in seg_arr) of the segments named in this digit's drawing.
    lit_rows = [segment_positions.index(name) for name in drawing.split()]
    digit_encodings[digit] = seg_arr[lit_rows].sum(axis=0)
digit_encodings

{0: array([1, 1, 1, 0, 1, 1, 1], dtype=uint64),
 1: array([0, 0, 1, 0, 0, 1, 0], dtype=uint64),
 2: array([1, 0, 1, 1, 1, 0, 1], dtype=uint64),
 3: array([1, 0, 1, 1, 0, 1, 1], dtype=uint64),
 4: array([0, 1, 1, 1, 0, 1, 0], dtype=uint64),
 5: array([1, 1, 0, 1, 0, 1, 1], dtype=uint64),
 6: array([1, 1, 0, 1, 1, 1, 1], dtype=uint64),
 7: array([1, 0, 1, 0, 0, 1, 0], dtype=uint64),
 8: array([1, 1, 1, 1, 1, 1, 1], dtype=uint64),
 9: array([1, 1, 1, 1, 0, 1, 1], dtype=uint64)}

In [6]:
# Matrix of the digit encodings: one row per digit (in dictionary order), one column per segment.
dig_arr = np.array([*digit_encodings.values()])

In [7]:
# Number of segments that must be "turned on" to display each digit,
# listed in the same order as digit_encodings.
base_lengths = [np.sum(encoding) for encoding in digit_encodings.values()]

In [8]:
# Collect, for every input line, the lengths of the four output "words"
# we need to decipher (the part of the line after "|").
lengths = []
filename = "data/day08a.txt"
with open(filename, 'r') as ff:
    # Iterate over the file itself: a blank line in the middle of the input is
    # skipped instead of silently ending the read, which is what a
    # `while four_digits:` loop around readline() would do.
    for line in ff:
        four_digits = line.split('|')[-1].split()  # keep only the part after "|"
        if four_digits:
            lengths.append([len(digit) for digit in four_digits])

# Segment counts that belong to exactly one digit:
unique_lengths = [length for length in base_lengths if base_lengths.count(length) == 1]

# Count the occurrences of the "words" whose length is one of the unique lengths
count = sum(ll in unique_lengths for four_lengths in lengths for ll in four_lengths)
count

539

## part 2

In [9]:
import pandas as pd

In [10]:
# Segment-by-digit table: rows are the segment positions, columns are the digits 0-9
# (dig_arr is transposed to get this layout). A 1 means the segment is lit for that digit.
df = pd.DataFrame(data=dig_arr.T, index=segment_positions, columns=range(10))
# "slen": row sum, i.e. in how many of the ten digits each segment is lit.
df["slen"] = df.sum(axis=1)
# Attach the letter labels, one per segment row.
df["letters"] = letters
# "nlen": new bottom row holding the column sums of everything except the last
# ("letters") column, i.e. how many segments each digit lights up.
# The "slen" column is summed as well.
df.loc["nlen"] = df.iloc[:, :-1].sum(axis=0)
# Cast every column except "letters" to int.
# NOTE(review): presumably needed because adding the "nlen" row (which has no
# "letters" value) changes the numeric dtypes — confirm.
df.iloc[:, :-1] = df.iloc[:, :-1].astype(int)
# The nlen/slen cell (last row, second-to-last column) is only a by-product of
# the column sum above; overwrite it with 0.
df.iloc[-1, -2] = 0
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,slen,letters
tt,1,0,1,1,0,1,1,1,1,1,8,a
tl,1,0,0,0,1,1,1,0,1,1,6,b
tr,1,1,1,1,1,0,0,1,1,1,8,c
mm,0,0,1,1,1,1,1,0,1,1,7,d
bl,1,0,1,0,0,0,1,0,1,0,4,e
br,1,1,0,1,1,1,1,1,1,1,9,f
bb,1,0,1,1,0,1,1,0,1,1,7,g
nlen,6,2,5,5,4,5,6,3,7,6,0,


### Here is the decoding algorithm for obtaining the "words" corresponding to each digit:
Using only the first part of the input (part of the line before the "|"-sign). 

We first count, for each letter, in how many of the ten words it appears (this is what "repeated N times" means below). Then we get:  

-  0 : The only 6-letter word containing ONLY one of two letters repeated 7 times.
-  1 : The only 2-letter word
-  2 : The only 5-letter word containing the only letter repeated 4 times
-  3 : The only 5-letter word left after we identify the 2 and the 5
-  4 : The only 4-letter word
-  5 : The only 5-letter word containing the only letter repeated 6 times.
-  6 : The only 6-letter word containing ONLY one of two letters repeated 8 times.
-  7 : The only 3-letter word
-  8 : The only 7-letter word
-  9 : The only 6-letter word left after we identify the 0 and the 6

In [11]:
def read_number(cijelikod):
    """Decode one input line into the integer shown on the display.

    The line has the form ``<signal patterns> | <output words>``. The patterns
    are used to work out which scrambled word stands for which digit; the
    output words are then translated digit by digit and combined into a number.

    Identification rules (a letter's "count" is the number of signal patterns
    that contain it; word sizes are numbers of distinct letters):

    * 2, 3, 4 and 7 letters -> 1, 7, 4 and 8;
    * 5 letters: exactly one letter with count 4 -> 2,
      otherwise exactly one letter with count 6 -> 5, otherwise 3;
    * 6 letters: exactly one letter with count 8 -> 6,
      otherwise exactly one letter with count 7 -> 0, otherwise 9.

    Parameters
    ----------
    cijelikod : str
        One input line. The text before the first "|" is read as the signal
        patterns and the text after the last "|" as the output words.

    Returns
    -------
    int
        The decoded number; 0 when the line has no output words
        (e.g. a blank line).

    Raises
    ------
    ValueError
        If an output word does not match any identified signal pattern.
    """
    parts = cijelikod.split("|")
    testkod = parts[0].split()
    procitaj = parts[-1].split()

    # In how many patterns each letter occurs. This depends only on the line,
    # so it is computed once here instead of once per classified word.
    occurrences = {
        letter: sum(letter in pattern for pattern in testkod)
        for letter in "abcdefg"
    }

    def word_contains(times_letter_repeated, given_word):
        """Count the letters of `given_word` that occur in exactly
        `times_letter_repeated` patterns (0, 1 or 2 for valid input)."""
        return sum(
            1
            for letter in given_word
            if occurrences.get(letter) == times_letter_repeated
        )

    # Sizes that identify a digit on their own.
    digit_by_size = {2: 1, 3: 7, 4: 4, 7: 8}

    # Map each pattern (as a set of letters, so letter order is irrelevant)
    # to the digit it displays.
    digit_of = {}
    for pattern in testkod:
        word = frozenset(pattern)
        size = len(word)
        if size in digit_by_size:
            digit = digit_by_size[size]
        elif size == 5:
            if word_contains(4, word) == 1:
                digit = 2
            elif word_contains(6, word) == 1:
                digit = 5
            else:
                digit = 3
        elif size == 6:
            if word_contains(8, word) == 1:
                digit = 6
            elif word_contains(7, word) == 1:
                digit = 0
            else:
                digit = 9
        else:
            # Not a size any digit can have: ignore the pattern.
            continue
        digit_of.setdefault(word, digit)

    # Translate the output words, most significant digit first.
    konacni = 0
    for rijec in procitaj:
        key = frozenset(rijec)
        if key not in digit_of:
            raise ValueError(
                f"output word {rijec!r} does not match any signal pattern"
            )
        konacni = konacni * 10 + digit_of[key]

    return konacni

In [12]:
# Worked example line: ten signal patterns, then "|", then four output words.
cijelikod = (
    "acedgfb cdfbe gcdfa fbcad dab cefabd cdfgeb eafb cagedb ab"
    " | "
    "cdfeb fcadb cdfeb cdbaf"
)
read_number(cijelikod)

5353

In [13]:
# Decode every line of the input file into its displayed number.
numbers = []
filename = "data/day08a.txt"
with open(filename, 'r') as ff:
    numbers.extend(read_number(line) for line in ff)

In [14]:
# Sum of all decoded numbers.
np.asarray(numbers).sum()

1084606