In [1]:
import doctest
import io
import re
from enum import auto, Enum
from pathlib import Path
from typing import Iterable

In [2]:
# Path of the puzzle input file; it is relative, so it is resolved against the
# current working directory when opened.
DATA = "input.txt"

# Part 1

In [3]:
def parse_calibration(line: str) -> int:
    """Return the two-digit calibration number for a line.

    The first decimal digit in ``line`` becomes the tens and the last one the
    units. A line holding a single digit uses it for both positions. Every
    other character (letters, the trailing newline, ...) is ignored.

    Raises:
        ValueError: if ``line`` contains no digit at all.

    Example:

        >>> line = "1abc2"
        >>> parse_calibration(line)
        12

        >>> line = "pqr3stu8vwx"
        >>> parse_calibration(line)
        38

        >>> line = "a1b2c3d4e5f"
        >>> parse_calibration(line)
        15

        >>> line = "treb7uchet"
        >>> parse_calibration(line)
        77
    """
    digits = [int(c) for c in line if c.isdecimal()]
    if not digits:
        # Fail with a clear message instead of an accidental
        # ``TypeError`` from arithmetic on ``None``.
        raise ValueError(f"no digit found in line: {line!r}")
    return 10 * digits[0] + digits[-1]

In [4]:
def calibration_values(data: io.TextIOBase) -> Iterable[int]:
    r"""Yield the calibration value of each line obtained by iterating ``data``.

    Lines are processed lazily and in order; every line is handed to
    ``parse_calibration`` and its result is yielded.

    Example:

        >>> data = r'''1abc2
        ... pqr3stu8vwx
        ... a1b2c3d4e5f
        ... treb7uchet
        ... '''
        >>> list(calibration_values(io.StringIO(data)))
        [12, 38, 15, 77]
    """
    yield from (parse_calibration(text) for text in data)

In [5]:
# Run the doctests found in the docstrings defined so far.
doctest.testmod()

TestResults(failed=0, attempted=10)

In [6]:
# Part 1 answer: sum of the calibration values of every line in the input file.
with open(DATA, "r") as f:
    print(sum(calibration_values(f)))

56465


# Part 2

Idea: make a simple state machine for parsing that steps through the characters and yields the 0-9 valued integers it finds in either numeric or word form.

What states are required? If the character is a digit it can be yielded immediately. Otherwise we are either at the end of a previous parse -- the `NULL` state -- or in the middle of a possible parse of a spelled-out digit. The state machine keeps advancing and emits the number when it reaches an (implicit) `EMIT_X` state. It resets to `NULL` if the next character is a digit or is not one of the possible transitions.

In practice each new character has the possibility to advance, end and start multiple states. e.g. when parsing the character sequence `seveight` the second `e` advances the `MYB_7_SEV` state and introduces the `MYB_8_E` state. The number of states could be expanded to account for all of these overlaps but for practicality a set of current possible states is used instead.

```
NULL ---- o --> MYB_1_O -- n --> MYB_1_ON -- e --> EMIT_1
     |
     ---- t --> MYB_2_3_T -- w --> MYB_2_TW -- o --> EMIT_2
     |                    |
     |                    -- h --> MYB_3_TH -- r --> MYB_3_THR -- e --> MYB_3_THRE -- e --> EMIT_3
     |
     ---- f --> MYB_4_5_F -- o --> MYB_4_FO -- u --> MYB_4_FOU -- r --> EMIT_4
     |                    |
     |                    -- i --> MYB_5_FI -- v --> MYB_5_FIV -- e --> EMIT_5
     |
     ---- s --> MYB_6_7_S -- i --> MYB_6_SI -- x --> EMIT_6
     |                    |
     |                    -- e --> MYB_7_SE -- v --> MYB_7_SEV -- e --> MYB_7_SEVE -- n --> EMIT_7
     |
     ---- e --> MYB_8_E -- i --> MYB_8_EI -- g --> MYB_8_EIG -- h --> MYB_8_EIGH -- t --> EMIT_8
     |
     ---- n --> MYB_9_N -- i --> MYB_9_NI -- n --> MYB_9_NIN -- e --> EMIT_9
```

In [7]:
class State(Enum):
    """Parser states for recognising the spelled-out digits one to nine.

    ``NULL`` is the start state. A member named ``MYB_<digits>_<PREFIX>``
    means the letters ``PREFIX`` have just been read and may still complete
    the word for one of ``<digits>``. There is no member for a completed
    word.
    """

    # Start state: no word prefix matched yet.
    NULL = auto()

    # "one"
    MYB_1_O = auto()
    MYB_1_ON = auto()

    # Shared first letter of "two" and "three".
    MYB_2_3_T = auto()

    # "two"
    MYB_2_TW = auto()

    # "three"
    MYB_3_TH = auto()
    MYB_3_THR = auto()
    MYB_3_THRE = auto()

    # Shared first letter of "four" and "five".
    MYB_4_5_F = auto()

    # "four"
    MYB_4_FO = auto()
    MYB_4_FOU = auto()

    # "five"
    MYB_5_FI = auto()
    MYB_5_FIV = auto()

    # Shared first letter of "six" and "seven".
    MYB_6_7_S = auto()

    # "six"
    MYB_6_SI = auto()

    # "seven"
    MYB_7_SE = auto()
    MYB_7_SEV = auto()
    MYB_7_SEVE = auto()

    # "eight"
    MYB_8_E = auto()
    MYB_8_EI = auto()
    MYB_8_EIG = auto()
    MYB_8_EIGH = auto()

    # "nine"
    MYB_9_N = auto()
    MYB_9_NI = auto()
    MYB_9_NIN = auto()

In [8]:
def parse_digits(line: str) -> Iterable[int]:
    """Returns all the numeric of spelled out single digits in ``line``.

    Example:

        >>> line = "two1nine"
        >>> list(parse_digits(line))
        [2, 1, 9]

        >>> line = "eightwothree"
        >>> list(parse_digits(line))
        [8, 2, 3]

        >>> line = "abcone2threexyz"
        >>> list(parse_digits(line))
        [1, 2, 3]

        >>> line = "xtwone3four"
        >>> list(parse_digits(line))
        [2, 1, 3, 4]

        >>> line = "4nineeightseven2"
        >>> list(parse_digits(line))
        [4, 9, 8, 7, 2]

        >>> line = "zoneight234"
        >>> list(parse_digits(line))
        [1, 8, 2, 3, 4]

        >>> line = "7pqrstsixteen"
        >>> list(parse_digits(line))
        [7, 6]
    """
    states = []
    new_states = []
    
    for c in line:
        states.append(State.NULL)
        new_states = []
        for state in states:
            match (state, c):
                case State.NULL, _:
                    try:
                        n = int(c)
                        yield n
                    except:
                        pass
                    match c:
                        case "o":
                            new_states.append(State.MYB_1_O)
                        case "t":
                            new_states.append(State.MYB_2_3_T)
                        case "f":
                            new_states.append(State.MYB_4_5_F)
                        case "s":
                            new_states.append(State.MYB_6_7_S)
                        case "e":
                            new_states.append(State.MYB_8_E)
                        case "n":
                            new_states.append(State.MYB_9_N)
                
                case State.MYB_1_O, "n":
                    new_states.append(State.MYB_1_ON)
                case State.MYB_1_ON, "e":
                    yield 1
                    
                case State.MYB_2_3_T, "w":
                    new_states.append(State.MYB_2_TW)
                case State.MYB_2_3_T, "h":
                    new_states.append(State.MYB_3_TH)
                    
                case State.MYB_2_TW, "o":
                    yield 2
                    
                case State.MYB_3_TH, "r":
                    new_states.append(State.MYB_3_THR)
                case State.MYB_3_THR, "e":
                    new_states.append(State.MYB_3_THRE)
                case State.MYB_3_THRE, "e":
                    yield 3
    
                case State.MYB_4_5_F, "o":
                    new_states.append(State.MYB_4_FO)
                case State.MYB_4_5_F, "i":
                    new_states.append(State.MYB_5_FI)
                    
                case State.MYB_4_FO, "u":
                    new_states.append(State.MYB_4_FOU)
                case State.MYB_4_FOU, "r":
                    yield 4
    
                case State.MYB_5_FI, "v":
                    new_states.append(State.MYB_5_FIV)
                case State.MYB_5_FIV, "e":
                    yield 5
    
                case State.MYB_6_7_S, "i":
                    new_states.append(State.MYB_6_SI)
                case State.MYB_6_7_S, "e":
                    new_states.append(State.MYB_7_SE)
    
                case State.MYB_6_SI, "x":
                    yield 6
    
                case State.MYB_7_SE, "v":
                    new_states.append(State.MYB_7_SEV)
                case State.MYB_7_SEV, "e":
                    new_states.append(State.MYB_7_SEVE)
                case State.MYB_7_SEVE, "n":
                    yield 7
    
                case State.MYB_8_E, "i":
                    new_states.append(State.MYB_8_EI)
                case State.MYB_8_EI, "g":
                    new_states.append(State.MYB_8_EIG)
                case State.MYB_8_EIG, "h":
                    new_states.append(State.MYB_8_EIGH)
                case State.MYB_8_EIGH, "t":
                    yield 8
    
                case State.MYB_9_N, "i":
                    new_states.append(State.MYB_9_NI)
                case State.MYB_9_NI, "n":
                    new_states.append(State.MYB_9_NIN)
                case State.MYB_9_NIN, "e":
                    yield 9
        states = new_states

In [9]:
def parse_calibration_2(line: str) -> int:
    """Return the two-digit calibration number for a line.

    Digits are taken from ``parse_digits``, so both numeric characters and
    spelled-out digits count. The first digit found becomes the tens and the
    last one the units. A line with a single digit uses it for both.

    Raises:
        ValueError: if ``line`` contains no digit at all.

    Example:

        >>> parse_calibration_2("two1nine")
        29

        >>> parse_calibration_2("eightwothree")
        83

        >>> parse_calibration_2("abcone2threexyz")
        13

        >>> parse_calibration_2("xtwone3four")
        24

        >>> parse_calibration_2("4nineeightseven2")
        42

        >>> parse_calibration_2("zoneight234")
        14

        >>> parse_calibration_2("7pqrstsixteen")
        76
    """
    first = last = None
    for n in parse_digits(line):
        if first is None:
            first = n
        last = n
    if first is None:
        # Fail with a clear message instead of an accidental
        # ``TypeError`` from arithmetic on ``None``.
        raise ValueError(f"no digit found in line: {line!r}")
    # Use ``last`` explicitly rather than relying on the loop variable.
    return 10 * first + last

In [10]:
def calibration_values_2(data: io.TextIOBase) -> Iterable[int]:
    r"""Yield the part 2 calibration value of each line obtained by iterating ``data``.

    Every line is handed to ``parse_calibration_2``, which also recognises
    spelled-out digits.

    Example:

        >>> data = r'''two1nine
        ... eightwothree
        ... abcone2threexyz
        ... xtwone3four
        ... 4nineeightseven2
        ... zoneight234
        ... 7pqrstsixteen
        ... '''
        >>> list(calibration_values_2(io.StringIO(data)))
        [29, 83, 13, 24, 42, 14, 76]
    """
    for line in data:
        yield parse_calibration_2(line)

In [11]:
# Re-run the doctests, now including the ones in the part 2 docstrings.
doctest.testmod()

TestResults(failed=0, attempted=33)

In [12]:
# Part 2 answer: same sum as part 1, but spelled-out digits count as well.
with open(DATA, "r") as f:
    print(sum(calibration_values_2(f)))

55902
