# Day 07 - Chained Intcode CPUs

More Intcode shenanigans. I've extracted my Intcode interpreter from [day 5](./Day%2005.ipynb) into a separate module, [`intcode`](./intcode.py) so we can reuse it here and, as I suspect, in later puzzles.

We start with a simple brute-force approach to connecting 5 CPUs in series.

In [1]:
from __future__ import annotations
from itertools import permutations
from typing import Iterable, List, Tuple

from intcode import ioset, CPU, Memory

def maximize_thrust(memory: Memory):
    """Return the highest final thruster signal over all phase orderings.

    Every permutation of the phase settings 0-4 is tried. For each one, five
    fresh CPUs run the program in ``memory`` one after another: each CPU is
    given ``ioset(inp, thrust)`` (presumably the phase setting followed by
    the incoming signal as its program input), and its first output becomes
    the signal fed to the next CPU. The first CPU receives a signal of 0.

    Only the signal produced by the *last* CPU of a chain is a candidate
    for the maximum.
    """
    thrusts = []
    for inputs in permutations(range(5)):
        thrust = 0
        for inp in inputs:
            outputs, intset = ioset(inp, thrust)
            cpu = CPU(intset)
            cpu.reset(memory).execute()
            thrust = outputs[0]
        # Record once per permutation, after the whole chain has run;
        # recording inside the inner loop would let the signal of a
        # partially-run chain win the max() below.
        thrusts.append(thrust)
    return max(thrusts)

# Part 1 examples: each entry pairs an Intcode program (as a list of ints)
# with the maximum thruster signal maximize_thrust() must report for it.
part1_tests = (
    (
        [3, 15, 3, 16, 1002, 16, 10, 16, 1, 16, 15, 15, 4, 15, 99, 0, 0],
        43210,
    ),
    (
        [
            3, 23, 3, 24, 1002, 24, 10, 24, 1002, 23, -1, 23,
            101, 5, 23, 23, 1, 24, 23, 23, 4, 23, 99, 0, 0
        ],
        54321,
    ),
    (
        [
            3, 31, 3, 32, 1002, 32, 10, 32, 1001, 31, -2, 31,
            1007, 31, 0, 33, 1002, 33, 7, 33, 1, 33, 31, 31,
            1, 32, 31, 31, 4, 31, 99, 0, 0, 0
        ],
        65210,
    ),
)
# Run the examples when the cell executes, so a regression stops the notebook.
for testmem, expected in part1_tests:
    assert maximize_thrust(testmem) == expected

In [2]:
import aocd
# Fetch the raw puzzle input and parse the comma-separated Intcode program.
data = aocd.get_data(year=2019, day=7)
memory = [int(value) for value in data.split(",")]

In [3]:
# Brute-force all phase orderings on the real puzzle input.
print("Part 1:", maximize_thrust(memory))

Part 1: 17440


## Part 2, co-operative cpus

Now we have to execute the CPUs in series *and pause them* as they wait for more input from the preceding CPU. That's because amplifier A can't continue until amplifier E has produced output. This is just like Python generators, pausing and resuming as you iterate.

I'm pausing my CPUs by raising an exception in the input instruction when there is no input available yet. This leaves the CPU state intact, in that the execution pointer stays at the same position. Calling `cpu.execute()` another time will re-execute the input instruction, continuing where we left off.

I updated my `intcode` module to accept any iterable as the input buffer, and created a `ChainedInput` class that implements the iterator protocol and raises a custom `AwaitingInput` exception. I can't use a regular list and `next(iter(...))`, because once you've reached the end of the list (no inputs available), the list iterator raises `StopIteration` and will continue to do so even if you were to append new values. This is exactly what the list iterator *should* do, as per the [iterators specification (PEP 234)](https://www.python.org/dev/peps/pep-0234/). Once an iterator is done, it stays done.

In [4]:
from collections import deque
from typing import Deque, Iterator, List, Sequence

class AwaitingInput(Exception):
    """Signal that a CPU tried to read input but none is available yet."""

class ChainedInput(Iterator[int]):
    """Resumable iterator over a shared, growable list of input values.

    Values are read from ``buffer`` starting at index ``pos``. When every
    value has been consumed, ``next()`` raises ``AwaitingInput``; once more
    values are appended to ``buffer``, iteration continues from where it
    stopped.
    """

    buffer: List[int]
    pos: int

    def __init__(self, buffer: List[int]) -> None:
        # The list is stored by reference, so later appends are visible here.
        self.pos = 0
        self.buffer = buffer

    def __repr__(self):
        if self.pos < len(self.buffer):
            # show the value the next read will return
            state = f"[{self.buffer[self.pos]}]"
        else:
            state = "(waiting)"
        return f"<ChainedInput({self.buffer!r}) {state}>"

    def __iter__(self) -> ChainedInput:
        return self

    def __next__(self) -> int:
        index = self.pos
        if index < len(self.buffer):
            value = self.buffer[index]
            self.pos = index + 1
            return value
        # Nothing left to read right now; the read position is left untouched
        # so the same read can be retried later.
        raise AwaitingInput

def run_chained(settings: Sequence[int], memory: Memory) -> int:
    """Run the amplifiers in a feedback loop and return the final signal.

    One CPU is created per phase setting. Each CPU reads through a
    ``ChainedInput`` wrapped around the output list of the CPU before it,
    with its own phase setting appended first. The output list of the last
    CPU is then installed as the first CPU's input buffer and seeded with
    the first phase setting and an initial signal of 0.

    The CPUs are executed in rounds. A CPU that runs out of input raises
    ``AwaitingInput`` and is executed again in the next round. The result
    is the last value in the last CPU's output list.
    """
    feeds: List[ChainedInput] = []
    amps: List[CPU] = []
    wire: List[int] = []
    for phase in settings:
        feed = ChainedInput(wire)
        feed.buffer.append(phase)
        feeds.append(feed)
        # the list this CPU writes to is what the next CPU will read from
        wire, intset = ioset(feed)
        amps.append(CPU(intset).reset(memory))

    # close the loop: the last amp's output list becomes the first amp's input
    first_feed = feeds[0]
    first_feed.buffer = wire
    # the first amp's original buffer was just replaced, so its phase setting
    # has to be supplied again, followed by the initial input signal
    first_feed.buffer += (settings[0], 0)

    # Keep running full rounds over all amps. The finished flag is only
    # checked between rounds, so every amp gets executed in the round in
    # which the first one completes without waiting for input.
    finished = False
    while not finished:
        for amp in amps:
            try:
                amp.execute()
            except AwaitingInput:
                continue
            finished = True
    return wire[-1]

def maximize_chained_thrust(memory: Memory):
    """Return the best ``run_chained`` result over all orderings of 5-9."""
    return max(
        run_chained(phases, memory)
        for phases in permutations(range(5, 10))
    )

# Part 2 examples: each entry pairs an Intcode program (as a list of ints)
# with the maximum signal maximize_chained_thrust() must report for it.
part2_tests = (
    (
        [
            3, 26, 1001, 26, -4, 26, 3, 27, 1002, 27, 2, 27,
            1, 27, 26, 27, 4, 27, 1001, 28, -1, 28, 1005, 28, 6,
            99, 0, 0, 5
        ],
        139629729,
    ),
    (
        [
            3, 52, 1001, 52, -5, 52, 3, 53, 1, 52, 56, 54,
            1007, 54, 5, 55, 1005, 55, 26, 1001, 54, -5, 54,
            1105, 1, 12, 1, 53, 54, 53, 1008, 54, 0, 55,
            1001, 55, 1, 55, 2, 53, 55, 53, 4, 53, 1001, 56, -1, 56,
            1005, 56, 6, 99, 0, 0, 0, 0, 10
        ],
        18216
    ),
)
# Run the examples when the cell executes, so a regression stops the notebook.
for testmem, expected in part2_tests:
    assert maximize_chained_thrust(testmem) == expected

In [5]:
# Feedback-loop variant on the real puzzle input.
print("Part 2:", maximize_chained_thrust(memory))

Part 2: 27561242


## Using Python coroutines?

Can we translate intcode into Python coroutines, and so have Python do the pausing?

A quick glance over the puzzle input shows that the 'setting' value is simply the input for a jump operation; a table of jump targets at addresses 10 through 19 is the basis of a 'switch' statement. The first input is stored in location 8, the next instruction adds 10, followed by an unconditional jump to the address referenced by location 8.

For part 1, the 5 sections simply apply a handful of additions and multiplications to the input before writing the outputs. For part 2, the number of operations just runs longer, no looping even. They all make use of a single register for storing input, manipulating the value, and producing the output.

So we can totally turn this into a Python coroutine, using queues to pass input from one coroutine to the next. Each coroutine would take two queues: an input queue and an output queue. We can then translate the *input* opcode to `await inputqueue.get()` and the *output* opcode to `outputqueue.put_nowait()`. I've opted to turn jumps into separate coroutines, and handle the variable indirect jump target through a dynamic symboltable and namespace combo. Each execution gets its own namespace, inputqueue and outputqueue (held in context variables), while the symboltable and generated coroutines are shared between all executions.

The following code generates coroutines on demand for the initial programme, as well as for any jump targets, and then runs everything concurrently. Not just the 5 CPUs for a single configuration, but *all* configurations, calculating the maximum thrust entirely in parallel:

In [6]:
import asyncio
import linecache
from collections import defaultdict
from contextvars import ContextVar
from functools import partial
from string import ascii_letters
from types import CodeType
from typing import cast, Any, Awaitable, Callable, Dict, Mapping, Optional

from intcode import Instruction, Halt, Memory, ParameterMode

# A zero-argument callable returning an awaitable: the type used for the
# coroutine functions produced by AsyncCrossCompilerCPU._compile_coro.
AwaitableFunction = Callable[[], Awaitable[None]]


# Per-execution state. Each variable is set inside the task created by
# AsyncCrossCompilerCPU.execute_async and read back at the top of every
# generated coroutine.
inputqueue: ContextVar[asyncio.Queue] = ContextVar("inputqueue")
outputqueue: ContextVar[asyncio.Queue] = ContextVar("outputqueue")
namespace: ContextVar[Dict[str, int]] = ContextVar("namespace")


class AsyncCrossCompilerCPU(CPU):
    """Cross-compile Intcode to Python coroutines instead of interpreting it.

    Each opcode is mapped to a template that renders one line of Python
    source. Runs of instructions are compiled on demand into
    ``async def amp_<addr>()`` functions, cached in ``self.coros`` by start
    address. Memory cells are represented by one-letter symbols
    (``self.symtable``) that are read and written on a per-task namespace.
    """
    def __init__(self):
        # Every opcode renders Python source via str.format rather than
        # performing the operation.
        # NOTE(review): the Instruction arguments look like
        # (callable, arg_count, output) — confirm against intcode.Instruction.
        self.opcodes = {
            1: Instruction("{} + {}".format, 2, True),
            2: Instruction("{} * {}".format, 2, True),
            3: Instruction("await inp.get()".format, output=True),
            4: Instruction("out.put_nowait({})".format, 1),
            5: Instruction("if {}: return await cpu.coros[{}]()".format, 2),
            6: Instruction("if not {}: return await cpu.coros[{}]()".format, 2),
            7: Instruction("int({} < {})".format, 2, True),
            8: Instruction("int({} == {})".format, 2, True),
            99: Instruction("return".format),
        }
        
    def reset(self, memory: Optional[Memory] = None) -> AsyncCrossCompilerCPU:
        """Start with an empty coroutine cache and symbol table, then reset the CPU.

        Returns whatever ``CPU.reset`` returns for ``memory``.
        """
        factory = self._compile_coro
        # Cache of compiled coroutine functions keyed by start address; a
        # missing address is compiled on first lookup and then stored.
        class CoroCache(dict, Mapping[int, AwaitableFunction]):
            def __missing__(self, key: int) -> AwaitableFunction:
                self[key] = coro = factory(key)
                return coro
        self.coros: CoroCache = CoroCache()
        # address -> symbol. Looking up an unknown address assigns it the next
        # unused ASCII letter; ascii_letters holds 52 characters, so at most
        # 52 distinct addresses can be given a symbol.
        self.symtable: Dict[int, str] = defaultdict(
            cast(Callable[[], str], partial(next, iter(ascii_letters)))
        )
        return super().reset(memory)

    def _compile_coro(self, addr: int) -> AwaitableFunction:
        """Generate, exec and return the coroutine function starting at ``addr``.

        Instructions are rendered one per line, beginning at ``addr``, until a
        rendered statement contains ``return``. The resulting source is an
        ``async def amp_<addr>()`` whose first line fetches the input queue,
        output queue and namespace from the context variables.
        """
        coro_name = f"amp_{addr}"
        self.pos = addr
        mem = self.memory
        
        lines = []
        symtable = self.symtable
        # if an address has been used as a symbol, *use the symbol indirectly*
        # as it probably has been altered. This is especially fun for position
        # mode values; write to address X, then use X as a position variable
        # means it could actually need to read memory from any number of
        # locations.
        #
        # Both renderers receive the address of the operand cell itself:
        # - position: if that cell has a symbol, its runtime value is the
        #   address to read, so the symbol for *that* address is resolved at
        #   run time; otherwise the cell's compile-time value is the address
        #   and its symbol is used directly.
        # - immediate: the cell's symbol if it has one, else its literal value.
        lookup = {
            ParameterMode.position: lambda addr: (
                f"ns[cpu.symtable[ns.{symtable[addr]}]]" if addr in symtable
                else f"ns.{symtable[mem[addr]]}"
            ),
            ParameterMode.immediate: lambda addr: (
                f"ns.{symtable[addr]}" if addr in symtable
                else mem[addr]
            ),
        }
        # coroutines are bounded by jumps and halts
        while True:
            bound = self[mem[self.pos]]
            operands = (
                lookup[param](i)
                for i, param in enumerate(bound.modes, start=bound.offset)
            )
            # the instruction call yields the next position and the rendered
            # source line for this instruction
            self.pos, statement = bound.instruction(self.pos, *operands)
            if bound.instruction.output:
                # instructions with a result assign it to the symbol of the
                # address named by the cell following the input operands
                target = bound.offset + bound.instruction.arg_count
                symbol = symtable[mem[target]]
                statement = f"ns.{symbol} = {statement}"
            lines.append(statement)
            # Matches the halt template as well as both jump templates.
            # NOTE(review): generation also stops after a *conditional* jump,
            # so when its condition is false the generated coroutine just
            # ends — confirm this is acceptable for the programs compiled.
            if "return" in statement:
                break
        
        init = [
            f"async def {coro_name}():",
            f"    inp, out, ns = inputqueue.get(), outputqueue.get(), namespace.get()",
        ]
        # body lines are joined with a 4-space indent to sit inside the def
        coro = "\n".join(init) + "\n    ".join(["", *lines, ""])
        # names the generated source refers to; exec adds the new function
        # to this dict under coro_name
        globs = {"cpu": self, "inputqueue": inputqueue, "outputqueue": outputqueue, "namespace": namespace}
        exec(coro, globs)
        return cast(AwaitableFunction, globs[coro_name])
        
    def _namespace(self) -> Dict[str, int]:
        """Return a fresh symbol namespace for a single execution.

        The namespace is a dict that is also its own ``__dict__``, so
        ``ns.a`` and ``ns["a"]`` refer to the same entry. Item access for a
        symbol with no entry yet (``ns[name]``) loads the value of the memory
        cell that symbol stands for and stores it in the namespace.
        """
        symtable, memory = self.symtable, self.memory
        class Namespace(dict, Mapping[str, int]):
            def __init__(self):
                self.__dict__ = self
            def __missing__(self, name: str) -> int:
                # reverse lookup: find the address this symbol was assigned to
                addr = next(a for a, n in symtable.items() if n == name)
                value = self[name] = memory[addr]
                return value
        return Namespace()
        
    def execute_async(self, inp: asyncio.Queue, out: asyncio.Queue) -> asyncio.Task:
        """Schedule a run of the program from address 0 and return its task.

        Inside the new task the given queues and a fresh namespace are bound
        to the context variables before the coroutine for address 0 is
        awaited. ``asyncio.create_task`` needs a running event loop.
        """
        async def intcode_task() -> None:
            inputqueue.set(inp)
            outputqueue.set(out)
            namespace.set(self._namespace())
            await self.coros[0]()
                 
        return asyncio.create_task(intcode_task())


async def execute_amps(cpu: AsyncCrossCompilerCPU, *settings: int) -> int:
    """Run one amplifier per phase setting, wired in a ring, and return the result.

    Each amplifier gets its own output queue, which is also the input queue
    of the next amplifier; the last amplifier's output queue feeds the first.
    Every input queue is primed with that amplifier's phase setting, and the
    first amplifier additionally receives the initial signal 0.

    Args:
        cpu: object whose ``execute_async(inp, out)`` starts a run and
            returns an ``asyncio.Task``.
        *settings: phase settings, one per amplifier. The number of
            amplifiers follows the number of settings.

    Returns:
        The value left on the last amplifier's output queue once every
        task has finished.

    Raises:
        ValueError: if no settings are given.
    """
    if not settings:
        raise ValueError("at least one phase setting is required")

    # One queue per setting. A fixed count would start amplifiers that never
    # receive a phase setting whenever the number of settings differs from it.
    outputs: List[asyncio.Queue] = [asyncio.Queue() for _ in settings]
    # input for a coro is the output of the preceding amp
    inputs = [outputs[-1], *outputs[:-1]]
    for setting, inp in zip(settings, inputs):
        inp.put_nowait(setting)

    tasks = [cpu.execute_async(inp, outp) for inp, outp in zip(inputs, outputs)]

    # initial signal for the first amplifier, queued after its phase setting
    inputs[0].put_nowait(0)
    await asyncio.wait(tasks)

    return outputs[-1].get_nowait()

async def async_maximize_thrust(memory: Memory, settings_range: range = range(5)):
    """Return the best ``execute_amps`` result over all 5-permutations of the range.

    A single cross-compiling CPU, reset with ``memory``, is shared by every
    configuration; all configurations are awaited together with
    ``asyncio.gather``.
    """
    shared_cpu = AsyncCrossCompilerCPU().reset(memory)
    runs = [
        execute_amps(shared_cpu, *phases)
        for phases in permutations(settings_range, r=5)
    ]
    results = await asyncio.gather(*runs)
    return max(results)

In [7]:
# Top-level ``await`` is used here; presumably this relies on the notebook
# kernel providing a running event loop — TODO confirm. In a plain script
# the calls would need to go through asyncio.run().
print("Part 1, concurrently:", await async_maximize_thrust(memory))
print("Part 2, concurrently:", await async_maximize_thrust(memory, range(5, 10)))

Part 1, concurrently: 17440
Part 2, concurrently: 27561242
