# Container DAG


https://docker-py.readthedocs.io/en/stable/index.html

https://docs.docker.com/reference/cli/docker/

In [9]:
from abc import ABCMeta, abstractmethod
import json
import random
import shlex
import subprocess
import sys

from docker import from_env

In [2]:
sys.path.append("../src")

from topology.topological_sort import kahns_algorithm

## Open Docker client

In [3]:
# Docker SDK client shared by the cells below (image listing, one-off
# container runs); it is closed in the last cell of the notebook.
client = from_env()

### Get container processes

In [15]:
class NodeContainer:
    """ The container associated with one or more nodes.
    
    Attributes
    ----------
    image: str
        The image reference, typically "name:tag".
    name: str
        The container name, derived from the image reference with the tag
        removed and every character that is not allowed in a container name
        replaced by "_". For a plain "name:tag" reference this is "name".
        The container will only actually exist after calling self.start().
    """
    def __init__(self, image_tag):
        self.image = image_tag
        self.name = self._container_name(image_tag)

    @staticmethod
    def _container_name(image_tag):
        """ Derive a container name from an image reference. """
        # Drop a digest suffix ("repo@sha256:...") if there is one.
        repository = image_tag.partition("@")[0]
        # Only the text after the LAST colon can be a tag, and a tag never
        # contains "/". This keeps a registry port ("host:5000/repo") intact.
        head, colon, tail = repository.rpartition(":")
        if colon and "/" not in tail:
            repository = head
        # Container names only accept ASCII letters, digits, "_", "." and "-".
        return "".join(
            char if (char.isascii() and char.isalnum()) or char in "_.-" else "_"
            for char in repository
        )
    
    def start(self):
        """ Start the container in detached mode.

        The docker CLI output (the new container id) is not captured, so it
        goes to the standard output of the calling process. A failing docker
        command is not raised as an exception.
        """
        # An argument list (no shell) so that the image reference and the
        # container name are never interpreted by a shell.
        subprocess.run(
            ["docker", "run", "--name", self.name, "-i", "-d", self.image]
        )
        
    def stop(self):
        """ Stop the container and remove it (forced removal). """
        subprocess.run(["docker", "rm", "-f", self.name])
        
        
class NodeProcess(metaclass=ABCMeta):
    """ Abstract base class of every process that a graph node can execute.
    
    Subclasses must implement run(). Because the class uses ABCMeta, neither
    NodeProcess itself nor a subclass without run() can be instantiated.
    
    Attributes
    ----------
    name: str
        Name of the process.
    description: str
        Human readable description of what the process does.
    inputs: dict
        A template of input arguments to the process.
    outputs: dict
        A template of output arguments of the process.
    """
    
    def __init__(
        self, 
        name: str, 
        description: str, 
        inputs: dict, 
        outputs: dict
    ):
        self.name = name
        self.description = description
        self.inputs = inputs
        self.outputs = outputs
        
    @abstractmethod
    def run(self, inputs: dict) -> dict:
        """ Execute the process.
        
        Parameters
        ----------
        inputs: dict
            The actual input arguments.
            
        Returns
        -------
        dict
            The output arguments.
        """

            
class ContainerProcess(NodeProcess):
    """ A node process that is executed inside a running container.
    
    Attributes
    ----------
    name: str
        Name of the process. This is the key of an image label.
    description: str
        Human readable description of the process.
    container: str
        The name of the container that runs the node.    
    command: str
        The command to the container that runs the node. This is the image label value.
    inputs: dict
        A template of input arguments to the node (inputs to its command).
    outputs: dict
        A template of output arguments to the node (outputs to its command).
    """
               
    def __init__(
        self, 
        name: str, 
        description: str, 
        inputs: dict, 
        outputs: dict,
        container: str,
        command: str,
    ):
        self.container = container
        self.command = command
        super().__init__(name=name, description=description, inputs=inputs, outputs=outputs)
        
    def run(self, inputs: dict) -> dict:
        """ Run the command in the container, exchanging JSON over stdin/stdout.
        
        Parameters
        ----------
        inputs: dict
            The input arguments. They are serialised to one line of JSON and
            written to the standard input of the command.
            
        Returns
        -------
        dict
            The JSON document that the command wrote to its standard output,
            or an empty dict when that output is not valid JSON.
        """
        # TODO: validate `inputs` against the self.inputs template.
        
        # The JSON goes through stdin rather than through a shell `echo '...'`,
        # so quotes and backslashes inside values cannot break (or inject into)
        # the command line. The trailing newline mirrors what `echo` appended.
        payload = (json.dumps(inputs) + "\n").encode()
        
        # The label command is split into arguments without involving a shell.
        cmd = ["docker", "exec", "-i", self.container, *shlex.split(self.command)]
        result = subprocess.run(cmd, input=payload, stdout=subprocess.PIPE)
        
        # Get the output JSON.
        try:
            return json.loads(result.stdout)
        except json.decoder.JSONDecodeError:
            # Best effort: a command that printed no (or malformed) JSON
            # yields no outputs instead of raising.
            return {}
        
               
class UniformFloatProcess(NodeProcess):
    """ A generator process that runs in this Python process (no container).
    
    It draws one float with random.uniform between the "min" and "max" inputs
    and returns it as output "n".
    """
    
    def __init__(self):
        template_inputs = {"min": 0.0, "max": 1.0}
        template_outputs = {"n": 0.5}
        super().__init__(
            name="random float",
            description="A uniformly distributed random float.",
            inputs=template_inputs,
            outputs=template_outputs,
        )
        
    def run(self, inputs: dict) -> dict:
        """ Return {"n": value} drawn from inputs["min"] .. inputs["max"].
        
        Raises KeyError when "min" or "max" is missing from `inputs`.
        """
        lower = inputs["min"]
        upper = inputs["max"]
        return {"n": random.uniform(lower, upper)}
               
class RandomChoice(NodeProcess):
    """ A generator process that runs in this Python process (no container).
    
    It picks one element of the "choices" input with random.choice and
    returns it as output "choice".
    """
    
    def __init__(self):
        super().__init__(
            name="random choice", 
            description="A random choice.", 
            inputs={"choices": [1, 2, 3, 4]}, 
            outputs={"choice": 1}
        )
        
    def run(self, inputs: dict) -> dict:
        """ Return {"choice": element} picked from inputs["choices"].
        
        Raises KeyError when "choices" is missing, and IndexError (from
        random.choice) when the sequence is empty.
        """
        return {"choice": random.choice(inputs["choices"])}
        

In [16]:
# Discover the processes offered by the local Docker images.
#
# An image offers a process through a label whose key starts with "process."
# and whose value is the command to run. Running that command once without
# any input is expected to print a JSON object with the keys "description",
# "inputs" and "outputs"; labels whose command prints anything else are ignored.
containers = []
processes = {}
# Loop through images. 
# Exclude intermediate layer images (all=False) and dangling images.
for image in client.images.list(all=False, filters={"dangling":False}):
    
    # Image tag
    tags = image.tags
    if not tags:
        continue
    image_tag = tags[0]
    
    # Labels with node names and commands.
    # `image.labels` reads the image's own config and is an empty dict when the
    # image has no labels. The 'ContainerConfig' entry of the inspect data is
    # not used because newer Docker Engine versions no longer report it.
    labels = image.labels
    process_cmd = {name: cmd for name, cmd in labels.items() if name.startswith("process.")}
    if not process_cmd:
        continue
        
    # Processes
    for name, cmd in process_cmd.items():
        # Container should have no entry point, or this will call the entrypoint rather than label command.
        result = client.containers.run(image=image_tag, command=cmd, remove=True, stdout=True)
        
        # Only the parsing is guarded, so that errors raised while building
        # the objects below are not mistaken for bad JSON.
        try:
            dic = json.loads(result)
        except json.decoder.JSONDecodeError:
            continue
        if not isinstance(dic, dict):
            continue
        if not ("description" in dic and "outputs" in dic and "inputs" in dic):
            continue
            
        # One NodeContainer per image, shared by all processes of that image.
        if not containers or containers[-1].image != image_tag:
            container = NodeContainer(image_tag=image_tag)
            containers.append(container) 
        
        processes[name] = ContainerProcess(
            name = name,
            description = dic["description"],
            container = containers[-1].name,
            command = cmd,
            inputs = dic["inputs"],
            outputs = dic["outputs"]
        )    
    
print([container.name for container in containers])
print([process.name for process in processes.values()])

['multiple_divide', 'add_subtract']
['process.c_divide', 'process.c_multiply', 'process.python_add', 'process.python_subtract']


### Start containers

In [17]:
# Launch one detached container per discovered image. NodeContainer.start()
# does not capture the docker CLI output, so the container ids are printed.
for container in containers:
    container.start()

aed6bc4e50e4e607ab3f16fd15e85d1b02dafeb39d9aaf11cae96c36ea2cd34f
238f88bea2719103d8859a659258741e82e8777e06092f79968291d352606344


### Add generator processes

In [18]:
# Register the two in-notebook generator processes next to the container
# processes, keyed by the same string that is stored in their `name` attribute.
processes["random float"] = UniformFloatProcess()
processes["random choice"] = RandomChoice()

### Information about processes

In [19]:
# Print a short report (index, name, description, input and output templates)
# for every registered process.
separator = "-"*30
for i, process in enumerate(processes.values()):
    report = (
        separator,
        f"#{i} {process.name}",
        f"\t{process.description}",
        f"\tinputs = {process.inputs}",
        f"\toutputs = {process.outputs}",
    )
    print("\n".join(report))

------------------------------
#0 process.c_divide
	Division. z = a / b
	inputs = {'a': 1.0, 'b': 1.0}
	outputs = {'z': 1.0}
------------------------------
#1 process.c_multiply
	Multiplication. z = a * b
	inputs = {'a': 1.0, 'b': 1.0}
	outputs = {'z': 1.0}
------------------------------
#2 process.python_add
	Addition. z = x + y.
	inputs = {'x': 1.0, 'y': 1.0}
	outputs = {'z': 1.0}
------------------------------
#3 process.python_subtract
	Addition. z = x + y.
	inputs = {'x': 1.0, 'y': 1.0}
	outputs = {'z': 1.0}
------------------------------
#4 random float
	A uniformly distributed random float.
	inputs = {'min': 0.0, 'max': 1.0}
	outputs = {'n': 0.5}
------------------------------
#5 random choice
	A rnadom choice.
	inputs = {'choices': [1, 2, 3, 4]}
	outputs = {'choice': 1}


### Run individual processes

In [22]:
# Run a single container process directly; the returned dict is the cell output.
processes["process.python_add"].run({'x': 2.34, 'y': 10.78})

{'z': 13.12}

In [14]:
# NOTE(review): the recorded output of this cell uses the key 'x', while the
# outputs template reported for process.c_multiply is {'z': 1.0} - confirm
# which key the container command is meant to emit.
processes["process.c_multiply"].run({'a': 89.987, 'b': 10.0})

{'x': 899.8699951171875}

In [31]:
# Draw ten independent samples from the in-notebook "random choice" process.
for _ in range(10):
    x = processes["random choice"].run({'choices': [3, 4, 8]})
    print(x)

{'choice': 3}
{'choice': 4}
{'choice': 4}
{'choice': 8}
{'choice': 8}
{'choice': 8}
{'choice': 3}
{'choice': 3}
{'choice': 8}
{'choice': 3}


### Run a graph of processes

#### Specify the graph

In [22]:
# Graph specification: node name -> {"process": ..., "inputs": ...}.
#   "process" is a key of the `processes` dict.
#   "inputs" maps each input name either to a literal value, or to a tuple
#   (upstream node name, output variable name) that is replaced by that
#   node's output when the graph is executed.
graph_specification = {
    "node alpha": {
        "process": "random choice",
        "inputs": {'choices': [1.2, 5.4, 6.7]}
    },
    "node beta": {
        "process": "random float",
        "inputs": {"min": -5.0, "max": 5.0}
    },
    "node a": {
        "process": "process.python_add",
        "inputs": {'x': ("node alpha", "choice"), 'y': 16.5}
    },
    "node b": {
        "process": "process.python_add",
        "inputs": {'x': ("node a", "z"), 'y': ("node beta", "n")}
    },
    "node c": {
        "process": "process.c_divide",
        "inputs": {'a': ("node a", "z"), 'b': ("node b", "z")}
    },
}

#### Get the topological (execution) order of the nodes

In [23]:
# Map every well-formed node to the list of upstream nodes it reads from.
# A node is well formed when it has a "process" entry and a dict of "inputs";
# other nodes are left out. An upstream node is the first element of a tuple
# input value.
graph_topology = {}

for node, content in graph_specification.items():
    well_formed = (
        "inputs" in content
        and "process" in content
        and isinstance(content["inputs"], dict)
    )
    if well_formed:
        upstream_nodes = []
        for value in content["inputs"].values():
            if isinstance(value, tuple):
                upstream_nodes.append(value[0])
        graph_topology[node] = upstream_nodes

graph_topology

{'node alpha': [],
 'node beta': [],
 'node a': ['node alpha'],
 'node b': ['node a', 'node beta'],
 'node c': ['node a', 'node b']}

In [24]:
# Order the nodes so that every node comes after the nodes it reads from.
# graph_topology maps each node to its upstream nodes; presumably that is what
# incoming=True declares - confirm against topology.topological_sort.
topological_order = kahns_algorithm(graph_topology, incoming=True)
topological_order

['node alpha', 'node beta', 'node a', 'node b', 'node c']

#### Execute the graph

In [40]:
# Execute the nodes in topological order.
# `inputs` collects the resolved input arguments of every node and `outputs`
# the dict returned by its process, both keyed by node name.
inputs = {}
outputs = {}

for node in topological_order:
    
    # Find node's process.
    process_name = graph_specification[node]["process"]
    if process_name not in processes:
        raise KeyError(f"No process called {process_name}.")
    process_obj = processes[process_name]

    # Substitute tuple inputs with outputs from prior nodes.
    # Work on a copy so that the specification keeps its tuples. Replacing the
    # value of an existing key while iterating is safe, as the dict size does
    # not change.
    inputs[node] = graph_specification[node]["inputs"].copy()
    for input_name, value in inputs[node].items():
        if not isinstance(value, tuple):
            continue
        incoming_node = value[0]
        incoming_variable = value[1]
        try:
            incoming_value = outputs[incoming_node][incoming_variable]
        except KeyError as err:
            # Either the upstream node has not produced outputs, or its
            # outputs lack the requested variable.
            raise KeyError(
                f"Invalid inputs for {node!r}: input {input_name!r} refers to "
                f"output {incoming_variable!r} of {incoming_node!r}, "
                f"which is not available."
            ) from err
        inputs[node][input_name] = incoming_value
        
    # Execute, storing the outputs.
    outputs[node] = process_obj.run(inputs[node])
    
print("-"*20)
print(inputs)
print("-"*20)
print(outputs)

--------------------
{'node alpha': {'choices': [1.2, 5.4, 6.7]}, 'node beta': {'min': -5.0, 'max': 5.0}, 'node a': {'x': 1.2, 'y': 16.5}, 'node b': {'x': 17.7, 'y': -2.5297371678600324}, 'node c': {'a': 17.7, 'b': 15.170262832139967}}
--------------------
{'node alpha': {'choice': 1.2}, 'node beta': {'n': -2.5297371678600324}, 'node a': {'z': 17.7}, 'node b': {'z': 15.170262832139967}, 'node c': {'x': 1.1667563915252686}}


### Stop containers

In [38]:
# Force-remove every container of the `containers` list; NodeContainer.stop()
# does not capture the docker CLI output, so the container names are printed.
for container in containers:
    container.stop()

multiple_divide
add_subtract


In [None]:
# echo '{"x": 11.98769, "y": 186.78}' | docker exec -i some-node-a python run.py


## Close client

In [39]:
# Release the Docker SDK client that was opened at the top of the notebook.
client.close()