# Container DAG


https://docker-py.readthedocs.io/en/stable/index.html

https://docs.docker.com/reference/cli/docker/

In [1]:
from abc import ABCMeta, abstractmethod
import json
import random
import subprocess
import sys

from docker import from_env

import nevergrad as ng

In [2]:
sys.path.append("../../src")

from dockweed.topological_sort import kahns_algorithm
from dockweed.process import *
from dockweed.container import *

## Open Docker client

In [5]:
client = from_env()

### Get container processes

In [47]:
containers = []
processes = {}
# Loop through images. 
# Exclude intermediate layer images (all=False) and dangling images.
for image in client.images.list(all=False, filters={"dangling":False}):
    
    # Image tag
    tags = image.tags
    if not tags:
        continue
    image_tag = tags[0]
    
    # Labels with node names and commands.
    labels = image.attrs['ContainerConfig']['Labels']
    if labels is None:
        continue
    process_cmd = {name: cmd for name, cmd in labels.items() if name.startswith("process.")}
    if not process_cmd:
        continue
        
    # Processes
    for name, cmd in process_cmd.items():
        # Container should have no entry point, or this will call the entrypoint rather than label command.
        result = client.containers.run(image=image_tag, command=cmd, remove=True, stdout=True)
        try:
            dic = json.loads(result)
            if "description" in dic and "outputs" in dic and "inputs" in dic:
                
                if not containers or containers[-1].image != image_tag:
                    container = NodeContainer(image_tag=tags[0])
                    containers.append(container) 
                
                processes[name] = ContainerProcess(
                    name = name,
                    description = dic["description"],
                    container = containers[-1].name,
                    command = cmd,
                    inputs = dic["inputs"],
                    outputs = dic["outputs"]
                )    
        except json.decoder.JSONDecodeError:
            continue
    
print([container.name for container in containers])
print([process.name for process in processes.values()])

['multiple_divide', 'add_subtract']
['process.c_divide', 'process.c_multiply', 'process.python_add', 'process.python_subtract']


### Start containers

In [48]:
for container in containers:
    container.start()

ca231b060af38450507658a08f36ea2d89139ee4969a2e3c56e153843ad30a03
05100b6afcc0387f2e4b30e11cc2cc03fee354c71a71903d36580044db0feb0d


### Add generator processes

In [49]:
processes["random float"] = UniformFloatProcess()
processes["random choice"] = RandomChoice()

### Information about processes

In [50]:
for i, process in enumerate(processes.values()):
    print("-"*30)
    print(f"#{i} {process.name}")
    print(f"\t{process.description}")
    print(f"\tinputs = {process.inputs}")
    print(f"\toutputs = {process.outputs}")

------------------------------
#0 process.c_divide
	Division. z = a / b
	inputs = {'a': 1.0, 'b': 1.0}
	outputs = {'z': 1.0}
------------------------------
#1 process.c_multiply
	Multiplication. z = a * b
	inputs = {'a': 1.0, 'b': 1.0}
	outputs = {'z': 1.0}
------------------------------
#2 process.python_add
	Addition. z = x + y.
	inputs = {'x': 1.0, 'y': 1.0}
	outputs = {'z': 1.0}
------------------------------
#3 process.python_subtract
	Addition. z = x + y.
	inputs = {'x': 1.0, 'y': 1.0}
	outputs = {'z': 1.0}
------------------------------
#4 random float
	A uniformly distributed random float.
	inputs = {'min': 0.0, 'max': 1.0}
	outputs = {'n': 0.5}
------------------------------
#5 random choice
	A rnadom choice.
	inputs = {'choices': [1, 2, 3, 4]}
	outputs = {'choice': 1}


### Run individual processes

In [51]:
processes["process.python_add"].run({'x': 2.34, 'y': 10.78})

{'z': 13.12}

In [52]:
processes["process.c_multiply"].run({'a': 89.987, 'b': 10.0})

{'z': 899.8699951171875}

In [53]:
for _ in range(10):
    x = processes["random choice"].run({'choices': [3, 4, 8]})
    print(x)

{'choice': 4}
{'choice': 8}
{'choice': 4}
{'choice': 3}
{'choice': 4}
{'choice': 8}
{'choice': 3}
{'choice': 4}
{'choice': 8}
{'choice': 3}


### Run a graph of processes

#### Specify the graph

In [85]:
graph_specification = {
    "node alpha": {
        "process": "random choice",
        "inputs": {'choices': [1.2, 5.4, 6.7]}
    },
    "node beta": {
        "process": "random float",
        "inputs": {"min": -5.0, "max": 5.0}
    },
    "node a": {
        "process": "process.python_add",
        "inputs": {'x': ("node alpha", "choice"), 'y': 16.5}
    },
    "node b": {
        "process": "process.python_add",
        "inputs": {'x': ("node a", "z"), 'y': ("node beta", "n")}
    },
    "node c": {
        "process": "process.c_divide",
        "inputs": {'a': ("node a", "z"), 'b': ("node b", "z")}
    },
}

In [80]:
graph_specification = {
    "node a": {
        "process": "process.python_add",
        "inputs": {'x': 2.1, 'y': 16.5}
    },
    "node b": {
        "process": "process.python_add",
        "inputs": {'x': ("node a", "z"), 'y': 5.4}
    },
    "node c": {
        "process": "process.c_divide",
        "inputs": {'a': ("node a", "z"), 'b': ("node b", "z")}
    },
}

In [81]:
class Graph:
    
    def __init__(self, specification: dict, processes: list):
        
        # Check speification and processes.
        errors = []
        self.specification = {}
        self.processes = {}
        for node, spec in specification.items():
            
            # Process.
            if "process" not in spec:
                errors.append(f"Node {node}: No process is specified.")
                continue
            process_name = spec["process"]
            node_process = [process for process in processes if process.name == process_name]
            if not node_process:
                errors.append(f"Node {node}: There is no process named {process_name}.")
                continue
            self.processes[node] = node_process[0]
           
            # Inputs.
            if "inputs" not in spec:
                errors.append(f"Node {node}: No inputs are specified.")
                continue
            process_variables = set(self.processes[node].inputs.keys())
            spec_variables = set(spec["inputs"].keys())
            if process_variables != spec_variables:
                errors.append(f"Node {node}: The specified inputs do not match the process inputs.")
                continue
            self.specification[node] = spec
                
        if errors:
            raise KeyError("\n".join(errors))
            
        # Topology (edges).
        self.topology = {}
        for node, specification in self.specification.items():
            self.topology[node] = [
                value[0]
                for name, value in specification["inputs"].items()
                if isinstance(value, tuple) and len(value) == 2
            ]
            
        # Topological order.
        self.node_order = kahns_algorithm(self.topology, incoming=True)
        if not self.node_order:
            raise Exception("The graph is cyclic!")
            
    def is_input(self, node: str, variable: str) -> bool:
        return node in self.processes and variable in self.processes[node].inputs
    
    def input_value(self, node: str, variable: str):
        if self.is_input(node, variable):
            return self.specification[node]["inputs"][variable]
        return None
    
    def input_is_egde(self, node: str, variable: str) -> bool:
        return isinstance(self.input_value(node, variable), tuple)

    def is_output(self, node: str, variable: str) -> bool:
        return node in self.processes and variable in self.processes[node].outputs
    
    def output_value(self, node: str, variable: str):
        if self.is_output(node, variable):
            return self.processes[node].outputs[variable]
        return None
           
    def run(self):
        
        inputs = {}
        outputs = {}
        for node in self.topology:
            # Substitute tuple inputs with outputs from prior nodes.
            inputs[node] = self.specification[node]["inputs"].copy()
            for input_name, value in inputs[node].items():
                if not isinstance(value, tuple):
                    continue
                try:
                    incoming_node = value[0]
                    incoming_variable = value[1]
                    incoming_value = outputs[incoming_node][incoming_variable]
                    inputs[node][input_name] = incoming_value
                except KeyError:
                    raise KeyError(f"Invalid inputs for")
        
            # Execute, storing the outputs.
            outputs[node] = self.processes[node].run(inputs[node])
            
        return inputs, outputs

graph = Graph(graph_specification, list(processes.values()))

In [82]:
inputs, outputs = graph.run()
print(inputs)
print("="*20)
print(outputs)

Error response from daemon: No such container: add_subtract


KeyError: 'Invalid inputs for'

In [68]:
class NevergradOptimizer:
    
    def __init__(self, graph: Graph, free_inputs: list, optimize_on: tuple):
        
        self.graph = graph
        self.free_inputs = free_inputs
        self.instrumentation = ng.p.Instrumentation(*[inp[2] for inp in free_inputs])
        
        self.optimize_on = optimize_on
        errors = []
        
        # Check free inputs.
        for node, variable, parameterisation in free_inputs:
            value = self.graph.input_value(node, variable)
            if value is None:
                errors.append(f"Input {node}, {variable}: Does not exist.")
                continue
            if isinstance(value, tuple):
                errors.append(f"Input {node}, {variable}: Is an edge.")
                continue
            if isinstance(value, list) and isinstance(parameterisation, ng.p.Scalar):
                errors.append(f"Input {node}, {variable}: Is a list and must have a non-scalar parameterisation.")
                continue
                
            # ????
           
        # Check output to be optimised.
        node, variable = optimize_on
        value = self.graph.output_value(node, variable)
        if value is None:
            errors.append(f"Output {node}, {variable}: Does not exist.")
        elif not isinstance(value, (int, float)):
            errors.append(f"Output {node}, {variable}: Must a real number to optimise.")
            
        if errors:
            raise KeyError("\n".join(errors))
            
    def optimize(self, optimizer_class, minimize=True, **optimizer_kwargs):
        
        def run(*vargs):
            # Set graph inputs.
            for i, (node, variable, _) in enumerate(self.free_inputs):
                self.graph.specification[node]["inputs"][variable] = vargs[i]  # Have set input func for graph.
            # Run graph.
            return self.graph.run()
            
        def objective(*vargs):
            _, outputs = run(*vargs)
            # Return optimized output.
            node, variable = self.optimize_on
            value = outputs[node][variable]
            if not minimize:
                value *= -1
            return value
        
        optimizer = optimizer_class(parametrization=self.instrumentation, **optimizer_kwargs)
        solution = optimizer.minimize(objective).value[0]
        
        return run(*solution)
        

    

nop = NevergradOptimizer(
    graph = graph,
    free_inputs = [
        ("node a", "x", ng.p.Scalar(init=2.1, lower=-5, upper=5)), 
        ("node b", "y", ng.p.Scalar(init=1.7, lower=-5, upper=5))
    ],
    optimize_on = ("node c", "z")
)

In [75]:
nop.optimize(ng.optimization.optimizerlib.CMA, minimize=True, budget=100)

({'node a': {'x': 1.3157249099242199, 'y': 16.5},
  'node b': {'x': 17.81572490992422, 'y': 4.993177598769161},
  'node c': {'a': 17.81572490992422, 'b': 22.808902508693382}},
 {'node a': {'z': 17.81572490992422},
  'node b': {'z': 22.808902508693382},
  'node c': {'z': 0.7810864448547363}})

In [None]:
graph_specification = {
    "node a": {
        "process": "process.python_add",
        "inputs": {'x': 2.1, 'y': 16.5}
    },
    "node b": {
        "process": "process.python_add",
        "inputs": {'x': ("node a", "z"), 'y': 5.4}
    },
    "node c": {
        "process": "process.c_divide",
        "inputs": {'a': ("node a", "z"), 'b': ("node b", "z")}
    },
}

### Stop containers

In [76]:
for container in containers:
    container.stop()

multiple_divide
add_subtract


In [None]:
# echo '{"x": 11.98769, "y": 186.78}' | docker exec -i some-node-a python run.py


## Close client

In [42]:
client.close()