In [None]:
# default_exp __init__
# hide

# File stem of this notebook; '.ipynb' is appended below to build its path.
_FNAME='init'

import unittest
from unittest import mock
from nbdev.showdoc import show_doc
from nbdev.export import notebook2script
import os
# Stand-alone TestCase instance so its assert* helpers can be called from the
# plain test functions defined in later cells.
TESTCASE = unittest.TestCase()
# NOTE(review): `_dh` is not defined in this notebook; presumably it is the
# interactive shell's directory-history list (first entry = start directory) — confirm.
_nbpath = os.path.join(_dh[0], _FNAME+'.ipynb')

# Port management
You should be able to start two of the same container.  To do this we will randomize the ports on the host machine, and map them to predictable ports in the container.

In [None]:
#export 
import socket
import atexit
import docker
import uuid
import logging
import random
from numbers import Number

import requests
from retrying import retry
# getLogger() without a name returns the root logger.
logger = logging.getLogger()

def check_if_port_is_bound(port_number):
    '''
    Probe whether `port_number` is free by binding a TCP socket to it on
    all interfaces.  One of two things will happen
    - Nothing (returns None).  The port is free
    - OSError.  The port is in use (or may not be bound by this process)

    The probe socket is always closed before returning or raising, so a
    failed probe does not leak a file descriptor.
    '''
    # The context manager closes the socket even when bind() raises.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.bind(('', port_number))

@retry(stop_max_attempt_number=50)
def get_random_port_numbers(start, end, quantity=1):
    '''
    Return a list of `quantity` consecutive port numbers that could all be
    bound at the time of the check.

    The first port is drawn with random.randint(start, end - quantity), so
    every returned port is >= start and < end.  Each candidate is probed with
    check_if_port_is_bound; any OSError it raises propagates out of this
    function body, and the `retry` decorator (configured with
    stop_max_attempt_number=50) re-runs the draw.
    '''
    first = random.randint(start, end - quantity)
    candidates = [first + offset for offset in range(quantity)]
    for candidate in candidates:
        check_if_port_is_bound(candidate)
    return candidates
    

In [None]:
def test_random_ports():
    '''
    Occupy ports 30000 and 30001, then check that get_random_port_numbers
    fails when only those two are eligible and returns 30002 once it is.
    '''
    # Hold the ports for the duration of the test.  The `with` block releases
    # both sockets even if an assertion fails, so the cell can be re-run
    # without hitting "address already in use" from a leftover socket.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s30k, \
         socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s30k1:
        s30k.bind(('', 30000))
        s30k1.bind(('', 30001))

        # Only 30000 and 30001 are eligible here and both are taken.
        with TESTCASE.assertRaises(OSError):
            get_random_port_numbers(30000, 30002, 1)

        port = get_random_port_numbers(30000, 30003, 1)
        #the random is not inclusive of the "end" number
        TESTCASE.assertEqual(port, [30002])

test_random_ports()

The Docker Python bindings have a syntax for ports that looks like this:
`{container port: host port}`.  We will maintain that syntax here for our port pairs so that we can pass them in directly.

In [None]:
#export 
def generate_port_pairings(ports, min_port, max_port):
    '''
    Build the {container_port: host_port} dictionary that the docker
    interface accepts, e.g. {2222: 3333} exposes port 2222 in the container
    on the host's port 3333.
    https://docker-py.readthedocs.io/en/stable/containers.html

    ports: None (no ports), a single port number, or a collection of
        container port numbers.
    min_port, max_port: range handed to get_random_port_numbers to pick the
        host ports.
    '''
    # Normalise the three accepted input shapes to a sized collection.
    if ports is None:
        ports = []
    elif isinstance(ports, Number):
        ports = [ports]

    host_ports = get_random_port_numbers(min_port, max_port, quantity=len(ports))
    return {
        container_port: host_ports[position]
        for position, container_port in enumerate(ports)
    }


In [None]:
def test_port_pairs():
    '''Every requested container port must end up with one host-port mapping.'''
    cases = ([7688, 7999], [80, 8888, 423], [80])
    for container_ports in cases:
        pairs = generate_port_pairings(container_ports, min_port=5000, max_port=6000)
        TESTCASE.assertEqual(len(pairs), len(container_ports))

test_port_pairs()

The docker interface gives very complete information about the ports.  For example
`{'7443/tcp': [{'HostIp': '0.0.0.0', 'HostPort': '7841'}]}`.  For our purposes, this is often too much.  The helpers below add some syntactic sugar to easily access just the host binding or just the host port number.

In [None]:
#export
def lookup_host(container, container_port, protocol='tcp'):
    '''
    Return the first host binding of a container port, i.e. the first entry
    of container.ports['<container_port>/<protocol>'].  In the docker
    interface this is a dict such as
    {'HostIp': '0.0.0.0', 'HostPort': '7841'}.

    Raises KeyError when the port/protocol pair is not listed and TypeError
    when it is listed but has no binding (its value is None).
    '''
    port_str = '{}/{}'.format(container_port, protocol)
    return container.ports[port_str][0]

def lookup_host_port(container, container_port, protocol='tcp'):
    '''
    Return the host port number (as an int) that `container_port` is mapped
    to for the given protocol.
    '''
    # Forward `protocol`; without it a udp lookup silently read the tcp binding.
    hostport = lookup_host(container, container_port, protocol)['HostPort']
    return int(hostport)



We introduce `NamedPorts` here to make finding the appropriate port easier.  Here's the use case.  Suppose you are using a database system that exposes 3 different ports: an http interface, an https interface, and the actual database connection.  You would normally have to depend on the special port numbers to find the port you want.  However if we name them, say, `['http', 'https', 'db']` then you know you always have to connect to the named port `db`.

In [None]:
class NamedPorts:
    '''Empty attribute holder; port names are attached to instances as attributes.'''

def set_port_name(container, container_port, name):
    '''
    Record the host port mapped to `container_port` under `name`, so it can
    be read back as container.port.<name>.

    An existing container.port object is reused; otherwise a new NamedPorts
    is created.  Returns the same container.
    '''
    resolved_port = lookup_host_port(container, container_port)

    # A private sentinel separates "no attribute yet" from any stored value.
    missing = object()
    registry = getattr(container, 'port', missing)
    if registry is missing:
        registry = NamedPorts()

    setattr(registry, name, resolved_port)
    container.port = registry
    return container


In [None]:
def test_lookup_host_port():
    '''lookup_host_port should return the bound host port as an integer.'''
    class FakeContainer:
        def __init__(self, ports):
            self.ports = ports

    def binding(host_port):
        # One host binding in the shape the docker interface reports.
        return [{'HostIp': '0.0.0.0', 'HostPort': host_port}]

    fake = FakeContainer({
        '7443/tcp': binding('7841'),
        '7473/tcp': None,
        '7474/tcp': binding('7840'),
        '7687/tcp': binding('7839'),
    })
    for container_port, expected in ((7443, 7841), (7687, 7839)):
        TESTCASE.assertEqual(lookup_host_port(fake, container_port), expected)
test_lookup_host_port()

# Containers

A general function that spins up any docker image as a container.  It has the following properties.

- ports.  You can either specify the ports exactly, or get random ports
- persist.  The docker container will shut down by default at the end of the python program.  This is what you want most of the time.
- blocking.  Most of the time you want the program to block (wait) until the container is fully available.  This is checked by doing a `requests.get` against a certain port.  e.g. Grafana is available when I can `requests.get` the login page.
- environment.  Pass in a dictionary to set environment variables inside the container.

In [None]:
#export 

@retry(wait_fixed=1000, stop_max_attempt_number=30)
def block_for_startup(container, container_port, timeout=5):
    '''
    Keep trying http get until the container is available and serves content

    container: object exposing the docker-style `ports` mapping.
    container_port: port inside the container whose host binding is requested.
    timeout: seconds a single HTTP attempt may take before it is abandoned.
        Without it one hung connection could block forever and the retry
        policy on this function would never get to make another attempt.

    Returns the response of the first request that completes.  Any exception
    raised here is left to the `retry` decorator (wait_fixed=1000,
    stop_max_attempt_number=30).
    '''
    host = lookup_host(container, container_port)
    logger.debug("Waiting for avilability of {host}".format(host=host))
    resp = requests.get("http://{HostIp}:{HostPort}".format(**host), timeout=timeout)
    return resp

def _stop_quietly(container):
    '''Stop `container`, treating "already removed" as success.'''
    try:
        container.stop()
    except docker.errors.NotFound:
        # Containers are started with auto-removal, so one that was stopped
        # earlier no longer exists by the time the exit hook runs.
        logger.debug("Container already removed; nothing to stop")

def run_container(image, container_name=None, 
                  ports=None, min_port=6000, max_port=10000, 
                  persist=False, docker_client=None,
                  block_until_port_available=None,
                  environment=None, 
                  **kwargs):
    '''
    Return a running container for `image`, reusing an existing container
    called `container_name` when there is one and starting a new one otherwise.

    container_name: str
        name to look up / assign.  A random hex name is generated when omitted.
    ports is either 
        - a port number 
        - list of port numbers you need inside the container.  The ports on the host will be randomized
          between min_port and max_port
        - a dictionary of the port number mappings you want in the form of container:host
    persist: bool
        when False (default) the container is stopped when the python program exits
    docker_client:
        docker client to use; docker.from_env() when omitted
    environment: dict
        key:value pairs of environment variables for the container
        
    block_until_port_available: int
        try to requests.get this port until it is successful or times out multiple times
        
    any additional kwargs will be passed directly into the docker.run command
    (and override the defaults chosen here)
    https://docker-py.readthedocs.io/en/stable/containers.html
    '''
    client = docker_client if docker_client is not None else docker.from_env()
    container_name = container_name or uuid.uuid4().hex
    try:
        container = client.containers.get(container_name)
    except docker.errors.NotFound:
        docker_run_kwargs = dict(image=image,
                             remove=True, detach=True, auto_remove=True,
                             name=container_name)
        if environment:
            docker_run_kwargs['environment'] = environment

        if isinstance(ports, dict):
            # Caller chose the exact container:host mapping.
            docker_run_kwargs['ports'] = ports
        else:
            port_pairs = generate_port_pairings(ports, min_port, max_port)
            docker_run_kwargs['ports'] = port_pairs

        docker_run_kwargs.update(kwargs)
        container = client.containers.run(**docker_run_kwargs)

    if not persist:
        # Best-effort stop: registering container.stop directly raised
        # docker.errors.NotFound at interpreter exit whenever the container
        # had already been stopped (and therefore auto-removed).
        atexit.register(_stop_quietly, container)
        
    # Refresh the cached attributes so the host port bindings are visible.
    container.reload()
    
    if block_until_port_available:
        block_for_startup(container, container_port=block_until_port_available)
    return container

Here's a design pattern for how you might want to package up specific containers.

In [None]:
def run_httpd(version='alpine', port=None, **kwargs):
    '''
    Start (or fetch) an httpd container and name its port 80 as `http`.

    version: tag of the httpd image to run.
    port: host port to map to container port 80; a random host port is used
        when omitted.
    kwargs: forwarded unchanged to run_container.

    Blocks until port 80 answers, then returns the container with
    container.port.http set to the host port.
    '''
    ports = {80: port} if port else [80]
    container = run_container(
        "httpd:{}".format(version),
        ports=ports,
        block_until_port_available=80,
        **kwargs)
    return set_port_name(container, 80, 'http')

In [None]:

def test_blocking_until():
    '''A started httpd container should answer on port 80 with HTTP 200.'''
    httpd = run_httpd()
    status = block_for_startup(httpd, 80).status_code
    TESTCASE.assertEqual(status, 200)
    httpd.stop()
test_blocking_until()

In [None]:
def test_multiple_containers_colliding():
    '''
    Two containers asking for exactly the same host port must not both start:
    the second run is expected to raise docker.errors.APIError.
    '''
    # Start the first container outside assertRaises so that only a failure
    # of the *second* one can satisfy the assertion.
    c1 = run_httpd(port=8080)
    c2 = None
    try:
        with TESTCASE.assertRaises(docker.errors.APIError):
            c2 = run_httpd(port=8080)
    finally:
        # Always release port 8080, even when the assertion fails.
        c1.stop()
        if c2 is not None:
            #just in case it was made for some strange reason
            c2.stop()
    
    
def test_run_multiple_containers():
    '''Two httpd containers on random ports should start side by side.'''
    first, second = run_httpd(), run_httpd()

    # Random host ports, so the two named `http` ports must differ.
    TESTCASE.assertNotEqual(first.port.http, second.port.http)

    for started in (first, second):
        started.stop()
    
test_run_multiple_containers()
test_multiple_containers_colliding()

If you specify `container_name` as an argument, docker will first look for an existing container with that name and, if one is found, return it to you instead of spinning up a second container.

In [None]:
def test_retrieve_same_container():
    '''Asking twice for the same container name should yield one container.'''
    shared_name = "unittest_alpine"
    first = run_httpd(container_name=shared_name)
    second = run_httpd(container_name=shared_name)

    TESTCASE.assertEqual(first.id, second.id)

    first.stop()
    # Both handles point at one container, so the second stop may find it gone.
    try:
        second.stop()
    except docker.errors.NotFound:
        pass
    #I think there is a race condition where sometimes this fails

test_retrieve_same_container()

In [None]:

# Run nbdev's notebook2script on this notebook's path.
# NOTE(review): the recorded output is a FileNotFoundError for scylla/_nbdev.py —
# presumably the nbdev project files had not been generated yet; confirm.
notebook2script(_nbpath)

FileNotFoundError: [Errno 2] No such file or directory: '/home/ubuntu/scylla/scylla/_nbdev.py'