In [1]:
from skopt import Optimizer
from skopt.learning import GaussianProcessRegressor
from skopt.learning.gaussian_process.kernels import RBF, ConstantKernel, Product
from tqdm import tqdm_notebook as tqdm
from skopt import gp_minimize
from time import sleep, time
import docker
import os
from pathlib import Path
import numpy as np

from get_borders import get_borders

  from numpy.core.umath_tests import inner1d


### Глобальные Параметры

In [3]:
# the model is not trained for the first n_initial_points points
n_initial_points = 5

# number of iterations of the optimization loop
n_calls = 4

# optimization is carried out on the cube [low_constraint, high_constraint]^dim
low_constraint, high_constraint = 0.001, 1.
dim = 1

# directory names, each one corresponds to its own copy of the image
worker_names = ['first_worker', 'second_worker']

# this many containers are launched to work in parallel; derived from
# worker_names because the optimization loop hands exactly one point to each
# worker, so the two values must never drift apart
batch_size = len(worker_names)

# directory on the server; holds the directories that are mounted into the containers
folder_local = '/home/matyushinleonid/lhcb_ecal/summer/folder_local'

# directory for the input and output files inside the container
folder_container = '/home/nb_user/logs'

# Docker Python client
client = docker.from_env()

# image name
container = "calorbuild"

### Модель

In [4]:
# Surrogate kernel: (constant * RBF) + constant, written with kernel operators.
kernel = ConstantKernel(1) * RBF(1) + ConstantKernel(1)

# Gaussian-process regressor used by the optimizer as its base estimator.
model = GaussianProcessRegressor(
    kernel=kernel,
    alpha=0,
    noise='gaussian',
    normalize_y=True,
    n_restarts_optimizer=10,
)

# One [low_constraint, high_constraint] interval per dimension of the search cube.
optimizer = Optimizer(
    dimensions=[[low_constraint, high_constraint] for _ in range(dim)],
    base_estimator=model,
    n_initial_points=n_initial_points,
    acq_func='EI',
    acq_optimizer='lbfgs',
    random_state=None,
)

### Оптимизация (+ работа с контейнерами)

In [5]:
def write_input_file(worker_name, input_data):
    """Write the simulation input for one worker to ``<folder_local>/<worker_name>/input.txt``.

    Parameters
    ----------
    worker_name : str
        Name of the worker sub-directory inside ``folder_local``.
    input_data : sequence
        Point proposed by the optimizer; only its first component is used.

    The first component is forwarded to ``get_borders`` as its last argument
    and the returned data is saved as space-separated integers (``%i``),
    one row per line.
    """
    file_to_write = Path(folder_local) / worker_name / 'input.txt'

    a_inner = input_data[0]
    # The leading arguments are fixed here; their meaning is defined by get_borders.
    result = get_borders(10, 16, 1, 6, 8, a_inner)

    # np.savetxt formats each row with `fmt` on its own, so NumPy's global
    # print options are deliberately left untouched (changing them would alter
    # how every array is displayed for the rest of the session).
    with open(file_to_write, 'w') as f:
        np.savetxt(f, result, fmt="%i", delimiter=' ', newline='\n')

def create_job(worker_name):
    """Start a detached container of image ``container`` for one worker.

    The worker's directory ``<folder_local>/<worker_name>`` is bind-mounted
    read-write at ``folder_container`` inside the container.

    Parameters
    ----------
    worker_name : str
        Name of the worker sub-directory inside ``folder_local``.

    Returns
    -------
    The value returned by ``client.containers.run``. Because the call uses
    ``detach=True`` this is the handle of the started container, which lets
    the caller inspect or stop a job that never produces its output file.
    """
    folder_to_mount = '{}/{}'.format(folder_local, worker_name)
    # NOTE(review): the container runs with privileged=True — confirm the
    # image really needs it.
    return client.containers.run(container,
                                 privileged=True,
                                 remove=True,
                                 detach=True,
                                 hostname='dev',
                                 tty=True,
                                 stdin_open=True,
                                 volumes={folder_to_mount: {'bind': folder_container,
                                                            'mode': 'rw'}})

def read_output_file(worker_name):
    """Return the content of ``<folder_local>/<worker_name>/output.txt`` parsed as a float."""
    output_path = Path(folder_local, worker_name, 'output.txt')
    return float(output_path.read_text())

def delete_output_file(worker_name):
    """Remove ``<folder_local>/<worker_name>/output.txt``; raises if the file is missing."""
    Path(folder_local, worker_name, 'output.txt').unlink()
    
def do_output_file_exists(worker_name):
    """Tell whether ``<folder_local>/<worker_name>/output.txt`` currently exists."""
    return Path(folder_local, worker_name, 'output.txt').exists()

In [6]:
# Seconds between two checks for output files, and the longest time a single
# batch of simulations is waited for before it is abandoned.
poll_interval_s = 10
batch_timeout_s = 20 * 60

for _ in tqdm(range(n_calls)):

    # Ask for a batch of candidate points and pair each one with a worker
    # explicitly, so the pairing cannot run out of range if the number of
    # workers and the batch size ever differ.
    X = optimizer.ask(n_points=batch_size)
    jobs = list(zip(worker_names, X))

    # start simulations
    for worker_name, x in jobs:
        # An output file left over from an earlier run (e.g. one that finished
        # after its batch had timed out) would otherwise be read as the result
        # for the new point, so it is removed before launching.
        # NOTE(review): a container from a timed-out batch may still be
        # running and write into the same directory later; this cleanup does
        # not stop it.
        if do_output_file_exists(worker_name):
            delete_output_file(worker_name)
        write_input_file(worker_name, x)
        create_job(worker_name)

    # wait till simulation is done
    simulation_done_statuses = [False] * len(jobs)
    time_before_simulations = time()
    while not all(simulation_done_statuses):
        for job_index, (worker_name, _point) in enumerate(jobs):
            if do_output_file_exists(worker_name):
                simulation_done_statuses[job_index] = True
        # The pause is kept after every check, including the successful one.
        # NOTE(review): presumably this also gives the container time to
        # finish writing the file before it is read — confirm.
        sleep(poll_interval_s)

        # sometimes docker doesn't produce output file. In this case we skip loop step
        if time() - time_before_simulations > batch_timeout_s:
            break

    # perform an optimization step: collect every finished result and report
    # them to the optimizer in a single call, so the model is refitted once
    # per batch instead of once per point
    finished_points, finished_values = [], []
    for is_done, (worker_name, x) in zip(simulation_done_statuses, jobs):
        if is_done:
            finished_values.append(read_output_file(worker_name))
            delete_output_file(worker_name)
            finished_points.append(x)
    if finished_points:
        optimizer.tell(finished_points, finished_values)






In [10]:
# Display the points recorded by the optimizer so far (one list per evaluation).
optimizer.Xi

[[0.6377991296422721],
 [0.9988647066986664],
 [0.5930026775700424],
 [0.19856734532563303],
 [0.020160185398327923],
 [0.001],
 [0.001],
 [0.036025874521633576]]

In [9]:
# Display the objective values recorded by the optimizer so far.
optimizer.yi

[21.512728,
 21.878738,
 21.535324,
 21.005547,
 20.740114,
 20.73403,
 20.73403,
 20.73403]