# GEOtop Calibration Report

In [None]:
# Default values for the notebook inputs.
# NOTE(review): presumably these are overridden by parameter injection when
# the notebook is executed as a report; the two None paths cannot be used
# as-is by the cells below — confirm against the launcher.

# Passed to VarSoilFullModel together with `timeout`.
model_path = None
# CSV file read with pandas; its first column is parsed as dates and used as index.
observations_path = None
# Passed to VarSoilParameters together with `default_parameters`.
parameters_path = 'data/parameters/default.csv'
# Forwarded to the model constructor (presumably seconds — TODO confirm).
timeout = 300
default_parameters = {'FirstLayerWidth': 10, 'MaxDepth': 1000}
# Key looked up in nevergrad's optimizer registry.
algorithm = 'NGO'
# The optimisation loop keeps running while fewer than `budget` losses have been told.
budget = 8192
# Optimizer parallelism and number of candidates asked per round.
num_workers = 256
# Forwarded to the dask Client constructor.
scheduler_file = None

In [None]:
from timeit import default_timer as timer
from time import strftime, gmtime

import numpy as np
import pandas as pd
import scrapbook as sb
import nevergrad as ng
from dask.distributed import Client, as_completed

from mhpc_project.utils import date_parser, comparison_plots, convergence_plot, kge_cmp
from mhpc_project.parameters import VarSoilParameters
from mhpc_project.models import VarSoilFullModel

In [None]:
# Glue inputs: record every run setting as a scrap, in a fixed order.
_input_scraps = {
    'model_path': model_path,
    'observations_path': observations_path,
    'parameters_path': parameters_path,
    'timeout': timeout,
    'default_parameters': default_parameters,
    'algorithm': algorithm,
    'budget': budget,
    'num_workers': num_workers,
}
for _scrap_name, _scrap_value in _input_scraps.items():
    sb.glue(_scrap_name, _scrap_value)

In [None]:
# Accumulates the (candidate, loss) pairs told to the optimizer later on.
log = []

# Parameter space and model wrapper used for the calibration.
parameters = VarSoilParameters(parameters_path, default_parameters)
model = VarSoilFullModel(model_path, timeout=timeout)

# Observed series: column 0 is parsed as dates with the project's
# date_parser and becomes the index.
observations = pd.read_csv(
    observations_path,
    index_col=0,
    parse_dates=[0],
    date_parser=date_parser,
)

In [None]:
# Create the dask.distributed client that the optimisation cell uses to
# scatter data and submit tasks.
# NOTE(review): with scheduler_file=None dask is expected to fall back to
# its default (local) cluster — confirm this is the intended behaviour.
client = Client(scheduler_file=scheduler_file)

In [None]:
# Baseline plots: model vs. observations using the parameters' own
# instrumentation, i.e. before any optimisation has run. The same helper is
# called again further down with the optimizer's recommendation.
comparison_plots(model, observations, parameters.instrumentation)

In [None]:
# Optimisation driver: ask the optimizer for candidates, evaluate them on the
# dask cluster and tell the resulting losses back until `budget` evaluations
# have been reported.


def _run_model(simulate, candidate):
    """Call the model with a candidate's positional and keyword values."""
    return simulate(*candidate.args, **candidate.kwargs)


def _pair(candidate, loss):
    """Bundle a candidate with its loss so both travel in a single future."""
    return candidate, loss


def _submit_evaluation(candidate):
    """Submit simulation -> loss -> (candidate, loss) and return the last future.

    `candidate` may be a candidate object or a future resolving to one.
    """
    simulation = client.submit(_run_model, remote_model, candidate)
    loss = client.submit(kge_cmp, simulation, remote_observations)
    return client.submit(_pair, candidate, loss)


start = timer()
optimizer_class = ng.optimizers.registry[algorithm]
# The optimizer itself gets an unbounded budget; the stopping rule is the
# `num_tell < budget` condition of the loop below.
optimizer = optimizer_class(parameters.instrumentation,
                            budget=np.inf,
                            num_workers=num_workers)

# Ship the large, read-only inputs to every worker once.
remote_observations = client.scatter(observations, broadcast=True)
remote_model = client.scatter(model, broadcast=True)

while optimizer.num_tell < budget:
    # Never ask for more candidates than the budget still allows, so the
    # last round does not overshoot when budget is not a multiple of
    # num_workers. The loop condition guarantees `remaining` is positive.
    remaining = budget - optimizer.num_tell
    batch_size = num_workers if remaining >= num_workers else max(1, int(remaining))

    remote_candidates = client.scatter([optimizer.ask()
                                        for _ in range(batch_size)])
    completed_queue = as_completed([_submit_evaluation(candidate)
                                    for candidate in remote_candidates])
    for batch in completed_queue.batches():
        for future in batch:
            if future.status == 'finished':
                candidate, loss = future.result()
                optimizer.tell(candidate, loss)
                log.append((candidate, loss))
            else:
                # The evaluation did not finish: replace it with a fresh
                # candidate so the round still yields `batch_size` tells.
                # NOTE(review): there is no retry cap, so a model that
                # always fails keeps this loop running indefinitely.
                completed_queue.add(_submit_evaluation(optimizer.ask()))

recommendation = optimizer.provide_recommendation()
elapsed = timer() - start

# "%H:%M:%S" is the portable spelling of the platform-specific "%T".
# gmtime() wraps around after 24 hours of elapsed time.
print("elapsed time:", strftime("%H:%M:%S", gmtime(elapsed)))

In [None]:
# Plot built from the (candidate, loss) pairs appended to `log` each time
# the optimizer was told a result.
convergence_plot(log)

In [None]:
# Same plots as the baseline cell, now using the optimizer's recommended
# candidate instead of the initial instrumentation.
comparison_plots(model, observations, recommendation)

In [None]:
# Build the calibration report.
# Values of the recommended candidate, requested under the column name 'best'.
parameters_best = parameters.from_instrumentation(recommendation, column_name='best')
# Summary computed by the project helper from the optimisation log.
# NOTE(review): presumably a pandas DataFrame with a 'delta' column (it is
# sorted on that column below and glued with the 'pandas' encoder) — confirm.
report = parameters.delta_mim(log)
report['best'] = parameters_best
# Final bare expression: shown as the cell output, ordered by decreasing |delta|.
# NOTE(review): the sorted result is not assigned, so assuming pandas
# semantics `report` keeps its original order when it is glued later —
# confirm that is intended.
report.sort_values('delta', key=np.abs, ascending=False)

In [None]:
# Outputs: each tuple holds the positional arguments of one sb.glue call
# (scrap name, value and, for the report only, the encoder name).
_output_scraps = (
    ('report', report, 'pandas'),
    ('loss', recommendation.loss),
    ('elapsed_time', elapsed),
)
for _glue_args in _output_scraps:
    sb.glue(*_glue_args)
