# GEOtop Calibration Report

In [None]:
# Default
# Notebook inputs. Every name below is read by a later cell; the comments
# state where each value is consumed.

# Passed to VarSoilFullModel as the model location.
model_path = None
# CSV file read with pandas; its first column is parsed as the date index.
observations_path = None
# Passed to VarSoilParameters as the parameter description file.
parameters_path = 'data/parameters/default.csv'
# Forwarded to VarSoilFullModel(timeout=...) -- presumably seconds; confirm.
timeout = 300
# Keyword arguments for dateutil's relativedelta: observations earlier than
# the model start date plus this offset are discarded.
spinup = {'years': 1}
# Month numbers whose observations are discarded (empty list keeps all).
skip_months = []
# Second argument of VarSoilParameters.
default_parameters = {'FirstLayerWidth': 10, 'MaxDepth': 1000}
# Key looked up in nevergrad's optimizer registry.
algorithm = 'NGO'
# The optimization loop runs while the optimizer's tell count is below this.
budget = 4096
# Number of candidates asked per batch; also given to the optimizer.
num_workers = 256
# Passed to dask.distributed.Client(scheduler_file=...).
scheduler_file = None

In [None]:
from dateutil.relativedelta import relativedelta
from timeit import default_timer as timer
from time import strftime, gmtime

import numpy as np
import pandas as pd
import scrapbook as sb
import nevergrad as ng
from dask.distributed import Client, as_completed

from mhpc_project.utils import date_parser, comparison_plots, convergence_plot
from mhpc_project.parameters import VarSoilParameters
from mhpc_project.models import VarSoilFullModel
from mhpc_project.comparators import KGE

In [None]:
# Glue inputs
# Record the notebook inputs as scraps, in the same order as before.
# `scheduler_file` is intentionally left out, as in the original cell.
glued_inputs = {
    'model_path': model_path,
    'observations_path': observations_path,
    'parameters_path': parameters_path,
    'timeout': timeout,
    'spinup': spinup,
    'skip_months': skip_months,
    'default_parameters': default_parameters,
    'algorithm': algorithm,
    'budget': budget,
    'num_workers': num_workers,
}
for scrap_name, scrap_value in glued_inputs.items():
    sb.glue(scrap_name, scrap_value)

In [None]:
# Build the calibration parameter set and the model wrapper from the
# notebook inputs.
parameters = VarSoilParameters(parameters_path, default_parameters)
model = VarSoilFullModel(model_path, store=False, timeout=timeout)

# Observations: first CSV column is parsed as dates and used as the index.
# NOTE(review): the `date_parser=` keyword is deprecated in pandas >= 2.0;
# confirm the pinned pandas version before upgrading.
observations = pd.read_csv(observations_path,
                           parse_dates=[0],
                           date_parser=date_parser,
                           index_col=0)

if spinup:
    # Drop every observation that falls inside the spin-up window, i.e.
    # earlier than the model start date shifted by `spinup`.
    start_date = date_parser(model.settings['InitDateDDMMYYYYhhmm'])
    start_date = start_date + relativedelta(**spinup)
    # Vectorized comparison instead of a per-element Python lambda.
    selection = observations.index >= start_date
    observations = observations[selection]

if skip_months:
    # Keep only rows whose month is not listed in `skip_months`.
    selection = ~observations.index.month.isin(skip_months)
    observations = observations[selection]

# One KGE comparator per observed column.
comparators = {col: KGE(observations[col]) for col in observations.columns}

# Filled by the optimization loop with (candidate, loss) tuples.
log = []

In [None]:
# Connect to the dask scheduler described by `scheduler_file`
# (the notebook default for `scheduler_file` is None).
client = Client(scheduler_file=scheduler_file)

In [None]:
# Runs before any optimization, using the initial instrumentation of the
# parameter set -- presumably a baseline simulation-vs-observation plot.
comparison_plots(model, observations, parameters.instrumentation)

In [None]:
def compare(sim, obs):
    """Return the loss between simulated and observed time series.

    For every column of ``sim`` that also exists in ``obs`` three terms are
    computed on the samples the two series have in common: correlation
    minus one, ratio of means minus one and ratio of standard deviations
    minus one (the components of the Kling-Gupta efficiency). The result is
    the square root of the sum of all squared terms over all shared columns.

    Parameters
    ----------
    sim : pandas.DataFrame
        Simulated values, one column per target.
    obs : pandas.DataFrame
        Observed values, one column per target.

    Returns
    -------
    float
        The aggregated loss; 0.0 when no column is shared. The value is NaN
        or infinite when a ratio is undefined (e.g. zero observed mean).
    """
    square_loss = 0.0
    for target in sim.columns:
        if target not in obs.columns:
            continue
        # Series.corr aligns on the index and skips missing pairs; restrict
        # mean and std to the very same samples so that rows present only
        # in the simulation (e.g. filtered-out periods) do not bias them.
        x, y = sim[target].align(obs[target], join='inner')
        paired = x.notna() & y.notna()
        x, y = x[paired], y[paired]
        r = x.corr(y) - 1
        m = x.mean() / y.mean() - 1
        v = x.std() / y.std() - 1
        square_loss += r * r + m * m + v * v
    return np.sqrt(square_loss)

# def objective_function(process, compare, inputs):
#     sim = process(*inputs.args, **inputs.kwargs)
#
#     square_loss = sum(f(sim[target]) ** 2
#                       for target, f in compare.items() if target in sim)
#
#     return inputs, np.sqrt(square_loss)

In [None]:
def _run_model(f, x):
    """Call the model `f` with the positional and keyword values of `x`."""
    return f(*x.args, **x.kwargs)


def _pair(x, y):
    """Bundle a candidate with its loss so both arrive in a single future."""
    return x, y


def _submit_evaluation(candidate):
    """Submit simulation -> loss -> (candidate, loss) for one candidate.

    `candidate` may be a scattered future or a local candidate object.
    Returns the future that resolves to the ``(candidate, loss)`` tuple.
    """
    sim = client.submit(_run_model, remote_model, candidate)
    loss = client.submit(compare, sim, remote_observations)
    return client.submit(_pair, candidate, loss)


start = timer()
optimizer_class = ng.optimizers.registry[algorithm]
# The optimizer itself is given an unbounded budget; the real budget is
# enforced by the loop below through the number of completed tells.
optimizer = optimizer_class(parameters.instrumentation,
                            budget=np.inf,
                            num_workers=num_workers)

# Ship the shared inputs to the cluster once.
remote_observations = client.scatter(observations, broadcast=True)
remote_model = client.scatter(model, broadcast=True)
while optimizer.num_tell < budget:
    # Never ask for more candidates than the budget still allows, so the
    # number of tells does not overshoot `budget` when it is not a
    # multiple of `num_workers`.
    batch_size = int(min(num_workers, budget - optimizer.num_tell))
    remote_candidates = client.scatter([optimizer.ask()
                                        for _ in range(batch_size)])
    completed_queue = as_completed([_submit_evaluation(candidate)
                                    for candidate in remote_candidates])
    for batch in completed_queue.batches():
        for future in batch:
            if future.status == 'finished':
                candidate, loss = future.result()
                optimizer.tell(candidate, loss)
                log.append((candidate, loss))
            else:
                # The evaluation did not finish: replace it with a fresh
                # candidate and wait for that one in the same queue.
                completed_queue.add(_submit_evaluation(optimizer.ask()))

recommendation = optimizer.provide_recommendation()
elapsed = timer() - start

print("elapsed time:", strftime("%T", gmtime(elapsed)))

In [None]:
# `log` holds the (candidate, loss) tuples appended by the optimization loop.
convergence_plot(log)

In [None]:
# Same plotting call as the pre-optimization cell, now with the optimizer's
# recommendation instead of the initial instrumentation.
comparison_plots(model, observations, recommendation)

In [None]:
# Convert the recommended candidate into the parameter-set representation,
# labelled 'best'.
parameters_best = parameters.from_instrumentation(recommendation, column_name='best')
# Per-parameter summary derived from the evaluation log; the exact contents
# are defined by VarSoilParameters.delta_mim (not visible here).
report = parameters.delta_mim(log)
report['best'] = parameters_best
# Display only: the sorted result is the cell's output and is not assigned,
# so `report` keeps its original row order. Rows are ordered by the absolute
# value of 'delta', largest first.
report.sort_values('delta', key=np.abs, ascending=False)

In [None]:
# Outputs
# Record the results as scraps: the report (with the 'pandas' encoder), the
# loss attached to the recommendation and the wall-clock time measured
# around the optimization cell.
sb.glue('report', report, 'pandas')
sb.glue('loss', recommendation.loss)
sb.glue('elapsed_time', elapsed)
