In [None]:
# Defaults
# Tunable settings for the whole notebook; later cells read these names as globals.

# System settings
# Parent directory passed as `dir=` when temporary run directories and the
# optimizer log file are created.
systmpfs = '/tmp'
# First positional argument of the model constructor.
inputs_path = 'inputs/Matsch_B2/run'
# Passed to the model constructor as `exe=`.
geotop_path = '../geotop/build/geotop'
# CSV loaded with pandas and indexed by its 'name' column; later cells read its
# 'lower', 'upper', 'suggested' and 'type' columns.
variables_path = 'inputs/Matsch_B2/variables.csv'

# Optimizer settings
# Forwarded to the nevergrad optimizer and used to size the process pool.
num_workers = 2
# Forwarded to the nevergrad optimizer as `budget=`.
budget = 16
# Key looked up in `ng.optimizers.registry`.
algorithm = 'OnePlusOne'
# Forwarded to the model as run_args['timeout'].
# NOTE(review): presumably seconds (subprocess convention) - confirm in common.GEOtopRun.
timeout = 120
# NOTE(review): not referenced by any other cell visible in this notebook.
monitor_interval = 10
# Passed to observations() as `scale=`.
# NOTE(review): presumably a resampling frequency - confirm in common.observations.
scale = 'D'
# Passed to observations() as `start=`.
startdate = '01/01/2011 00:00'
# Names scored by the loss function and plotted in the comparison cells.
targets = ['soil_moisture_content_50', 'sensible_heat_flux_in_air']
# Per-target weights, paired positionally with `targets` in the loss function.
weights = [1.0, 1.0]

In [None]:
from subprocess import CalledProcessError, TimeoutExpired
from tempfile import TemporaryDirectory, NamedTemporaryFile
from concurrent.futures import ProcessPoolExecutor

import numpy as np
from numpy.random import uniform
import pandas as pd
import matplotlib.pyplot as plt

import nevergrad as ng
import hiplot as hip
from SALib.sample import saltelli
from SALib.analyze import sobol

from common import observations, GEOtopRun

In [None]:
class GEOtopRunLogVars(GEOtopRun):
    """GEOtopRun variant that accepts log10-scaled values for 'log' variables."""

    def preprocess(self, working_dir, *args, **kwargs):
        """Rescale log-typed keyword values, then delegate to the parent class.

        Every keyword whose entry in the global `variables.type` column equals
        'log' is replaced by ``10 ** value``; all other keywords are forwarded
        unchanged. Positional arguments are passed through as they are.
        """
        rescaled = {
            key: 10 ** value if variables.type[key] == 'log' else value
            for key, value in kwargs.items()
        }
        super().preprocess(working_dir, *args, **rescaled)

In [None]:
# Model wrapper; `run_args` is handed to the wrapper together with the
# executable path and the inputs directory.
model = GEOtopRunLogVars(
    inputs_path,
    exe=geotop_path,
    run_args=dict(check=True, capture_output=True, timeout=timeout),
)

# Variable table indexed by variable name.
variables = pd.read_csv(variables_path, index_col='name')

# Synthetic "true" value per variable, drawn uniformly between its bounds.
# NOTE(review): no random seed is set in this notebook, so the draw differs per run.
variables['synth'] = [
    uniform(low=row.lower, high=row.upper)
    for _, row in variables.iterrows()
]

In [None]:
# Run the model once with the randomly drawn 'synth' parameter values, inside a
# throw-away working directory under `systmpfs`.
with TemporaryDirectory(dir=systmpfs) as tmpdir:
    synth = model.eval(tmpdir, **variables.synth.to_dict())

# `synth` is rebound here: from the raw model output to the result of
# observations(), which later cells use for metric() and compare().
synth = observations(synth, scale=scale, start=startdate)

In [None]:
def loss_function(*args, sim=None, **kwargs):
    """Weighted mean of the per-target metrics against the synthetic observations.

    When `sim` is not supplied, the model is evaluated with `*args` and
    `**kwargs` in a temporary directory under `systmpfs`. If that run raises
    CalledProcessError or TimeoutExpired, NaN is returned instead of a score.

    The score is the sum of ``weight * synth.metric(target, sim)`` over the
    paired global `weights` and `targets`, divided by ``sum(weights)``.
    """
    if sim is None:
        with TemporaryDirectory(dir=systmpfs) as tmpdir:
            try:
                sim = model.eval(tmpdir, *args, **kwargs)
            except (CalledProcessError, TimeoutExpired):
                # A failed or timed-out model run yields no usable score.
                return np.nan

    weighted_total = 0
    for weight, target in zip(weights, targets):
        weighted_total += weight * synth.metric(target, sim)
    return weighted_total / sum(weights)

In [None]:
# Baseline: evaluate the model without passing any parameter overrides, report
# its loss against the synthetic observations, then compare each target.
with TemporaryDirectory(dir=systmpfs) as tmpdir:
    sim = model.eval(tmpdir)
    # Passing `sim=` makes loss_function score this run instead of re-running the model.
    print(f"Before optimization loss is {loss_function(sim=sim)}")
    for t in targets:
        synth.compare(t, sim, desc=t)
        plt.show()

In [None]:
# One bounded scalar per row of the variable table, starting from the row's
# 'suggested' value.
kwargs = {
    var_name: ng.p.Scalar(init=spec.suggested, lower=spec.lower, upper=spec.upper)
    for var_name, spec in variables.iterrows()
}

# Look the optimizer class up by its registry name and instantiate it.
optimizer = ng.optimizers.registry[algorithm](
    parametrization=ng.p.Instrumentation(**kwargs),
    budget=budget,
    num_workers=num_workers,
)

# Record every "tell" to a temporary file under `systmpfs`; `logfile` must stay
# referenced for as long as the logger needs the file.
logfile = NamedTemporaryFile(dir=systmpfs)
logger = ng.callbacks.ParametersLogger(logfile.name)
optimizer.register_callback("tell", logger)

In [None]:
# Run the optimization, submitting loss evaluations to a process pool sized to
# the optimizer's worker count.
# NOTE(review): loss_function reads notebook globals (model, synth, weights,
# targets); this presumably relies on a fork-based process start - confirm on
# the target platform.
with ProcessPoolExecutor(max_workers=optimizer.num_workers) as executor:
    recommendation = optimizer.minimize(loss_function, 
                                        executor=executor, 
                                        batch_mode=False)

In [None]:
# Store the recommended value of each variable next to its synthetic value.
variables['best'] = pd.Series(recommendation.kwargs)
# Absolute synth-vs-best gap relative to each variable's (upper - lower) range, times 3.
# NOTE(review): the factor 3 is not explained anywhere in this notebook - confirm its meaning.
variables['err'] = 3 * (variables.synth - variables.best).abs() / (variables.upper - variables.lower)
# Cell output: the table ordered by ascending error.
variables.sort_values('err')

In [None]:
# Read back what the parameters logger recorded during the optimization and
# convert it into a HiPlot experiment for the cells below.
logger.load()
experiment = logger.to_hiplot_experiment()

In [None]:
# Logger columns that are hidden in both HiPlot displays.
hidden_columns = [
    'uid',
    'from_uid',
    '#parametrization',
    '#optimizer',
    '#optimizer#noise_handling',
    '#optimizer#mutation',
    '#optimizer#crossover',
    '#optimizer#initialization',
    '#optimizer#scale',
    '#optimizer#recommendation',
    '#optimizer#F1',
    '#optimizer#F2',
    '#optimizer#popsize',
    '#optimizer#propagate_heritage',
    '#session',
    '#lineage',
    '#meta-sigma',
]

# Also hide the two sigma columns of every variable.
for name in variables.index:
    hidden_columns += [name + '#sigma', name + '#sigma#sigma']

# Table view: rows ordered by ascending tell counter.
table = experiment.display_data(hip.Displays.TABLE)
table.update({
    'hide': hidden_columns,
    'order_by': [['#num-tell', 'asc']],
})

# Parallel plot: additionally hide the tell counter; axes run from generation,
# through the variables, to the loss.
plot = experiment.display_data(hip.Displays.PARALLEL_PLOT)
plot.update({
    'hide': hidden_columns + ['#num-tell'],
    'order': ['#generation'] + list(variables.index) + ['#loss'],
})

In [None]:
# Show the HiPlot experiment configured above as this cell's output.
experiment.display()

In [None]:
# Re-run the model with the recommended parameters and compare each target
# against the synthetic observations.
# The working directory is created under `systmpfs`, like every other model run
# in this notebook, so the configured scratch location is honoured here too.
with TemporaryDirectory(dir=systmpfs) as tmpdir:
    # Loss as reported by the optimizer for its recommendation.
    print(f"After optimization loss is {recommendation.loss}")
    sim = model.eval(tmpdir, **recommendation.kwargs)
    for t in targets:
        synth.compare(t, sim, desc=t)
        plt.show()