# LFMC Estimation Experiment
Notebook to test LFMC modelling code changes

## Set up experiment parameters
If the experiment dictionary contains a 'tests' key whose value is not 'falsy' (False, None, 0, or an empty list), it is assumed to be a list of tests to run. Each test will run with the specified model parameters; model parameters not specified will be the same for each test, as set in the main model_params dictionary. A failed run can be restarted by setting the 'restart' key to the test that failed; that test and the remaining tests will then be run.

If 'tests' is 'falsy' then a single test will be run using the parameters in the main model_params dictionary.

For more help, after running this cell run `experiment.help()` or `experiment.help('<parameter>')`

In [1]:
def set_up_experiment():
    """Build the experiment dictionary for the Australia/Sourcerer ensemble.

    One test is created per random seed. Test ``n`` (zero-based) is named
    ``Ensemble n+1`` and points at the ``test<n>`` sub-directory of both the
    saved-folds directory and the pretrained-models directory.

    Returns:
        dict: the experiment settings, with the generated tests under
        the ``'tests'`` key.
    """
    import os
    # NOTE(review): `initialise` is not referenced below; presumably it is
    # imported for its side effects before `common` - confirm.
    import initialise
    import common

    random_seeds = [9013, 1815, 5313, 3945, 3632, 3875, 1782, 1393, 3708, 2914,
                    4522, 3368, 6379, 3009, 3806, 6579, 4075, 1056, 5261, 4752]

    folds_root = os.path.join(common.MODELS_DIR, 'australia_gen_folds')
    pretrained_root = os.path.join(common.MODELS_DIR, 'conus_base_models')

    # Directory names are zero-based (test0, test1, ...) while the
    # human-readable test names are one-based (Ensemble 1, Ensemble 2, ...).
    ensemble_tests = [
        {
            'testName': f'Ensemble {index+1}',
            'randomSeed': seed,
            'loadFolds': os.path.join(folds_root, f'test{index}'),
            'pretrainedModel': os.path.join(pretrained_root, f'test{index}'),
        }
        for index, seed in enumerate(random_seeds)
    ]

    return {
        'name': 'australia_sourcerer_800',
        'description': 'Australia: pretrained on CONUS; Sourcerer, targetMax=800; all training samples',
        'tests': ensemble_tests,
        'restart': False,
        'rerun': None,
        'resumeAllTests': False,
    }

## Set up model parameters
Set up and customise the model parameters. Leave all parameters as set here to run Scenario A. To find out more about any parameter, run `model_params.help('<parameter>')` after running this cell to create the ModelParams object.

In [2]:
def set_up_model_params(experiment):
    """Customise the transfer-learning model parameters for an experiment.

    The ``model_params`` object imported from ``architecture_transfer`` is
    updated in place, one key at a time, and then returned.

    Args:
        experiment: mapping that provides the ``'name'`` and
            ``'description'`` entries copied into the model parameters.

    Returns:
        The updated ``model_params`` object.
    """
    import os
    # NOTE(review): `initialise` is not referenced below; presumably it is
    # imported for its side effects before `common` - confirm.
    import initialise
    import common
    from architecture_transfer import model_params

    # Settings are applied in insertion order, one item assignment per entry.
    overrides = {
        'modelName': experiment['name'],
        'description': experiment['description'],
        'modelRuns': 20,
        'plotModel': False,

        # Globe-LFMC column names
        'splitColumn': 'Group2',
        'yearColumn': 'Sampling year',

        # Train/test split parameters
        'splitMethod': 'byValue',
        'splitFolds': 4,
        'splitYear': 2014,
        'yearFolds': 3,
        'splitMax': True,
        'saveFolds': True,

        # Transfer learning parameters
        'pretrainedModel': os.path.join(common.MODELS_DIR, 'conus_base_models', 'test1'),
        'transferModel': {'method': 'sourcerer', 'targetMax': 800},
        'commonNormalise': False,

        # Other parameters
        'epochs': 1000,
        'evaluateEpochs': 100,
        'derivedModels': None,
        'seedList': [
            441, 780, 328, 718, 184, 372, 346, 363, 701, 358,
            566, 451, 795, 237, 788, 185, 397, 530, 758, 633,
            632, 941, 641, 519, 162, 215, 578, 919, 917, 585,
            914, 326, 334, 366, 336, 413, 111, 599, 416, 230,
            191, 700, 697, 332, 910, 331, 771, 539, 575, 457,
        ],
        'maxWorkers': 24,      # Number of workers (parallel processes)
        'gpuList': [0, 1],     # List of GPUs to use
        'gpuMemory': 256,      # GPU memory for each worker
    }
    for key, value in overrides.items():
        model_params[key] = value

    return model_params

## Build and run the models
Builds and trains the LFMC models.

All models, predictions, evaluation statistics, and plots of test results are saved to `model_dir`, with each test and run saved to a separate sub-directory. For each model created, predictions and evaluation statistics are also returned as attributes of the `model` object. These are stored as nested lists; the structure for a full experiment is:
- Tests (omitted if not an experiment)
  - Runs (omitted for a single run)
    - Folds (for k-fold splitting)

In [3]:
def run_job(experiment):
    """Run the experiment's tests inside a worker process.

    Wraps ``experiment`` in ``ExperimentParams``, builds the model parameters
    with ``set_up_model_params``, attaches the Australian input files and
    calls ``run_experiment``.

    Args:
        experiment: the experiment settings, as accepted by
            ``ExperimentParams``.

    Returns:
        str: always ``'Finished'``; the models themselves are not returned.
    """
    import os
    # NOTE(review): `initialise` is not referenced below; presumably it is
    # imported for its side effects before `common` - confirm.
    import initialise
    import common
    from modelling_functions import run_experiment
    from model_parameters import ExperimentParams

    os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=/usr/lib/cuda'

    def dataset_path(file_name):
        # All input files live in the shared datasets directory.
        return os.path.join(common.DATASETS_DIR, file_name)

    # Input files
    optical_file = dataset_path('australia_modis_365days.csv')
    weather_file = dataset_path('australia_era5_365days.csv')
    samples_file = dataset_path('australia_samples_365days.csv')

    # Experiment parameters
    experiment_params = ExperimentParams(experiment)

    # Model parameters
    model_params = set_up_model_params(experiment_params)
    model_params['tempDir'] = common.TEMP_DIR
    model_params['modelDir'] = os.path.join(common.MODELS_DIR, model_params['modelName'])

    # Model inputs
    model_params['samplesFile'] = samples_file
    model_params.add_input('optical', {'filename': optical_file, 'channels': 7})
    model_params.add_input('weather', {'filename': weather_file, 'channels': 7})

    trained_models = run_experiment(experiment_params, model_params)

    # Dask doesn't seem to like returning model_list objects, so the trained
    # models are discarded and a plain status string is returned instead.
    return 'Finished'

In [4]:
from dask_jobqueue import SLURMCluster
from distributed import Client

# Number of tests handed to each submitted job.
tests_per_run = 5

experiment = set_up_experiment()
experiment['testsPerRun'] = tests_per_run
num_tests = len(experiment['tests'])

# Start from the 'restart' value when it is truthy, otherwise from test 0.
first_test = experiment['restart'] if experiment['restart'] else 0

In [5]:
# Run the tests in batches of `tests_per_run`. Each batch gets its own
# SLURMCluster (scaled to a single worker) and Client, which are shut down
# before the next batch starts.
for batch_start in range(first_test, num_tests, tests_per_run):
    # The first batch keeps the 'restart' value the experiment was set up
    # with; every later batch restarts from its own first test.
    if batch_start > first_test:
        experiment['restart'] = batch_start

    cluster = SLURMCluster(
        memory="128g", processes=1, cores=16, job_extra_directives=["--gres=gpu:2"], nanny=False
    )
    cluster.scale(1)
    client = Client(cluster)

    try:
        future = client.submit(run_job, experiment)
        # future.result() blocks until the job finishes and re-raises any
        # exception raised inside run_job.
        print(f'Tests {batch_start} - {min(batch_start+tests_per_run, num_tests)} result: {future.result()}')
    finally:
        # Always release the scheduler and workers, even when the job failed,
        # so a failed batch does not leave its cluster running.
        client.shutdown()

Tests 0 - 5 result: Finished
Tests 5 - 10 result: Finished
Tests 10 - 15 result: Finished
Tests 15 - 20 result: Finished
