# LFMC Estimation Experiment
Notebook to test LFMC modelling code changes

In [1]:
import os

import initialise
import common
from modelling_functions import create_models, run_experiment
from architecture_transfer import model_params
from scenarios import europe_scenario
from model_parameters import ExperimentParams

## Directories and Input files
Change these settings as required
- `input_dir`: Directory containing the data extracted from GEE and Globe-LFMC, the outputs from running the `Extract DEM Data.ipynb` and `Extract MODIS Data.ipynb` notebooks.
- `output_dir`: A sub-directory named after `model_params['modelName']` (`europe_500_adabn` in this notebook) will be created in this directory, where all outputs generated by this notebook will be written.
- `temp_dir`: A temporary directory where model checkpoints are created. This directory should exist and be empty.
- `modis_csv`: The file containing extracted MODIS data for each sample, created by `Extract MODIS Data.ipynb`
- `era5_csv`: The file containing extracted ERA5 weather data for each sample, created by the weather extraction notebook (the ERA5 equivalent of `Extract PRISM Data.ipynb`)
- `aux_csv`: The file containing extracted sample labels, DEM, climate zone and other auxiliary data, created by `Extract Auxiliary Data.ipynb`.

In [2]:
# Input CSV files; all of them are read from the shared datasets directory.
datasets_dir = common.DATASETS_DIR
modis_csv = os.path.join(datasets_dir, 'europe_modis_365days.csv')    # MODIS data
era5_csv = os.path.join(datasets_dir, 'europe_era5_365days.csv')      # ERA5 data
aux_csv = os.path.join(datasets_dir, 'europe_samples_365days.csv')    # samples / auxiliary data

## Set up experiment parameters
If the experiment dictionary contains a 'tests' key whose value is not 'falsy' (i.e. not False, None, 0, or an empty list), it is assumed to be a list of tests to run. Each test will run with the specified model parameters. Model parameters not specified will be the same for each test, as set in the main model_params dictionary. A failed run can be restarted by setting the 'restart' key to the test that failed. This test and the remaining tests will then be run.

If 'tests' is 'falsy' then a single test will be run using the parameters in the main model_params dictionary.

For more help, after running this cell run `experiment.help()` or `experiment.help('<parameter>')`

In [7]:
# Directories used by this experiment (each defined exactly once):
# - models_dir: parent directory for the model outputs
# - folds_dir: contains one sub-directory of folds per test (test0, test1, ...)
# - pretrained_dir: contains one pretrained model sub-directory per test
models_dir = os.path.join(common.DATA_DIR, 'LFMC_CN_models')
folds_dir = os.path.join(common.MODELS_DIR, 'europe_gen-folds')
pretrained_dir = os.path.join(common.MODELS_DIR, 'conus_base_models')

# Experiment definition. 'tests' starts empty and is filled in below, so
# re-running this cell always rebuilds the same list of tests.
experiment = ExperimentParams({
    'name': 'europe_500_adabn',
    'description': 'Europe: pretrained on CONUS; Adapt BN layers; 500 training samples',
    'tests': [],
    'restart': None,
    'rerun': None,
    'resumeAllTests': False,
})

# One test per seed. Test names are 1-based ('Ensemble 1', ...) while the
# fold and pretrained-model sub-directories are 0-based ('test0', ...).
seeds = [9013, 1815, 5313, 3945, 3632, 3875, 1782, 1393, 3708, 2914,
         4522, 3368, 6379, 3009, 3806, 6579, 4075, 1056, 5261, 4752]
for n, s in enumerate(seeds):
    experiment['tests'].append({
        'testName': f'Ensemble {n+1}', 'randomSeed': s,
        'loadFolds': os.path.join(folds_dir, f'test{n}'),
        'pretrainedModel': os.path.join(pretrained_dir, f'test{n}')})

# Uncomment to display the experiment details
# experiment

## Set up model parameters
Set up and customise the model parameters. Leave all parameters as set here to run Scenario A. To find out more about any parameter, run `model_params.help('<parameter>')` after running this cell to create the ModelParams object.

In [4]:
# Customize model parameters
model_params['modelName'] = experiment['name']
model_params['description'] = experiment['description']
model_params['modelDir'] = os.path.join(models_dir, model_params['modelName'])
model_params['samplesFile'] = aux_csv
model_params['samplesFilter'] = {'apply': 'train', 'method': 'random', 'params': [500]}
europe_scenario(model_params)

# Transfer learning parameters
model_params['transferModel'] = {'method': 'adabn'}
model_params['commonNormalise'] = True #False

# Other parameters
model_params['epochs'] = 1
model_params['evaluateEpochs'] = 1

# Multiprocessing parameters
# model_params['enableXla'] = False #True
model_params['mixedPrecision'] = None
model_params['maxWorkers'] = 3     # Number of workers (parallel processes)
model_params['gpuList'] = [0]    # List of GPUs to use
model_params['gpuMemory'] = 768     # GPU memory for each worker

model_params

{'modelName': 'europe_500_adabn',
 'testName': None,
 'test': None,
 'run': None,
 'fold': None,
 'description': 'Europe: pretrained on CONUS; Adapt BN layers; 500 training samples',
 'modelClass': 'LfmcTempCnn',
 'modelDir': 'G:\\My Drive\\LFMC Data\\Transfer_learning\\LFMC_CN_models\\europe_500_adabn',
 'tempDir': 'C:\\Temp\\LFMC',
 'diagnostics': False,
 'restartRun': None,
 'derivedModels': None,
 'saveModels': False,
 'saveFolds': True,
 'saveRunResults': False,
 'saveTrain': None,
 'saveValidation': False,
 'plotModel': True,
 'randomSeed': 1234,
 'modelSeed': 1234,
 'modelRuns': 20,
 'resplit': False,
 'seedList': [441,
  780,
  328,
  718,
  184,
  372,
  346,
  363,
  701,
  358,
  566,
  451,
  795,
  237,
  788,
  185,
  397,
  530,
  758,
  633],
 'maxWorkers': 3,
 'asyncRuns': True,
 'deterministic': False,
 'gpuDevice': 0,
 'gpuList': [0],
 'gpuMemory': 768,
 'dataSources': ['optical', 'weather', 'aux'],
 'sourceNames': None,
 'deduplicate': False,
 'inputs': {},
 'sample

In [5]:
# Register the two time-series inputs; each is declared with 7 channels.
for source_name, source_csv in (('optical', modis_csv), ('weather', era5_csv)):
    model_params.add_input(source_name, {'filename': source_csv, 'channels': 7})

# Show the resulting input configuration
model_params['inputs']

{'optical': {'filename': 'G:\\My Drive\\LFMC Data\\Transfer_learning\\Datasets\\europe_modis_365days.csv',
  'normalise': {'method': 'minMax', 'percentiles': 2},
  'channels': 7,
  'includeChannels': [],
  'start': None,
  'end': None},
 'weather': {'filename': 'G:\\My Drive\\LFMC Data\\Transfer_learning\\Datasets\\europe_era5_365days.csv',
  'normalise': {'method': 'minMax', 'percentiles': 2},
  'channels': 7,
  'includeChannels': [],
  'start': None,
  'end': None}}

## Build and run the models
Builds and trains the LFMC models.

All models, predictions, evaluation statistics, and plots of test results are saved to `model_params['modelDir']`, with each test and run saved to a separate sub-directory. For each model created, predictions and evaluation statistics are also returned as attributes of the `model` object. These are stored as nested lists; the structure for a full experiment is:
- Tests (omitted if not an experiment)
  - Runs (omitted for a single run)
    - Folds (for k-fold splitting)

In [10]:
# Run the experiment with the configured model parameters. The result is kept
# in `models`, which the results summary cell iterates over, pairing each
# entry with the test at the same position in experiment['tests'].
models = run_experiment(experiment, model_params)
# Optional: uncomment to display the test statistics of each returned model
# for model in models:
#     display(getattr(model, 'test_stats', None))

Experiment europe_500_adabn - Europe: pretrained on CONUS; Adapt BN layers; 500 training samples

Reading samples file G:\My Drive\LFMC Data\Transfer_learning\Datasets\europe_samples_365days.csv
Reading optical file G:\My Drive\LFMC Data\Transfer_learning\Datasets\europe_modis_365days.csv
Reading weather file G:\My Drive\LFMC Data\Transfer_learning\Datasets\europe_era5_365days.csv

----------------------------------------------------------------------

Test 0: Ensemble 1 - {'randomSeed': 9013, 'loadFolds': 'G:\\My Drive\\LFMC Data\\Transfer_learning\\LFMC_models\\europe_gen-folds\\test0', 'pretrainedModel': 'G:\\My Drive\\LFMC Data\\Transfer_learning\\LFMC_models\\conus_base_models\\test0'}

Setting target to "LFMC value"
Auxiliary columns: ['Long_sin', 'Long_cos', 'Lat_norm']
One-hot encoded columns: ['Czone3']
Optical input shape: (8398, 365, 7)
Prepared optical shape: (8398, 365, 7)
Weather input shape: (8398, 365, 7)
Prepared weather shape: (8398, 365, 7)
Prepared aux shape: (8398,

In [11]:
from display_utils import print_heading

# Print a summary of the results for each test: the per-epoch test statistics
# (when the model has any) followed by the statistics for the full run.
print_heading('Results Summary', line_char='=', blank_before=2, blank_after=0)
for num, model in enumerate(models):
    test = experiment['tests'][num]
    try:
        # Prefer the name stored on the test itself; otherwise fall back to
        # the experiment-level list of test names.
        test_name = test.get('testName', None) or experiment['testNames'][num]
    except Exception:
        # Best-effort lookup: any failure to find a name gives a placeholder.
        # `Exception` (rather than a bare `except:`) lets KeyboardInterrupt
        # and SystemExit propagate, so the cell can still be interrupted.
        test_name = '<unnamed test>'
    print_heading(f'Test {num}: {test_name}', blank_before=1, blank_after=0)
    # Models without per-epoch statistics yield an empty dict here, in which
    # case only the full-run statistics are printed.
    epochs = getattr(model, 'epoch_test_stats', {})
    if epochs:
        for epoch, epoch_stats in epochs.items():
            print(f"\n{epoch}:")
            print(epoch_stats)
        print(f"\nFull run: epoch {model.params['epochs']}:")
    print(getattr(model, 'test_stats', None))




Results Summary

Test 0: Ensemble 1
------------------

epoch0:
      Count   RMSE    R2  Bias
base    842  24.98  0.26 -6.91

Full run: epoch 1:
      Count   RMSE    R2  Bias
base    842  27.97  0.07  18.1

Test 1: Ensemble 2
------------------

epoch0:
      Count   RMSE    R2  Bias
base    842  24.73  0.27 -8.27

Full run: epoch 1:
      Count   RMSE    R2   Bias
base    842  29.35 -0.02  19.74

Test 2: Ensemble 3
------------------

epoch0:
      Count   RMSE   R2  Bias
base    842  24.34  0.3 -5.59

Full run: epoch 1:
      Count   RMSE    R2   Bias
base    842  28.83  0.01  20.12

Test 3: Ensemble 4
------------------

epoch0:
      Count   RMSE    R2  Bias
base    842  24.54  0.29 -5.68

Full run: epoch 1:
      Count   RMSE   R2   Bias
base    842  27.48  0.1  17.83

Test 4: Ensemble 5
------------------

epoch0:
      Count   RMSE    R2  Bias
base    842  25.25  0.24  7.99

Full run: epoch 1:
      Count   RMSE    R2   Bias
base    842  27.22  0.12  17.66

Test 5: Ensemble 