# LFMC Estimation Experiment
Notebook to test LFMC modelling code changes

In [1]:
import os

import initialise
import common
from modelling_functions import create_models, run_experiment
from architecture_transfer import model_params
from scenarios import europe_scenario
from model_parameters import ExperimentParams

## Directories and Input files
Change these settings as required
- `input_dir`: Directory containing the data extracted from GEE and Globe-LFMC, the outputs from running the `Extract DEM Data.ipynb` and `Extract MODIS Data.ipynb` notebooks.
- `output_dir`: A sub-directory called `model_params['modelName']` (`Scenario_A`) will be created in this directory, where all outputs generated by this notebook will be written. 
- `temp_dir`: A temporary directory where model checkpoints are created. This directory should exist and be empty.
- `modis_csv`: The file containing extracted MODIS data for each sample, created by `Extract MODIS Data.ipynb`
- `era5_csv`: The file containing extracted ERA5 weather data for each sample; it is used here as the `weather` input in place of the PRISM data created by `Extract PRISM Data.ipynb`
- `aux_csv`: The file containing extracted sample labels, DEM, climate zone and other auxiliary data, created by `Extract Auxiliary Data.ipynb`.

In [2]:
# Input CSV files, all located in the shared datasets directory.
# Order matters: the names on the left pair up with the filenames below.
modis_csv, era5_csv, aux_csv = (
    os.path.join(common.DATASETS_DIR, csv_name)
    for csv_name in (
        'europe_modis_365days.csv',     # optical (MODIS) data
        'europe_era5_365days.csv',      # weather (ERA5) data
        'europe_samples_365days.csv',   # sample labels and auxiliary data
    )
)

## Set up experiment parameters
If the experiment dictionary contains a 'tests' key that is not 'falsy' (False, None, 0, empty list), it is assumed to be a list of tests to run. Each test will run with the specified model parameters. Model parameters not specified in a test will be the same for every test, as set in the main `model_params` dictionary. A failed run can be restarted by setting the 'restart' key to the test that failed; that test and the remaining tests will then be run.

If 'tests' is 'falsy' then a single test will be run using the parameters in the main model_params dictionary.

For more help, after running this cell run `experiment.help()` or `experiment.help('<parameter>')`

In [3]:
# Directories read by every test: `folds_dir` supplies the `loadFolds` location
# and `pretrained_dir` the `pretrainedModel` location. Each test uses its own
# `test<n>` sub-directory of both.
folds_dir = os.path.join(common.MODELS_DIR, 'europe_gen-folds')
pretrained_dir = os.path.join(common.MODELS_DIR, 'conus_base_models')

# Experiment-level settings. 'tests' starts empty and is filled in below.
experiment = ExperimentParams({
    'name': 'europe_full_adabn_cn',
    'description': 'Europe: pretrained on CONUS; Adapt BN layers; all training samples',
    'tests': [],
    'restart': None,
    'rerun': None,
    'resumeAllTests': False,
})

# One test per seed. Test names are 1-based ("Ensemble 1", ...) while the
# fold / pre-trained model sub-directories are 0-based ("test0", ...).
seeds = [9013, 1815, 5313, 3945, 3632, 3875, 1782, 1393, 3708, 2914,
         4522, 3368, 6379, 3009, 3806, 6579, 4075, 1056, 5261, 4752]
for n, s in enumerate(seeds):
    experiment['tests'].append({
        'testName': f'Ensemble {n+1}', 'randomSeed': s,
        'loadFolds': os.path.join(folds_dir, f'test{n}'),
        'pretrainedModel': os.path.join(pretrained_dir, f'test{n}')})

# Display experiment details
#experiment

## Set up model parameters
Set up and customise the model parameters. Leave all parameters as set here to run Scenario A. To find out more about any parameter, run `model_params.help('<parameter>')` after running this cell to create the ModelParams object.

In [4]:
# Customize model parameters: take the model name and description from the
# experiment, write outputs to a model directory named after the model, and
# read the sample data from the auxiliary CSV file.
model_params['modelName'] = experiment['name']
model_params['description'] = experiment['description']
model_params['modelDir'] = os.path.join(common.MODELS_DIR, model_params['modelName'])
model_params['samplesFile'] = aux_csv
# Apply the Europe scenario settings (imported from `scenarios`).
# NOTE(review): presumably this updates model_params in place, so its position
# relative to the assignments above and below may matter - keep the order.
europe_scenario(model_params)

# Transfer learning parameters
# 'adabn' corresponds to "Adapt BN layers" in the experiment description.
model_params['transferModel'] = {'method': 'adabn'}
model_params['commonNormalise'] = True #False

# Other parameters
model_params['epochs'] = 1
# NOTE(review): presumably enables the per-epoch test statistics that the
# results summary reads from `epoch_test_stats` - confirm with
# model_params.help('evaluateEpochs').
model_params['evaluateEpochs'] = 1

# Multiprocessing parameters
# model_params['enableXla'] = False #True
model_params['mixedPrecision'] = None
model_params['maxWorkers'] = 2     # Number of workers (parallel processes)
model_params['gpuList'] = [0]      # List of GPUs to use
model_params['gpuMemory'] = 1536   # GPU memory for each worker (units not shown here - TODO confirm)

# Display the complete parameter set as the cell output.
model_params

{'modelName': 'europe_full_adabn_cn',
 'testName': None,
 'test': None,
 'run': None,
 'fold': None,
 'description': 'Europe: pretrained on CONUS; Adapt BN layers; all training samples',
 'modelClass': 'LfmcTempCnn',
 'modelDir': 'G:\\My Drive\\LFMC Data\\Transfer_learning\\Models\\europe_full_adabn_cn',
 'tempDir': 'C:\\Temp\\LFMC',
 'diagnostics': False,
 'restartRun': None,
 'derivedModels': None,
 'saveModels': False,
 'saveFolds': True,
 'saveRunResults': False,
 'saveTrain': None,
 'saveValidation': False,
 'plotModel': True,
 'randomSeed': 1234,
 'modelSeed': 1234,
 'modelRuns': 20,
 'resplit': False,
 'seedList': [441,
  780,
  328,
  718,
  184,
  372,
  346,
  363,
  701,
  358,
  566,
  451,
  795,
  237,
  788,
  185,
  397,
  530,
  758,
  633],
 'maxWorkers': 2,
 'asyncRuns': True,
 'deterministic': False,
 'gpuDevice': 0,
 'gpuList': [0],
 'gpuMemory': 1536,
 'dataSources': ['optical', 'weather', 'aux'],
 'sourceNames': None,
 'deduplicate': False,
 'inputs': {},
 'sampl

In [5]:
# Register the two time-series inputs. Both are added with 7 channels and
# differ only in the source name and the CSV file they are read from.
for source_name, source_csv in (('optical', modis_csv), ('weather', era5_csv)):
    model_params.add_input(source_name, {'filename': source_csv, 'channels': 7})

# Show the resulting input settings.
model_params['inputs']

{'optical': {'filename': 'G:\\My Drive\\LFMC Data\\Transfer_learning\\Datasets\\europe_modis_365days.csv',
  'normalise': {'method': 'minMax', 'percentiles': 2},
  'channels': 7,
  'includeChannels': [],
  'start': None,
  'end': None},
 'weather': {'filename': 'G:\\My Drive\\LFMC Data\\Transfer_learning\\Datasets\\europe_era5_365days.csv',
  'normalise': {'method': 'minMax', 'percentiles': 2},
  'channels': 7,
  'includeChannels': [],
  'start': None,
  'end': None}}

## Build and run the models
Builds and trains the LFMC models.

All models, predictions, evaluation statistics, and plots of test results are saved to the model directory (`model_params['modelDir']`), with each test and run saved to a separate sub-directory. For each model created, predictions and evaluation statistics are also returned as attributes of the `model` object. These are stored as nested lists; the structure for a full experiment is:
- Tests (omitted if not an experiment)
  - Runs (omitted for a single run)
    - Folds (for k-fold splitting)

In [6]:
# Run every test in the experiment, then show each test's statistics
# (None is shown for any result that has no `test_stats` attribute).
models = run_experiment(experiment, model_params)
display(*(getattr(test_model, 'test_stats', None) for test_model in models))

Experiment europe_full_adabn_cn - Europe: pretrained on CONUS; Adapt BN layers; all training samples

Reading samples file G:\My Drive\LFMC Data\Transfer_learning\Datasets\europe_samples_365days.csv
Reading optical file G:\My Drive\LFMC Data\Transfer_learning\Datasets\europe_modis_365days.csv
Reading weather file G:\My Drive\LFMC Data\Transfer_learning\Datasets\europe_era5_365days.csv

----------------------------------------------------------------------

Test 0: Ensemble 1 - {'randomSeed': 9013, 'loadFolds': 'G:\\My Drive\\LFMC Data\\Transfer_learning\\Models\\europe_gen-folds\\test0', 'pretrainedModel': 'G:\\My Drive\\LFMC Data\\Transfer_learning\\Models\\conus_base_models\\test0'}

Setting target to "LFMC value"
Auxiliary columns: ['Long_sin', 'Long_cos', 'Lat_norm']
One-hot encoded columns: ['Czone3']
Optical input shape: (8398, 365, 7)
Prepared optical shape: (8398, 365, 7)
Weather input shape: (8398, 365, 7)
Prepared weather shape: (8398, 365, 7)
Prepared aux shape: (8398, 18)
P

Unnamed: 0,Count,RMSE,R2,Bias
base,842,23.88,0.32,-6.3


Unnamed: 0,Count,RMSE,R2,Bias
base,842,22.57,0.4,-5.04


Unnamed: 0,Count,RMSE,R2,Bias
base,842,22.1,0.42,-5.11


Unnamed: 0,Count,RMSE,R2,Bias
base,842,23.14,0.36,-7.64


Unnamed: 0,Count,RMSE,R2,Bias
base,842,21.95,0.43,-7.14


Unnamed: 0,Count,RMSE,R2,Bias
base,842,23.33,0.35,-8.22


Unnamed: 0,Count,RMSE,R2,Bias
base,842,21.61,0.45,-3.55


Unnamed: 0,Count,RMSE,R2,Bias
base,842,22.22,0.41,-2.42


Unnamed: 0,Count,RMSE,R2,Bias
base,842,22.62,0.39,-4.71


Unnamed: 0,Count,RMSE,R2,Bias
base,842,22.74,0.39,-6.36


Unnamed: 0,Count,RMSE,R2,Bias
base,842,21.67,0.44,-3.94


Unnamed: 0,Count,RMSE,R2,Bias
base,842,22.11,0.42,-3.29


Unnamed: 0,Count,RMSE,R2,Bias
base,842,22.38,0.41,-5.4


Unnamed: 0,Count,RMSE,R2,Bias
base,842,21.82,0.43,-3.44


Unnamed: 0,Count,RMSE,R2,Bias
base,842,22.31,0.41,-5.26


Unnamed: 0,Count,RMSE,R2,Bias
base,842,23.18,0.36,-6.46


Unnamed: 0,Count,RMSE,R2,Bias
base,842,22.88,0.38,-5.85


Unnamed: 0,Count,RMSE,R2,Bias
base,842,22.71,0.39,-3.06


Unnamed: 0,Count,RMSE,R2,Bias
base,842,22.39,0.41,-5.16


Unnamed: 0,Count,RMSE,R2,Bias
base,842,22.24,0.41,-3.47


In [7]:
from display_utils import print_heading

# Print a summary for each test: any per-epoch statistics that were recorded,
# followed by the statistics for the full run.
print_heading('Results Summary', line_char='=', blank_before=2, blank_after=0)
for num, model in enumerate(models):
    test = experiment['tests'][num]
    try:
        test_name = test.get('testName', None) or experiment['testNames'][num]
    except Exception:
        # Best-effort name lookup: fall back to a placeholder, but (unlike a
        # bare `except:`) let KeyboardInterrupt / SystemExit through so the
        # cell can still be interrupted.
        test_name = '<unnamed test>'
    print_heading(f'Test {num}: {test_name}', blank_before=1, blank_after=0)
    # An empty dict when the model has no `epoch_test_stats` attribute.
    epoch_test_stats = getattr(model, 'epoch_test_stats', {})
    if epoch_test_stats:
        for epoch, epoch_stats in epoch_test_stats.items():
            print(f"\n{epoch}:")
            print(epoch_stats)
        print(f"\nFull run: epoch {model.params['epochs']}:")
    print(getattr(model, 'test_stats', None))




Results Summary

Test 0: Ensemble 1
------------------

epoch0:
      Count   RMSE    R2   Bias
base    842  41.35 -1.03 -33.29

Full run: epoch 1:
      Count   RMSE    R2  Bias
base    842  23.88  0.32  -6.3

Test 1: Ensemble 2
------------------

epoch0:
      Count   RMSE   R2   Bias
base    842  40.06 -0.9 -32.32

Full run: epoch 1:
      Count   RMSE   R2  Bias
base    842  22.57  0.4 -5.04

Test 2: Ensemble 3
------------------

epoch0:
      Count   RMSE    R2   Bias
base    842  38.17 -0.73 -29.69

Full run: epoch 1:
      Count  RMSE    R2  Bias
base    842  22.1  0.42 -5.11

Test 3: Ensemble 4
------------------

epoch0:
      Count  RMSE    R2  Bias
base    842  39.6 -0.86 -31.0

Full run: epoch 1:
      Count   RMSE    R2  Bias
base    842  23.14  0.36 -7.64

Test 4: Ensemble 5
------------------

epoch0:
      Count   RMSE   R2   Bias
base    842  29.04 -0.0 -16.24

Full run: epoch 1:
      Count   RMSE    R2  Bias
base    842  21.95  0.43 -7.14

Test 5: Ensemble 6
----

In [24]:
# Collect training-set sizes grouped by fold label, where the label is the
# part of the fold name before the first underscore.
train_sizes = {}
for test_models in models:
    # Only the first element of each test is inspected
    # (presumably the first run, given the tests -> runs -> folds nesting).
    for fold_model in test_models[0]:
        fold_label = fold_model.params['fold'].split('_')[0]
        fold_train_size = fold_model.params['trainSize']
        train_sizes.setdefault(fold_label, []).append(fold_train_size)
train_sizes

{'2016': [6248,
  7061,
  7110,
  4237,
  6559,
  7316,
  5815,
  4966,
  5665,
  7345,
  6288,
  5358,
  7519,
  5679,
  4730,
  6728,
  7307,
  6393,
  7025,
  3931,
  6416,
  5155,
  6706,
  6379,
  5109,
  5092,
  7336,
  7119,
  6623,
  5889,
  7548,
  4596,
  7204,
  5612,
  5958,
  5882,
  6500,
  5197,
  6780,
  6179,
  5477,
  5824,
  7174,
  6181,
  6209,
  6591,
  6607,
  5249,
  6281,
  4794,
  6951,
  6630,
  6551,
  5701,
  6370,
  6034,
  5850,
  6966,
  6222,
  5618,
  6810,
  7842,
  5341,
  4663,
  5109,
  6215,
  6158,
  7174,
  7257,
  6391,
  3835,
  7173,
  6912,
  4873,
  5416,
  7455,
  3686,
  7833,
  7619,
  5518],
 '2015': [6053,
  6834,
  6867,
  4113,
  6341,
  7085,
  5621,
  4820,
  5496,
  7099,
  6091,
  5181,
  7279,
  5496,
  4594,
  6498,
  7075,
  6186,
  6802,
  3804,
  6201,
  4990,
  6495,
  6181,
  4943,
  4950,
  7089,
  6885,
  6408,
  5709,
  7293,
  4457,
  6960,
  5441,
  5774,
  5692,
  6303,
  5019,
  6555,
  5990,
  5285,
  5645,
  6955,

In [25]:
import numpy as np

# Report the mean training-set size for each fold label.
for fold_label, fold_sizes in train_sizes.items():
    print(fold_label, np.mean(fold_sizes))

2016 6164.0
2015 5966.75
2014 5768.0
