# Out-of-site Mapping Model
Creates the out-of-site model used for mapping. It creates the ensemble of 20 models and saves the Keras models as HDF5 files. These files can then be loaded and used to generate the LFMC estimates needed to create LFMC maps.

In [1]:
import os
import numpy as np
import pandas as pd

import initialise
import common
from model_utils import reshape_data
from modelling_functions import create_models
from architecture_out_of_site import model_params

## Input files
Change these settings as required:
- `modis_csv`: The file containing extracted MODIS data for each sample, created by `Extract MODIS Data.ipynb`
- `prism_csv`: The file containing extracted PRISM data for each sample, created by `Extract PRISM Data.ipynb`
- `aux_csv`: The file containing extracted DEM and other auxiliary data for each sample, created by `Extract DEM Data.ipynb`. This includes the labels and the pre-processed site location and sampling date data from Globe-LFMC.

In [2]:
# Full paths of the three input CSV files; all of them live in the
# project's datasets directory (common.DATASETS_DIR).
modis_csv, prism_csv, aux_csv = (
    os.path.join(common.DATASETS_DIR, csv_name)
    for csv_name in (
        'modis_365days.csv',    # extracted MODIS data
        'prism_365days.csv',    # extracted PRISM data
        'samples_365days.csv',  # DEM / auxiliary data and labels
    )
)

## Set up model parameters

### Model parameters settings
To find out more about any parameter, run `model_params.help('<parameter>')`. 

In [3]:
# --- Run identification ---
model_params['modelName'] = 'out-of-site_map2017'
model_params['description'] = 'Create an ensemble of out-of-site models for 2017 LFMC maps'

# --- Input files (paths defined in the "Input files" cell) ---
model_params['modisFilename'] = modis_csv
model_params['prismFilename'] = prism_csv
model_params['auxFilename'] = aux_csv

# --- Run behaviour ---
# The trained models are kept so they can be re-loaded later to produce
# the map-wide estimates.
model_params['saveModels'] = True
# NOTE(review): presumably None means no train/test split is made; confirm
# with model_params.help('splitMethod').
model_params['splitMethod'] = None

# --- Output locations ---
model_params['tempDir'] = common.TEMP_DIR
# Each run gets its own sub-directory of the models directory, named after the model.
model_params['modelDir'] = os.path.join(common.MODELS_DIR, model_params['modelName'])

# --- Random seeds ---
model_params['seedList'] = [
    441, 780, 328, 718, 184,
    372, 346, 363, 701, 358,
    566, 451, 795, 237, 788,
    185, 397, 530, 758, 633,
    632, 941, 641, 519, 162,
    215, 578, 919, 917, 585,
    914, 326, 334, 366, 336,
    413, 111, 599, 416, 230,
    191, 700, 697, 332, 910,
    331, 771, 539, 575, 457,
]

# Set to True when retrying/restarting this script; otherwise an existing
# model directory is treated as an error so earlier results are not over-written.
restart = False
if os.path.exists(model_params['modelDir']):
    if not restart:
        raise FileExistsError(f"{model_params['modelDir']} exists but restart not requested")
else:
    os.makedirs(model_params['modelDir'])

# Last expression of the cell: displays the full parameter set.
model_params

{'modelName': 'out-of-site_map2017',
 'description': 'Create an ensemble of out-of-site models for 2017 LFMC maps',
 'modelClass': 'LfmcTempCnn',
 'modelDir': 'G:\\My Drive\\LFMC Data\\multi_modal_LFMC\\Models\\out-of-site_map2017',
 'tempDir': 'C:\\Temp\\LFMC',
 'diagnostics': False,
 'dataSources': ['modis', 'prism', 'aux'],
 'restartRun': None,
 'derivedModels': None,
 'saveModels': True,
 'saveTrain': None,
 'saveValidation': True,
 'plotModel': True,
 'randomSeed': 1234,
 'modelSeed': 1234,
 'modelRuns': 20,
 'resplit': False,
 'seedList': [441,
  780,
  328,
  718,
  184,
  372,
  346,
  363,
  701,
  358,
  566,
  451,
  795,
  237,
  788,
  185,
  397,
  530,
  758,
  633,
  632,
  941,
  641,
  519,
  162,
  215,
  578,
  919,
  917,
  585,
  914,
  326,
  334,
  366,
  336,
  413,
  111,
  599,
  416,
  230,
  191,
  700,
  697,
  332,
  910,
  331,
  771,
  539,
  575,
  457],
 'maxWorkers': 1,
 'deterministic': False,
 'gpuDevice': 0,
 'gpuMemory': 0,
 'modisFilename': 'G:\

## Prepare the data

In [4]:
def _read_input_csv(filename_key):
    """Read the CSV whose path is stored in ``model_params[filename_key]``.

    The first column of the file is used as the DataFrame index.
    """
    return pd.read_csv(model_params[filename_key], index_col=0)


# MODIS input: load the table, then reshape it using the configured
# number of MODIS channels.
modis_data = _read_input_csv('modisFilename')
x_modis = reshape_data(
    np.array(modis_data),
    model_params['modisChannels'],
)
print(f'Modis shape: {x_modis.shape}')

# PRISM input: same treatment, using the configured number of PRISM channels.
prism_data = _read_input_csv('prismFilename')
x_prism = reshape_data(
    np.array(prism_data),
    model_params['prismChannels'],
)
print(f'Prism shape: {x_prism.shape}')

# Auxiliary data: kept as a DataFrame; the target column supplies the labels.
aux_data = _read_input_csv('auxFilename')
y = aux_data[model_params['targetColumn']]

Modis shape: (66946, 365, 7)
Prism shape: (66946, 365, 7)


## Build and run the model
Builds and trains the LFMC model. After training the model, several derived models are created and evaluated. The full list of derived models is:
- `base` - The fully trained model
- `merge10` - A model created by merging the last 10 checkpoints. The checkpoints are merged by averaging the corresponding weights from each model.

The model and training statistics are saved to the directory given by `model_params['modelDir']`.

In [5]:
# Time-series inputs, keyed by data source name.
X = dict(modis=x_modis, prism=x_prism)

# Write the parameters used for this run into the model directory before
# training starts.
params_json = os.path.join(model_params['modelDir'], 'model_params.json')
with open(params_json, mode='w') as params_file:
    model_params.save(params_file)

# Build and train the models from the parameters, auxiliary data,
# time-series inputs and labels.
model = create_models(model_params, aux_data, X, y)

Auxiliary columns: ['Elevation', 'Slope', 'Aspect_sin', 'Aspect_cos', 'Long_sin', 'Long_cos', 'Lat_norm']
modis shape: (66946, 365, 7)
prism shape: (66946, 365, 7)
aux shape: (66946, 36)
