# Modis-tempCNN mapping model
Train and save the Modis-tempCNN model used for map generation.

In [1]:
import os
import json
import numpy as np
import pandas as pd

import initialise
from model_utils import reshape_data
from modelling_functions import create_models, run_experiment
from model_parameters import ModelParams

## Directories and Input files
Change these settings as required
- `input_dir`: Directory containing the data extracted from GEE and Globe-LFMC, the outputs from running the `Extract DEM Data.ipynb` and `Extract MODIS Data.ipynb` notebooks.
- `output_dir`: A sub-directory named after `model_params['modelName']` (`modis-tempCNN_map2017` in this notebook) will be created in this directory, and all outputs generated by this notebook will be written there.
- `temp_dir`: A temporary directory where model checkpoints are created. This directory should exist and be empty.
- `modis_csv`: The file containing extracted MODIS data for each sample, created by `Extract MODIS Data.ipynb`
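- `smos_csv`: The file containing extracted SMOS data for each sample, created by `Extract MODIS Data.ipynb`. It is not needed for this model, which uses only the `modis` and `aux` data sources, so it is not set in the cell below.
- `prism_csv`: The file containing extracted PRISM data for each sample, created by `Extract MODIS Data.ipynb`. It is not needed for this model, which uses only the `modis` and `aux` data sources, so it is not set in the cell below.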
- `aux_csv`: The file containing extracted DEM and other auxiliary data for each sample, created by `Extract DEM Data.ipynb`. This includes the labels and pre-processed site location and sampling date data from Globe-LFMC

In [2]:
# Root folders -- edit these to suit the local environment.
input_dir = r'G:\My Drive\LFMC Data\LFMC_ensembles\Datasets'   # extracted input data
output_dir = r'G:\My Drive\LFMC Data\LFMC_ensembles\Models'    # parent of the model output directory
temp_dir = r'C:\Temp\LFMC'                                     # model checkpoints are created here

# Input files; both live directly under input_dir.
modis_csv, aux_csv = (
    os.path.join(input_dir, file_name)
    for file_name in ('modis_365days.csv', 'samples_365days.csv')
)

## Set up model parameters
### Architecture for Modis-tempCNN
These are the settings required for the Modis-tempCNN architecture.

In [4]:
# Start from the default parameter set, requesting 3 MODIS conv layers and 2 fully-connected layers
model_params = ModelParams(modis_layers=3, fc_layers=2)

# Top-level settings, applied one at a time in the order listed
for _name, _setting in {
    'modelClass': 'LfmcTempCnn',
    'dataSources': ['modis', 'aux'],
    'modisFilename': modis_csv,
    'auxFilename': aux_csv,
    'auxColumns': ['Day_sin', 'Day_cos', 'Long_sin', 'Long_cos', 'Lat_norm', 'Elevation', 'Slope', 'Aspect_sin', 'Aspect_cos'],
    'auxOneHotCols': [],
    'auxAugment': False,
    'dropoutRate': 0.5,
    'batchSize': 32,
    'epochs': 100,
}.items():
    model_params[_name] = _setting

# Both fully-connected layers use 256 units
for _layer in range(2):
    model_params['fc'][_layer]['units'] = 256

# Every MODIS convolutional layer uses 32 filters ...
for _layer in range(3):
    model_params['modisConv'][_layer]['filters'] = 32

# ... with pool sizes of 2, 3 and 4 for the first, second and third layer
for _layer, _pool_size in enumerate((2, 3, 4)):
    model_params['modisConv'][_layer]['poolSize'] = _pool_size

### Parameters for mapping models

In [5]:
# Keep the Keras 'merge10' model on disk so it can be used for map-wide estimates
model_params['saveModels'] = ['merge10']

# Yearly models: split by year, excluding data for 2017 or later
model_params['splitMethod'] = 'byYear'
model_params['splitYear'] = 2017

# Modis-tempCNN isn't an ensemble, so a single run is enough
model_params['modelRuns'] = 1

### Other model parameters
These parameters can be changed. To find out more about any parameter, run `model_params.help('<parameter>')`. 

In [6]:
# Identification, seed, and directory settings for this model
model_params['modelName'] = 'modis-tempCNN_map2017'
model_params['description'] = 'Build model for 2017 maps'
model_params['seedList'] = [441]
model_params['tempDir'] = temp_dir
model_params['modelDir'] = os.path.join(output_dir, model_params['modelName'])

# Set to True when retrying/restarting this script against an existing model directory
restart = False

if os.path.exists(model_params['modelDir']):
    # An existing directory is only acceptable on an explicit restart --
    # don't over-write something important!
    if not restart:
        raise FileExistsError(f"{model_params['modelDir']} exists but restart not requested")
else:
    os.makedirs(model_params['modelDir'])

# Show the complete parameter set as the cell output
model_params

{'modelName': 'modis-tempCNN_map2017',
 'description': 'Build model for 2017 maps',
 'modelClass': 'LfmcTempCnn',
 'modelDir': 'G:\\My Drive\\LFMC Data\\LFMC_ensembles\\Models\\modis-tempCNN_map2017',
 'tempDir': 'C:\\Temp\\LFMC',
 'diagnostics': False,
 'dataSources': ['modis', 'aux'],
 'restartRun': None,
 'saveModels': ['merge10'],
 'saveTrain': None,
 'plotModel': True,
 'randomSeed': 1234,
 'modelSeed': 1234,
 'modelRuns': 1,
 'resplit': False,
 'seedList': [441],
 'maxWorkers': 1,
 'deterministic': False,
 'gpuDevice': 0,
 'gpuMemory': 0,
 'modisFilename': 'G:\\My Drive\\LFMC Data\\LFMC_ensembles\\Datasets\\modis_365days.csv',
 'modisChannels': 7,
 'modisNormalise': {'method': 'minMax', 'percentiles': 2},
 'modisStart': None,
 'modisEnd': None,
 'prismFilename': None,
 'prismChannels': 7,
 'prismNormalise': {'method': 'minMax', 'percentiles': 2},
 'prismStart': None,
 'prismEnd': None,
 'auxFilename': 'G:\\My Drive\\LFMC Data\\LFMC_ensembles\\Datasets\\samples_365days.csv',
 'aux

## Prepare the data

In [7]:
# MODIS inputs: read the CSV, then let reshape_data arrange the values
# using the configured number of MODIS channels.
modis_data = pd.read_csv(modis_csv, index_col=0)
x_modis = reshape_data(
    np.array(modis_data),
    model_params['modisChannels'],
)
print('Modis shape: {}'.format(x_modis.shape))

# Auxiliary inputs; the column named by 'targetColumn' supplies the labels.
aux_data = pd.read_csv(aux_csv, index_col=0)
y = aux_data[model_params['targetColumn']]

Modis shape: (66946, 365, 7)


## Build and run the model
Builds and trains the LFMC model. After training the model, several derived models are created and evaluated. The full list of derived models is:
- `base` - The fully trained model
- `best` - A model using the checkpoint with the best training loss
- `merge10` - A model created by merging the last 10 checkpoints. The checkpoints are merged by averaging the corresponding weights from each model.
- `ensemble10` - An ensembled model of the last 10 checkpoints. This model averages the predictions made by each model in the ensemble to make the final prediction.
- `merge_best10` - Similar to the merge10 model, but uses the 10 checkpoints with the lowest training/validation losses.

The model (if requested), predictions, and evaluation statistics are saved to the model directory (`model_params['modelDir']`). Predictions and evaluation statistics are also returned as attributes of the `model` object.

In [8]:
def is_experiment():
    """Report whether a global ``experiment`` with a truthy ``'tests'`` entry exists.

    Returns:
        bool: ``True`` when ``experiment['tests']`` can be read and is truthy;
        ``False`` when ``experiment`` is undefined, has no usable ``'tests'``
        entry, or the lookup fails with any other ordinary exception.
    """
    try:
        return bool(experiment['tests'])
    except Exception:
        # Best-effort probe: a missing or malformed `experiment` simply means
        # "not an experiment". Catching Exception rather than using a bare
        # `except:` lets KeyboardInterrupt and SystemExit propagate, so the
        # kernel can still be interrupted.
        return False

In [9]:
# Record the exact parameters used alongside the other model outputs
with open(os.path.join(model_params['modelDir'], 'model_params.json'), 'w') as f:
    model_params.save(f)

# Model inputs keyed by data source name
X = {'modis': x_modis}

# Build and train the model, then show the evaluation statistics if the model has them
model = create_models(model_params, aux_data, X, y)
display(getattr(model, 'all_stats', None))

Auxiliary columns: ['Day_sin', 'Day_cos', 'Long_sin', 'Long_cos', 'Lat_norm', 'Elevation', 'Slope', 'Aspect_sin', 'Aspect_cos']
modis shape: (66946, 365, 7)
aux shape: (66946, 9)
modis-tempCNN_map2017 training results: minLoss: 615.397, runTime: 1247.780


Unnamed: 0,Bias,R,R2,RMSE,ubRMSE,runTime
base,-2.3,0.78,0.6,24.01,23.9,0.85
best,-7.11,0.77,0.55,25.36,24.34,0.28
merge10,-4.52,0.78,0.6,23.96,23.53,0.3
ensemble10,-4.35,0.79,0.6,23.84,23.44,2.8
merge_best10,-4.76,0.78,0.6,24.04,23.56,0.28
