# Predict TVC from the gridded Sv MBES data, using linear regression models
This notebook executes the following steps:
* For each grid file:
    * Load model files
    * For each linear regression model:
        * Predict the `y_dimension` for each grid cell
        * Save the predicted grid to its own netCDF file (one file per grid file and model)

In [1]:
import os
from glob import glob
from timbers_code.extract_and_model import predict_raster_from_model
from timbers_code.utils import create_folder_if_absent

## Set the parameters
* `raster_files`: netCDF files of gridded Sv MBES data to use for prediction
* `model_files`: linear regression model files to apply
* `x_dimension`: name of the variable in the grid file to use as the independent variable in the prediction
* `exp_y_dimension`: whether the dependent variable should be exponentiated (base 10) after prediction
* `stack_coords`: the names of the x, y and z coordinates of the grids
* `output_raster_path`: the path where the netCDF files of the predicted grids should be saved

In [2]:
# --- data input ---
# glob() returns matches in arbitrary, filesystem-dependent order; sorting
# keeps the processing order (and therefore the order of `output_files`)
# reproducible between runs and machines.
raster_files = sorted(glob('data/gridding/grid_*.nc'))
model_files = sorted(glob('data/models/*.joblib'))
# --- input parameters ---
# name of the grid variable used as the independent variable
x_dimension = 'mean_value_db'
# exponentiate (base 10) the dependent variable after prediction
exp_y_dimension = True
# names of the x/y/z coordinates of the grids
stack_coords = ("x","y","z")
# --- data output ---
# NOTE(review): absolute, drive-specific path; adjust it for the machine
# the notebook is run on.
output_raster_path = 'G:/Algorithm2/predictions_final'

## Raster predictions

In [3]:
%%time
output_files = []
for raster_file in raster_files:
    output_raster_path_for_raster_file = f'{output_raster_path}/{os.path.splitext(os.path.basename(raster_file))[0].replace("_"+x_dimension,"")}'
    create_folder_if_absent(output_raster_path_for_raster_file)
    for model_file in model_files:
        output_file = predict_raster_from_model(raster_file = raster_file,
                                  model_file = model_file,
                                  output_raster_path = output_raster_path_for_raster_file,
                                  x_dimension = x_dimension,
                                  exp_y_dimension = exp_y_dimension,
                                  stack_coords = stack_coords)
        output_files.append(output_file)
output_files

Wall time: 44min 29s


['G:/Algorithm2/predictions_final/grid_Multiingest_0095_6565_20690/grid_Multiingest_0095_6565_20690_mean_value_db_TVC_(200.0,_420.0].nc',
 'G:/Algorithm2/predictions_final/grid_Multiingest_0095_6565_20690/grid_Multiingest_0095_6565_20690_mean_value_db_Average_of_Turbidity_NTU.nc',
 'G:/Algorithm2/predictions_final/grid_Multiingest_0095_6565_20690/grid_Multiingest_0095_6565_20690_mean_value_db_Total_Volume_Concentration_[ppm].nc',
 'G:/Algorithm2/predictions_final/grid_Multiingest_0095_6565_20690/grid_Multiingest_0095_6565_20690_mean_value_db_TVC_(0.0,_3.0].nc',
 'G:/Algorithm2/predictions_final/grid_Multiingest_0095_6565_20690/grid_Multiingest_0095_6565_20690_mean_value_db_TVC_(3.0,_20.0].nc',
 'G:/Algorithm2/predictions_final/grid_Multiingest_0095_6565_20690/grid_Multiingest_0095_6565_20690_mean_value_db_TVC_(20.0,_200.0].nc',
 'G:/Algorithm2/predictions_final/grid_Multiingest_0095_6565_21092/grid_Multiingest_0095_6565_21092_mean_value_db_TVC_(200.0,_420.0].nc',
 'G:/Algorithm2/predic