In [None]:
%matplotlib inline
import os
import sys

sys.path.append("../../")

from sapsan.lib.backends.fake import FakeBackend
from sapsan.lib.backends.mlflow import MLflowBackend
from sapsan.lib.data.hdf5_dataset import HDF5Dataset
from sapsan.lib.data import EquidistantSampling
from sapsan.lib.estimator import PICAE, PICAEConfig
from sapsan.lib.experiments.evaluate import Evaluate
from sapsan.lib.experiments.train import Train
from sapsan.utils.plot import model_graph

In [None]:
#--- Experiment tracking backend ---
#Name handed to the tracking backend; it reflects the estimator
#used in this notebook (PICAE)
experiment_name = "PICAE experiment"

#MLflow connection settings, only read when USE_MLFLOW is True
#launch mlflow with: mlflow ui --port=9000
MLFLOW_HOST = "localhost"
MLFLOW_PORT = 9000

#Set to True to track with MLflow;
#False keeps the Fake backend (tracking disabled)
USE_MLFLOW = False

if USE_MLFLOW:
    #MLflow
    tracking_backend = MLflowBackend(experiment_name, MLFLOW_HOST, MLFLOW_PORT)
else:
    #Fake (disable backend)
    tracking_backend = FakeBackend(experiment_name)

In [None]:
#--- Data setup ---
#To load and train on multiple timesteps, specify which checkpoints
#to use and where they appear in the path via the syntax:
#{checkpoint:format}
#
#Next, specify which features to load; let's assume
#        path = "{feature}.h5"
#
# 1) If the features are in different files, specify them directly;
#    the default HDF5 label will be the last label in the file
#    Ex: features = ['velocity', 'density', 'pressure']
# 2) If they are in the same file, duplicate the file name in features
#    and specify which labels to pull
#    Ex: features = ["data", "data", "data"]
#        feature_labels = ['velocity', 'density', 'pressure']

#Dimensionality of your data per axis
INPUT_SIZE = (32, 32, 32)

#Reduce dimensionality of each axis to
SAMPLE_TO = (16, 16, 16)

#Number of batches
BATCH_NUM = 1

#Path template, plus the features to load and the target to learn
path = "data/t{checkpoint:1.0f}/picae_random_{feature}.h5"
features = ["u"]
target = ["u"]

#Sampler to use for reduction from INPUT_SIZE to SAMPLE_TO
sampler = EquidistantSampling(INPUT_SIZE, SAMPLE_TO)

In [None]:
#Describe the dataset with the settings chosen in the data setup
data_loader = HDF5Dataset(
    path=path,
    features=features,
    target=target,
    checkpoints=[0],
    batch_num=BATCH_NUM,
    input_size=INPUT_SIZE,
    sampler=sampler,
)

#Load the (x, y) pair and convert it into the loaders
#that are passed to the training experiment
x, y = data_loader.load_numpy()
loaders = data_loader.convert_to_torch([x, y])

In [None]:
#Collect the model settings in a config object first,
#then hand it to the PICAE estimator
picae_config = PICAEConfig(
    nfilters=6,
    kernel_size=(3, 3, 3),
    enc_nlayers=3,
    dec_nlayers=3,
    n_epochs=1,
    patience=10,
    min_delta=1e-5,
)
estimator = PICAE(config=picae_config)

In [None]:
#Set up the training experiment from the backend, estimator,
#loaders and dataset description defined above
training_experiment = Train(
    backend=tracking_backend,
    model=estimator,
    loaders=loaders,
    data_parameters=data_loader,
)

#Train the model
training_experiment.run()

In [None]:
#--- Test the model ---
#Load the data (same dataset settings as for training)
data_loader = HDF5Dataset(
    path=path,
    features=features,
    target=target,
    checkpoints=[0],
    batch_num=BATCH_NUM,
    input_size=INPUT_SIZE,
    sampler=sampler,
)
x, y = data_loader.load_numpy()

#Set the test experiment
#loaders need to take in the same size data as the individual training batch
test_inputs = x[:BATCH_NUM]
test_targets = y[:BATCH_NUM]
evaluation_experiment = Evaluate(
    backend=tracking_backend,
    model=training_experiment.model,
    loaders=[test_inputs, test_targets],
    data_parameters=data_loader,
)

#Test the model
target_cube, pred_cube = evaluation_experiment.run()