In [None]:
%matplotlib inline
import os
import sys

sys.path.append("../../")

from sapsan.lib.backends import MLflowBackend
from sapsan.lib.data import HDF5Dataset, EquidistantSampling, flatten
from sapsan.lib.estimator.cnn.cnn3d_estimator import CNN3d, CNN3dConfig
from sapsan import Train, Evaluate, model_graph

In [None]:
#--- Experiment tracking backend ---

#MLflow is the optional tracking backend for this notebook.
#The server will be launched automatically;
#in case it won't, type in cmd: mlflow ui --port=9000
#To enable tracking, uncomment tracking_backend below together with
#the backend=tracking_backend argument where the experiments are set up.

#Experiment name; only referenced by the commented-out MLflowBackend line
experiment_name = "CNN experiment"
#tracking_backend = MLflowBackend(experiment_name, host="localhost", port=9000)

In [None]:
#--- Data setup ---
#To load and train on multiple timesteps, specify which checkpoints
#to use and mark where the checkpoint appears in the path with the
#syntax: {checkpoint:format}
#
#Then specify which features to load. Let's assume
#        path = "{feature}.h5"
#
# 1) Features stored in different files: name the features directly.
#    The default HDF5 label will be the last label in the file
#    Ex: features = ['velocity', 'density', 'pressure']
# 2) Features stored in the same file: repeat the file name in
#    features and specify which labels to pull
#    Ex: features = ["data", "data", "data"]
#        feature_labels = ['velocity', 'density', 'pressure']

#Path template with {checkpoint} and {feature} placeholders
path = "data/t{checkpoint:1.0f}/{feature}_dim32_fm15.h5"

#Both the input features and the target are 'u'
features = ["u"]
target = ["u"]

#Dimensionality of your data in format (D,H,W)
INPUT_SIZE = (32, 32, 32)

#Reduce dimensionality to the following in format (D,H,W)
SAMPLE_TO = (16, 16, 16)

#Sampler to use for the reduction; constructed from the SAMPLE_TO size
sampler = EquidistantSampling(SAMPLE_TO)

In [None]:
#Set up the loader for the training data
data_loader = HDF5Dataset(
    path=path,
    features=features,
    target=target,
    checkpoints=[0],
    input_size=INPUT_SIZE,
    sampler=sampler,
    shuffle=False,
    train_fraction=1,
)

#Read features and target as numpy arrays, then flatten the target
x, y = data_loader.load_numpy()
y = flatten(y)

#convert_to_torch takes in a list or a numpy array
loaders = data_loader.convert_to_torch([x, y])

In [None]:
#Machine Learning model to use

#Model configuration parameters:
#    n_epochs  = number of epochs (iterations)
#    patience  = number of epochs to run beyond convergence
#    min_delta = loss based convergence cut-off
estimator = CNN3d(
    config=CNN3dConfig(n_epochs=5, patience=10, min_delta=1e-5),
    loaders=loaders,
)

In [None]:
#--- Train the model ---
#Set up the training experiment
training_experiment = Train(
    model=estimator,
    #backend=tracking_backend, #uncomment to use mlflow
    data_parameters=data_loader,
)

#Run the training; the returned model replaces estimator
estimator = training_experiment.run()

In [None]:
%matplotlib inline

#--- Test the model ---
#Load the test data (same path template and checkpoint as the training cell)
data_loader = HDF5Dataset(
    path=path,
    features=features,
    target=target,
    checkpoints=[0],
    input_size=INPUT_SIZE,
    sampler=sampler,
)
#NOTE(review): y is not flattened here, unlike in the training cell -
#presumably Evaluate handles the target shape itself; confirm
x, y = data_loader.load_numpy()
loaders = data_loader.convert_to_torch([x, y])

#Hand the test loaders to the trained model
estimator.loaders = loaders

#Set up the test experiment
evaluation_experiment = Evaluate(
    model=estimator,
    #backend=tracking_backend, #uncomment to use mlflow
    data_parameters=data_loader,
)

#Test the model
#the evaluation returns a dict, cubes = {'pred_cube':np.ndarray, 'target_cube':np.ndarray}
cubes = evaluation_experiment.run()