In [1]:
import os 
import sys

# Make the directory two levels above the current working directory importable,
# so the project-local packages imported in the next cell can be resolved.
# The membership check keeps this cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path every time.
_repo_root = os.path.abspath(os.path.join(os.getcwd(), "../../"))
if _repo_root not in sys.path:
    sys.path.append(_repo_root)

In [None]:
import torch
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

# Own library imports
from vecopsciml.utils import TensOps
from vecopsciml.operators.zero_order import Mx, My
from vecopsciml.kernels.derivative import DerivativeKernels

# Function from this project
from utils.folders import create_folder
from utils.load_data import load_data
from trainers.train import train_loop, train_autoencoder_loop

# Import model
from architectures.autoencoder import Autoencoder
from architectures.pgnniv_decoder import PGNNIVAutoencoder

In [3]:
# Dataset selection: these three values are joined into the dataset identifier
# used to build the data and results paths.
# The name string lives in `dataset_name` rather than `dataset`, because
# `dataset` is rebound to the loaded data further down the notebook; sharing
# one name meant that re-running this cell replaced the loaded data with a string.
dataset_name = 'non_linear'
N_data = 100
noise = 1

data_name = f"{dataset_name}_{N_data}_{noise}"

In [4]:
# Model selection: the model kind and the number of modes are joined into the
# identifier used to name the results folders.
# The kind string lives in `model_type` rather than `model`, because `model` is
# rebound to the network instance further down the notebook; sharing one name
# meant that re-running this cell replaced the trained network with a string.
model_type = 'autoencoder'
# Also used below as the width of the autoencoder bottleneck and of the last
# predictive layer.
n_modes = 10

model_name = f"{model_type}_model_{n_modes}"

In [15]:
# Repository root: two levels above the current working directory.
ROOT_PATH = os.path.abspath(os.path.join(os.getcwd(), "../../"))

# Dataset file: <root>/data/<data_name>/<data_name>.pkl
DATA_PATH = os.path.join(ROOT_PATH, r'data/', data_name, data_name) + '.pkl'

# One results folder per dataset, holding one sub-folder per training stage
# (suffix '_AE' for the autoencoder, '_NN' for the PGNNIV).
RESULTS_FOLDER_PATH = os.path.join(ROOT_PATH, r'results/', data_name)
MODEL_RESULTS_AE_PATH = os.path.join(RESULTS_FOLDER_PATH, model_name) + '_AE'
MODEL_RESULTS_PGNNIV_PATH = os.path.join(RESULTS_FOLDER_PATH, model_name) + '_NN'

# Create whichever folders are needed (a notice is printed for those that already exist).
for results_dir in (RESULTS_FOLDER_PATH, MODEL_RESULTS_AE_PATH, MODEL_RESULTS_PGNNIV_PATH):
    create_folder(results_dir)

Folder already exists at: /home/rmunoz/Escritorio/rmunozTMELab/Physically-Guided-Machine-Learning/results/non_linear_100_1
Folder already exists at: /home/rmunoz/Escritorio/rmunozTMELab/Physically-Guided-Machine-Learning/results/non_linear_100_1/autoencoder_model_10_AE
Folder already exists at: /home/rmunoz/Escritorio/rmunozTMELab/Physically-Guided-Machine-Learning/results/non_linear_100_1/autoencoder_model_10_NN


In [16]:
# Load the dataset from DATA_PATH. The returned object is indexed by string key
# in the following cells (e.g. 'x_step_size', 'X_train', 'y_val').
# NOTE(review): the '.pkl' extension suggests a pickle file — only load files
# from a trusted source; confirm against load_data.
dataset = load_data(DATA_PATH)

Data successfully loaded from: /home/rmunoz/Escritorio/rmunozTMELab/Physically-Guided-Machine-Learning/data/non_linear_100_1/non_linear_100_1.pkl


In [17]:
# Grid step sizes along x and y, as stored with the dataset.
dx, dy = dataset['x_step_size'], dataset['y_step_size']

# Two-dimensional gradient kernels built from those step sizes; they are handed
# to the PGNNIV training loop as `D`.
# NOTE(review): the meaning of the third argument (0) is not visible from this
# notebook — confirm against DerivativeKernels.
D = DerivativeKernels(dx, dy, 0).grad_kernels_two_dimensions()

In [18]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

print(f"Using device: {DEVICE}")

Using device: cuda


### División de los datos

In [19]:
# Training arrays -> tensors, each with a singleton axis inserted at dim 1.
# torch.Tensor(...) builds a tensor of the default floating dtype, whereas
# torch.tensor(...) keeps the dtype of the stored array: K is left as stored
# and f is cast to float32 explicitly.
X_train = torch.Tensor(dataset['X_train']).unsqueeze(1)
y_train = torch.Tensor(dataset['y_train']).unsqueeze(1)
K_train = torch.tensor(dataset['k_train']).unsqueeze(1)
f_train = torch.tensor(dataset['f_train']).to(torch.float32).unsqueeze(1)

# Validation arrays: the input stays a plain tensor, while y, K and f are
# gradient-enabled and wrapped in TensOps.
# NOTE(review): space_dimension=2 with zero contravariance/covariance presumably
# denotes a scalar field on a 2-D domain — confirm against vecopsciml.
X_val = torch.Tensor(dataset['X_val']).unsqueeze(1)
y_val = TensOps(
    torch.Tensor(dataset['y_val']).unsqueeze(1).requires_grad_(True),
    space_dimension=2, contravariance=0, covariance=0,
)
K_val = TensOps(
    torch.tensor(dataset['k_val']).unsqueeze(1).requires_grad_(True),
    space_dimension=2, contravariance=0, covariance=0,
)
f_val = TensOps(
    torch.tensor(dataset['f_val']).to(torch.float32).unsqueeze(1).requires_grad_(True),
    space_dimension=2, contravariance=0, covariance=0,
)

print("Train dataset length:", X_train.shape[0])
print("Validation dataset length:", X_val.shape[0])

Train dataset length: 80
Validation dataset length: 20


In [20]:
# Split the training samples into two disjoint parts: one to pre-train the
# autoencoder and one to train the PGNNIV. With an odd number of samples the
# PGNNIV part receives the extra one.
N_data_AE = len(X_train) // 2
N_data_NN = len(X_train) - N_data_AE
# Fraction of the samples assigned to the PGNNIV (kept for reference; the split
# below uses the exact count instead).
prop_data_NN = 1 - N_data_AE/(N_data_NN + N_data_AE)

print("Dataset length for the autoencoder:", N_data_AE)
print("Dataset length for the PGNNIV:", N_data_NN)

# test_size is given as an absolute number of samples. A float fraction is
# multiplied back by the sample count and rounded up inside train_test_split,
# so floating-point error could make the split one sample larger than the
# N_data_NN printed above; the integer form always matches it exactly.
X_AE, X_NN, y_AE, y_NN, K_AE, K_NN, f_AE, f_NN = train_test_split(
    X_train, y_train, K_train, f_train,
    test_size=N_data_NN, random_state=42,
)

Dataset length for the autoencoder: 40
Dataset length for the PGNNIV: 40


#### Datos para el autoencoder

In [21]:
# Hold out 20% of the autoencoder samples for testing (fixed seed, so the split
# is repeatable).
y_train_AE, y_test_AE = train_test_split(y_AE, test_size=0.2, random_state=42)

# Move each tensor to DEVICE first and enable gradients afterwards. In the
# opposite order, `.to(DEVICE)` returns a non-leaf copy whenever DEVICE differs
# from the tensor's current device, so every backward pass would route
# gradients back to a hidden host-side tensor instead of the one stored here.
y_train_AE = TensOps(y_train_AE.to(DEVICE).requires_grad_(True), space_dimension=2, contravariance=0, covariance=0)
y_test_AE = TensOps(y_test_AE.to(DEVICE).requires_grad_(True), space_dimension=2, contravariance=0, covariance=0)

#### Datos para la PGNNIV

In [28]:
# Hold out 20% of the PGNNIV samples for testing; the four arrays are split
# together so their samples stay aligned (fixed seed, so the split is repeatable).
(X_train_NN, X_test_NN,
 y_train_NN, y_test_NN,
 K_train_NN, K_test_NN,
 f_train_NN, f_test_NN) = train_test_split(X_NN, y_NN, K_NN, f_NN, test_size=0.2, random_state=42)

# Inputs stay plain tensors on DEVICE.
X_train_NN = X_train_NN.to(DEVICE)
X_test_NN = X_test_NN.to(DEVICE)

# Targets: move to DEVICE first and enable gradients afterwards. In the opposite
# order, `.to(DEVICE)` returns a non-leaf copy whenever DEVICE differs from the
# tensor's current device, so gradients would flow back to a hidden host-side
# tensor instead of the one stored here.
y_train_NN = TensOps(y_train_NN.to(DEVICE).requires_grad_(True), space_dimension=2, contravariance=0, covariance=0)
y_test_NN = TensOps(y_test_NN.to(DEVICE).requires_grad_(True), space_dimension=2, contravariance=0, covariance=0)

# K and f are wrapped without gradient tracking.
K_train_NN = TensOps(K_train_NN.to(DEVICE), space_dimension=2, contravariance=0, covariance=0)
K_test_NN = TensOps(K_test_NN.to(DEVICE), space_dimension=2, contravariance=0, covariance=0)

f_train_NN = TensOps(f_train_NN.to(DEVICE), space_dimension=2, contravariance=0, covariance=0)
f_test_NN = TensOps(f_test_NN.to(DEVICE), space_dimension=2, contravariance=0, covariance=0)

## Autoencoder

In [30]:
# Input and output share the shape of a single training sample.
autoencoder_input_shape = autoencoder_output_shape = y_train_AE.values[0].shape

# Sizes passed to the Autoencoder constructor, symmetric around the n_modes entry.
# NOTE(review): presumably hidden-layer widths with n_modes as the bottleneck —
# confirm against architectures.autoencoder.
latent_space_dim = [15, 10, n_modes, 10, 15]

In [31]:
# Inputs and targets for the autoencoder come from the same data: `.values` of
# the TensOps object is used as the input and the TensOps object itself as the
# target.
# NOTE(review): this rebinds X_train / y_train, which held the full training set
# in the data-splitting cells above; re-running those cells after this one
# would split the wrong tensors.
X_train, y_train = y_train_AE.values, y_train_AE
X_test, y_test = y_test_AE.values, y_test_AE

In [32]:
# Build the autoencoder, place it on DEVICE and attach an Adam optimizer.
autoencoder = Autoencoder(autoencoder_input_shape, latent_space_dim, autoencoder_output_shape)
autoencoder = autoencoder.to(DEVICE)
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=1e-2)

# First training stage, starting at epoch 0.
start_epoch, n_epochs = 0, 1000
batch_size = 64
n_checkpoint = 10
# NOTE(review): None presumably leaves the optimizer's learning rate unchanged —
# confirm against train_autoencoder_loop.
new_lr = None

train_autoencoder_loop(
    autoencoder, optimizer,
    X_train, y_train, X_test, y_test,
    n_checkpoint, start_epoch, n_epochs, batch_size,
    MODEL_RESULTS_AE_PATH, DEVICE, new_lr,
)

Starting training from scratch.
Epoch 0, Train loss: 5.446e+01, Test loss: 8.564e+01
Epoch 10, Train loss: 8.212e+00, Test loss: 1.227e+01
Epoch 20, Train loss: 3.451e+00, Test loss: 7.113e+00
Epoch 30, Train loss: 2.433e+00, Test loss: 4.520e+00
Epoch 40, Train loss: 2.064e+00, Test loss: 4.114e+00
Epoch 50, Train loss: 1.932e+00, Test loss: 3.814e+00
Epoch 60, Train loss: 1.888e+00, Test loss: 3.679e+00
Epoch 70, Train loss: 1.863e+00, Test loss: 3.682e+00
Epoch 80, Train loss: 1.853e+00, Test loss: 3.668e+00
Epoch 90, Train loss: 1.835e+00, Test loss: 3.670e+00
Epoch 100, Train loss: 1.784e+00, Test loss: 3.664e+00
Epoch 110, Train loss: 1.618e+00, Test loss: 3.611e+00
Epoch 120, Train loss: 1.145e+00, Test loss: 3.432e+00
Epoch 130, Train loss: 8.745e-01, Test loss: 3.903e+00
Epoch 140, Train loss: 7.375e-01, Test loss: 3.176e+00
Epoch 150, Train loss: 8.005e-01, Test loss: 2.504e+00
Epoch 160, Train loss: 6.026e-01, Test loss: 2.298e+00
Epoch 170, Train loss: 5.625e-01, Test loss:

In [33]:
# Second training stage for the autoencoder: start from epoch 900 and continue
# up to epoch 2000, passing a smaller learning rate (1e-4) through `new_lr`.
# (A stray `4` expression statement that sat before the call has been removed;
# it had no effect.)
start_epoch = 900
n_epochs = 2000
batch_size = 64
n_checkpoint = 10
new_lr = 1e-4

train_autoencoder_loop(autoencoder, optimizer, X_train, y_train, X_test, y_test,
                       n_checkpoint, start_epoch, n_epochs, batch_size, MODEL_RESULTS_AE_PATH, DEVICE, new_lr)

Starting training from a checkpoint. Epoch 900.
Epoch 900, Train loss: 2.523e-01, Test loss: 6.554e-01
Epoch 1000, Train loss: 2.334e-01, Test loss: 5.859e-01
Epoch 1100, Train loss: 2.327e-01, Test loss: 5.852e-01
Epoch 1200, Train loss: 2.322e-01, Test loss: 5.850e-01
Epoch 1300, Train loss: 2.317e-01, Test loss: 5.850e-01
Epoch 1400, Train loss: 2.313e-01, Test loss: 5.852e-01
Epoch 1500, Train loss: 2.311e-01, Test loss: 5.855e-01
Epoch 1600, Train loss: 2.308e-01, Test loss: 5.858e-01
Epoch 1700, Train loss: 2.306e-01, Test loss: 5.862e-01
Epoch 1800, Train loss: 2.305e-01, Test loss: 5.865e-01
Epoch 1900, Train loss: 2.303e-01, Test loss: 5.868e-01


## PGNNIV

In [37]:
# Predictive network architecture
# Input shape is that of one input sample; the layer list ends with n_modes,
# and the output shape is that of one target sample.
input_shape = X_train_NN[0].shape
predictive_layers = [15, 10, n_modes]
predictive_output = y_train_NN.values[0].shape

# Explanatory network architecture
# Input and output shapes are taken from the fields after applying the My and
# Mx operators (My first, then Mx).
# NOTE(review): Mx/My come from vecopsciml.operators.zero_order; how they change
# the field shape is not visible here — confirm against the library.
explanatory_input = Mx(My(y_train_NN)).values[0].shape
explanatory_layers = [10, 10]
explanatory_output = Mx(My(f_train_NN)).values[0].shape

# Other parameters
# Passed as the last argument of the PGNNIVAutoencoder constructor below.
n_filters_explanatory = 5

In [38]:
# Reuse the decoder of the trained autoencoder inside the PGNNIV.
# This is the same object as `autoencoder.decoder`, so freezing it here also
# freezes the decoder of `autoencoder`.
pretrained_decoder = autoencoder.decoder

# Turn off gradient tracking for every decoder parameter.
pretrained_decoder.requires_grad_(False)

# Print the flag of each parameter to confirm that the decoder is frozen.
for name, param in pretrained_decoder.named_parameters():
    print(f"{name}: requires_grad={param.requires_grad}")

hidden1_layer.weight: requires_grad=False
hidden1_layer.bias: requires_grad=False
hidden2_layer.weight: requires_grad=False
hidden2_layer.bias: requires_grad=False
output_layer.weight: requires_grad=False
output_layer.bias: requires_grad=False


In [40]:
# Build the PGNNIV around the pretrained decoder and place it on DEVICE.
model = PGNNIVAutoencoder(
    input_shape, predictive_layers, pretrained_decoder, predictive_output,
    explanatory_input, explanatory_layers, explanatory_output, n_filters_explanatory,
)
model = model.to(DEVICE)

# New Adam optimizer for this model (rebinds `optimizer`, which previously
# belonged to the autoencoder).
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

# Training parameters
start_epoch, n_epochs = 0, 1000
batch_size = 64
n_checkpoints = 10

# First training stage, starting at epoch 0. The K tensors are not passed to
# the loop.
train_loop(
    model, optimizer,
    X_train_NN, y_train_NN, f_train_NN,
    X_test_NN, y_test_NN, f_test_NN,
    D, n_checkpoints,
    start_epoch=start_epoch, n_epochs=n_epochs, batch_size=batch_size,
    model_results_path=MODEL_RESULTS_PGNNIV_PATH, device=DEVICE,
)

Starting training from scratch.
Epoch 0, Train loss: 3.215e+07, Test loss: 2.633e+07, MSE(e): 3.207e+00, MSE(pi1): 5.922e+00, MSE(pi2): 1.433e+00, MSE(pi3): 2.240e-01
Epoch 10, Train loss: 1.863e+07, Test loss: 2.269e+07, MSE(e): 1.860e+00, MSE(pi1): 1.241e+00, MSE(pi2): 8.586e-01, MSE(pi3): 9.997e-02
Epoch 20, Train loss: 1.724e+07, Test loss: 1.291e+07, MSE(e): 1.722e+00, MSE(pi1): 1.154e+00, MSE(pi2): 7.925e-01, MSE(pi3): 1.026e-01
Epoch 30, Train loss: 1.407e+07, Test loss: 1.106e+07, MSE(e): 1.405e+00, MSE(pi1): 1.141e+00, MSE(pi2): 6.623e-01, MSE(pi3): 1.032e-01
Epoch 40, Train loss: 9.204e+06, Test loss: 7.417e+06, MSE(e): 9.182e-01, MSE(pi1): 1.154e+00, MSE(pi2): 4.561e-01, MSE(pi3): 9.995e-02
Epoch 50, Train loss: 5.276e+06, Test loss: 1.001e+07, MSE(e): 5.254e-01, MSE(pi1): 1.116e+00, MSE(pi2): 2.886e-01, MSE(pi3): 1.002e-01
Epoch 60, Train loss: 4.926e+06, Test loss: 1.113e+07, MSE(e): 4.905e-01, MSE(pi1): 1.109e+00, MSE(pi2): 2.773e-01, MSE(pi3): 9.693e-02
Epoch 70, Train l

In [41]:
# Training parameters for the second stage: start from epoch 900 and continue
# up to epoch 2000.
start_epoch, n_epochs = 900, 2000
batch_size = 64
n_checkpoints = 100

# Smaller learning rate, handed to the loop through `new_lr`.
second_lr = 1e-4

train_loop(
    model, optimizer,
    X_train_NN, y_train_NN, f_train_NN,
    X_test_NN, y_test_NN, f_test_NN,
    D, n_checkpoints,
    start_epoch=start_epoch, n_epochs=n_epochs, batch_size=batch_size,
    model_results_path=MODEL_RESULTS_PGNNIV_PATH, device=DEVICE, new_lr=second_lr,
)

Starting training from a checkpoint. Epoch 900.
Epoch 900, Train loss: 2.824e+06, Test loss: 5.610e+06, MSE(e): 2.810e-01, MSE(pi1): 8.432e-01, MSE(pi2): 1.993e-01, MSE(pi3): 5.704e-02
Epoch 1000, Train loss: 2.818e+06, Test loss: 5.594e+06, MSE(e): 2.803e-01, MSE(pi1): 8.477e-01, MSE(pi2): 1.987e-01, MSE(pi3): 5.698e-02
Epoch 1100, Train loss: 2.818e+06, Test loss: 5.595e+06, MSE(e): 2.803e-01, MSE(pi1): 8.478e-01, MSE(pi2): 1.987e-01, MSE(pi3): 5.699e-02
Epoch 1200, Train loss: 2.817e+06, Test loss: 5.595e+06, MSE(e): 2.803e-01, MSE(pi1): 8.480e-01, MSE(pi2): 1.987e-01, MSE(pi3): 5.699e-02
Epoch 1300, Train loss: 2.817e+06, Test loss: 5.595e+06, MSE(e): 2.803e-01, MSE(pi1): 8.481e-01, MSE(pi2): 1.987e-01, MSE(pi3): 5.700e-02
Epoch 1400, Train loss: 2.817e+06, Test loss: 5.595e+06, MSE(e): 2.803e-01, MSE(pi1): 8.482e-01, MSE(pi2): 1.987e-01, MSE(pi3): 5.700e-02
Epoch 1500, Train loss: 2.817e+06, Test loss: 5.595e+06, MSE(e): 2.803e-01, MSE(pi1): 8.483e-01, MSE(pi2): 1.986e-01, MSE(pi3