In [1]:
import os
import torch
import GPUtil
from sklearn.model_selection import train_test_split

# Imports de la libreria propia
from vecopsciml.kernels.derivative import DerivativeKernels
from vecopsciml.utils import TensOps

# Imports de las funciones creadas para este programa
from utils.folders import create_folder
from utils.load_data import load_data
from trainers.train import train_loop

In [2]:
# Absolute location of the project checkout; every other path below is derived from it
ROOT_PATH = r'/home/rmunoz/Escritorio/rmunozTMELab/Physically-Guided-Machine-Learning'

# Input dataset
DATA_PATH = os.path.join(ROOT_PATH, r'data/non_linear/non_linear_10000.pkl')

# Output folders: the common results folder plus one folder per trained model
RESULTS_FOLDER_PATH = os.path.join(ROOT_PATH, r'results/non_linear')
MODEL_RESULTS_AE_PATH = os.path.join(ROOT_PATH, r'results/non_linear/model_autoencoder_AE')
MODEL_RESULTS_PGNNIV_PATH = os.path.join(ROOT_PATH, r'results/non_linear/model_autoencoder_NN')

# Create the folders that are needed, parent first (a message is printed for each one,
# whether it already existed or was just created)
for results_dir in (RESULTS_FOLDER_PATH, MODEL_RESULTS_AE_PATH, MODEL_RESULTS_PGNNIV_PATH):
    create_folder(results_dir)

Folder already exists at: /home/rmunoz/Escritorio/rmunozTMELab/Physically-Guided-Machine-Learning/results/non_linear
Folder successfully created at: /home/rmunoz/Escritorio/rmunozTMELab/Physically-Guided-Machine-Learning/results/non_linear/model_autoencoder_AE
Folder successfully created at: /home/rmunoz/Escritorio/rmunozTMELab/Physically-Guided-Machine-Learning/results/non_linear/model_autoencoder_NN


In [3]:
# Load the dataset stored at DATA_PATH with the project's load_data helper.
# Later cells index the result by string keys (e.g. 'x_step_size', 'X_train', 'y_val').
dataset = load_data(DATA_PATH)

Data successfully loaded from: /home/rmunoz/Escritorio/rmunozTMELab/Physically-Guided-Machine-Learning/data/non_linear/non_linear_10000.pkl


In [4]:
# Grid step sizes stored with the dataset; they parameterize the convolutional
# filters used to take derivatives
dx, dy = dataset['x_step_size'], dataset['y_step_size']
D = DerivativeKernels(dx, dy, 0).grad_kernels_two_dimensions()

In [5]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

print(f"Using device: {DEVICE}")

Using device: cuda


### División de los datos

In [6]:
# Training arrays as plain tensors; unsqueeze(1) inserts a singleton axis at position 1.
# NOTE(review): K_train is built with torch.tensor and no explicit cast, so it keeps the dtype
# inferred from dataset['k_train'], whereas f_train is cast to float32 — confirm this is intended.
X_train = torch.Tensor(dataset['X_train']).unsqueeze(1)
y_train = torch.Tensor(dataset['y_train']).unsqueeze(1)
K_train = torch.tensor(dataset['k_train']).unsqueeze(1)
f_train = torch.tensor(dataset['f_train']).unsqueeze(1).to(torch.float32)

# Validation arrays; y, K and f are flagged as requiring grad and wrapped in TensOps objects.
# NOTE(review): nothing in this cell is moved to DEVICE, and K_val (like K_train) is not cast
# to float32 — confirm against how the validation data is consumed.
X_val = torch.Tensor(dataset['X_val']).unsqueeze(1)
y_val = TensOps(torch.Tensor(dataset['y_val']).unsqueeze(1).requires_grad_(True), space_dimension=2, contravariance=0, covariance=0)
K_val = TensOps(torch.tensor(dataset['k_val']).unsqueeze(1).requires_grad_(True), space_dimension=2, contravariance=0, covariance=0)
f_val = TensOps(torch.tensor(dataset['f_val']).to(torch.float32).unsqueeze(1).requires_grad_(True), space_dimension=2, contravariance=0, covariance=0)

# Report the number of samples in each partition
print("Train dataset length:", len(X_train))
print("Validation dataset length:", len(X_val))

Train dataset length: 8000
Validation dataset length: 2000


In [7]:
# Divide the training samples in two parts: the first half pre-trains the autoencoder,
# the remaining samples train the PGNNIV.
N_data_AE = len(X_train)//2
N_data_NN = len(X_train) - len(X_train)//2
# Fraction of the samples assigned to the PGNNIV (kept for reference; no longer used for the split)
prop_data_NN = 1 - N_data_AE/(N_data_NN + N_data_AE)

print("Dataset length for the autoencoder:", N_data_AE)
print("Dataset length for the PGNNIV:", N_data_NN)

# Pass absolute sample counts instead of the float proportion: train_test_split rounds
# `test_size * n_samples` up, so a proportion carrying floating-point error can yield a split
# that is off by one with respect to the sizes printed above. Integer sizes are always exact.
X_AE, X_NN, y_AE, y_NN, K_AE, K_NN, f_AE, f_NN = train_test_split(
    X_train, y_train, K_train, f_train,
    train_size=N_data_AE, test_size=N_data_NN, random_state=42,
)

Dataset length for the autoencoder: 4000
Dataset length for the PGNNIV: 4000


#### Datos para el autoencoder

In [8]:
# Hold out 20% of the autoencoder samples as its test set (only the y fields are split here)
y_train_AE, y_test_AE = train_test_split(y_AE, test_size=0.2, random_state=42)

# Wrap both splits as TensOps objects on DEVICE.
# NOTE(review): requires_grad_ is applied before .to(DEVICE); when DEVICE is not the CPU the
# moved copy is a non-leaf tensor — confirm this is what the training loop expects.
y_train_AE = TensOps(
    y_train_AE.requires_grad_(True).to(DEVICE),
    space_dimension=2,
    contravariance=0,
    covariance=0,
)
y_test_AE = TensOps(
    y_test_AE.requires_grad_(True).to(DEVICE),
    space_dimension=2,
    contravariance=0,
    covariance=0,
)

#### Datos para la PGNNIV

In [9]:
# Hold out 20% of the PGNNIV samples as its test set; X, y, K and f are split with the same shuffle
(X_train_NN, X_test_NN,
 y_train_NN, y_test_NN,
 K_train_NN, K_test_NN,
 f_train_NN, f_test_NN) = train_test_split(X_NN, y_NN, K_NN, f_NN, test_size=0.2, random_state=42)

# Network inputs stay as plain tensors on DEVICE
X_train_NN = X_train_NN.to(DEVICE)
X_test_NN = X_test_NN.to(DEVICE)

# y fields: flagged as requiring grad, moved to DEVICE and wrapped as TensOps
y_train_NN = TensOps(
    y_train_NN.requires_grad_(True).to(DEVICE),
    space_dimension=2, contravariance=0, covariance=0,
)
y_test_NN = TensOps(
    y_test_NN.requires_grad_(True).to(DEVICE),
    space_dimension=2, contravariance=0, covariance=0,
)

# K fields: moved to DEVICE and wrapped as TensOps (no grad flag)
K_train_NN = TensOps(
    K_train_NN.to(DEVICE),
    space_dimension=2, contravariance=0, covariance=0,
)
K_test_NN = TensOps(
    K_test_NN.to(DEVICE),
    space_dimension=2, contravariance=0, covariance=0,
)

# f fields: moved to DEVICE and wrapped as TensOps (no grad flag)
f_train_NN = TensOps(
    f_train_NN.to(DEVICE),
    space_dimension=2, contravariance=0, covariance=0,
)
f_test_NN = TensOps(
    f_test_NN.to(DEVICE),
    space_dimension=2, contravariance=0, covariance=0,
)

## Autoencoder

In [10]:
from model import Autoencoder
from trainers.train import train_autoencoder_loop

In [11]:
# Layer sizes handed to the Autoencoder constructor as its second argument
latent_space_dim = [15, 10, 3, 10, 15]

# Input and output both take the shape of a single y sample
autoencoder_input_shape = y_train_AE.values[0].shape
autoencoder_output_shape = autoencoder_input_shape

In [12]:
# The autoencoder is trained on the y fields against themselves: inputs are the `.values`
# of the TensOps objects, targets are the TensOps objects.
# NOTE: this rebinds X_train / y_train, replacing the tensors built from the dataset earlier.
X_train, y_train = y_train_AE.values, y_train_AE
X_test, y_test = y_test_AE.values, y_test_AE

In [13]:
# Settings for the first autoencoder run (from epoch 0)
start_epoch, n_epochs = 0, 10000
batch_size = 64
n_checkpoint = 10
new_lr = None  # presumably "keep the optimizer's learning rate" — confirm in train_autoencoder_loop

# Fresh model on DEVICE and its Adam optimizer
autoencoder = Autoencoder(
    autoencoder_input_shape,
    latent_space_dim,
    autoencoder_output_shape,
).to(DEVICE)
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=1e-2)

train_autoencoder_loop(
    autoencoder, optimizer,
    X_train, y_train, X_test, y_test,
    n_checkpoint, start_epoch, n_epochs, batch_size,
    MODEL_RESULTS_AE_PATH, DEVICE, new_lr,
)

Starting training from scratch.
Epoch 0, Train loss: 4.323e+00, Test loss: 4.388e+00
Epoch 100, Train loss: 5.006e-01, Test loss: 4.657e-01
Epoch 200, Train loss: 2.312e-01, Test loss: 1.892e-01
Epoch 300, Train loss: 1.186e-02, Test loss: 2.122e-02
Epoch 400, Train loss: 3.329e-03, Test loss: 5.806e-03
Epoch 500, Train loss: 5.577e-03, Test loss: 1.023e-02
Epoch 600, Train loss: 2.464e-03, Test loss: 2.774e-03
Epoch 700, Train loss: 2.872e-03, Test loss: 4.983e-03
Epoch 800, Train loss: 1.241e-02, Test loss: 9.406e-03
Epoch 900, Train loss: 7.351e-03, Test loss: 1.086e-02
Epoch 1000, Train loss: 4.231e-03, Test loss: 9.199e-03
Epoch 1100, Train loss: 5.478e-03, Test loss: 9.434e-03
Epoch 1200, Train loss: 1.107e-03, Test loss: 2.805e-03
Epoch 1300, Train loss: 1.897e-03, Test loss: 4.299e-03
Epoch 1400, Train loss: 2.755e-03, Test loss: 4.882e-03
Epoch 1500, Train loss: 5.208e-03, Test loss: 9.262e-03
Epoch 1600, Train loss: 6.394e-03, Test loss: 9.990e-03
Epoch 1700, Train loss: 5.77

In [14]:
# Second autoencoder run: resume from the epoch-9000 checkpoint and pass a smaller
# learning rate (new_lr) to the training loop.
start_epoch = 9000
n_epochs = 20000
batch_size = 64
n_checkpoint = 10
new_lr = 1e-4

# (A stray literal `4` that sat on its own line before this call was a typo and has been removed.)
train_autoencoder_loop(autoencoder, optimizer, X_train, y_train, X_test, y_test,
                       n_checkpoint, start_epoch, n_epochs, batch_size, MODEL_RESULTS_AE_PATH, DEVICE, new_lr)

Starting training from a checkpoint. Epoch 9000.
Epoch 9000, Train loss: 7.591e-04, Test loss: 1.628e-03
Epoch 9100, Train loss: 3.674e-04, Test loss: 1.382e-03
Epoch 9200, Train loss: 3.343e-04, Test loss: 1.329e-03
Epoch 9300, Train loss: 3.122e-04, Test loss: 1.296e-03
Epoch 9400, Train loss: 2.958e-04, Test loss: 1.275e-03
Epoch 9500, Train loss: 2.827e-04, Test loss: 1.260e-03
Epoch 9600, Train loss: 2.716e-04, Test loss: 1.248e-03
Epoch 9700, Train loss: 2.620e-04, Test loss: 1.237e-03
Epoch 9800, Train loss: 2.534e-04, Test loss: 1.229e-03
Epoch 9900, Train loss: 2.457e-04, Test loss: 1.220e-03
Epoch 10000, Train loss: 2.386e-04, Test loss: 1.213e-03
Epoch 10100, Train loss: 2.321e-04, Test loss: 1.206e-03
Epoch 10200, Train loss: 2.261e-04, Test loss: 1.200e-03
Epoch 10300, Train loss: 2.204e-04, Test loss: 1.193e-03
Epoch 10400, Train loss: 2.151e-04, Test loss: 1.188e-03
Epoch 10500, Train loss: 2.101e-04, Test loss: 1.182e-03
Epoch 10600, Train loss: 2.054e-04, Test loss: 1.

## PGNNIV

In [34]:
from vecopsciml.operators.zero_order import Mx, My
from model.ae_nonlinear_model import AutoencoderNonlinearModel

In [35]:
# Number of filters for the explanatory network
n_filters_explanatory = 5

# Predictive network: input shape of one X sample, hidden layer sizes, output shape of one y sample
predictive_layers = [15, 10, 3]
input_shape = X_train_NN[0].shape
predictive_output = y_train_NN.values[0].shape

# Explanatory network: input/output shapes are those of one sample after applying Mx(My(.))
# to the y and f fields respectively
explanatory_layers = [10, 10]
explanatory_input = Mx(My(y_train_NN)).values[0].shape
explanatory_output = Mx(My(f_train_NN)).values[0].shape

In [36]:
# Reuse the decoder of the trained autoencoder and freeze every one of its parameters
pretrained_decoder = autoencoder.decoder
pretrained_decoder.requires_grad_(False)

# Print the flag of each parameter to confirm the decoder is frozen
for name, param in pretrained_decoder.named_parameters():
    print(f"{name}: requires_grad={param.requires_grad}")

hidden1_layer.weight: requires_grad=False
hidden1_layer.bias: requires_grad=False
hidden2_layer.weight: requires_grad=False
hidden2_layer.bias: requires_grad=False
output_layer.weight: requires_grad=False
output_layer.bias: requires_grad=False


In [None]:
# Training parameters
start_epoch, n_epochs = 0, 10000
batch_size = 64
n_checkpoints = 10

# PGNNIV model built around the frozen pretrained decoder, placed on DEVICE
model = AutoencoderNonlinearModel(
    input_shape, predictive_layers, pretrained_decoder, predictive_output,
    explanatory_input, explanatory_layers, explanatory_output, n_filters_explanatory,
).to(DEVICE)

# NOTE: this rebinds `optimizer`, which previously held the autoencoder's optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

train_loop(
    model, optimizer,
    X_train_NN, y_train_NN, f_train_NN,
    X_test_NN, y_test_NN, f_test_NN,
    D, n_checkpoints,
    start_epoch=start_epoch,
    n_epochs=n_epochs,
    batch_size=batch_size,
    model_results_path=MODEL_RESULTS_PGNNIV_PATH,
    device=DEVICE,
)

Starting training from scratch.
Epoch 0, Train loss: 8.100e+08, Test loss: 8.041e+08, MSE(e): 7.854e+01, MSE(pi1): 1.928e+03, MSE(pi2): 5.232e+01, MSE(pi3): 5.354e+01


Epoch 100, Train loss: 6.621e+06, Test loss: 6.980e+06, MSE(e): 6.600e-01, MSE(pi1): 7.701e-01, MSE(pi2): 3.973e-01, MSE(pi3): 1.278e-01
Epoch 200, Train loss: 2.879e+06, Test loss: 3.139e+06, MSE(e): 2.864e-01, MSE(pi1): 6.573e-01, MSE(pi2): 1.997e-01, MSE(pi3): 7.917e-02
Epoch 300, Train loss: 8.319e+05, Test loss: 8.086e+05, MSE(e): 8.242e-02, MSE(pi1): 4.528e-01, MSE(pi2): 6.153e-02, MSE(pi3): 3.107e-02
Epoch 400, Train loss: 7.325e+05, Test loss: 7.501e+05, MSE(e): 7.282e-02, MSE(pi1): 1.832e-01, MSE(pi2): 5.756e-02, MSE(pi3): 2.439e-02
Epoch 500, Train loss: 7.125e+05, Test loss: 6.655e+05, MSE(e): 7.091e-02, MSE(pi1): 1.410e-01, MSE(pi2): 5.563e-02, MSE(pi3): 1.960e-02
Epoch 600, Train loss: 6.607e+05, Test loss: 6.201e+05, MSE(e): 6.576e-02, MSE(pi1): 1.285e-01, MSE(pi2): 5.508e-02, MSE(pi3): 1.764e-02
Epoch 700, Train loss: 6.276e+05, Test loss: 6.077e+05, MSE(e): 6.249e-02, MSE(pi1): 1.049e-01, MSE(pi2): 5.190e-02, MSE(pi3): 1.595e-02
Epoch 800, Train loss: 5.988e+05, Test lo

KeyboardInterrupt: 

In [19]:
# Training parameters for the resumed run (starts from the epoch-9000 checkpoint)
start_epoch, n_epochs = 9000, 100000
batch_size = 64
n_checkpoints = 100

# Learning rate handed to train_loop as `new_lr` for this second stage
second_lr = 1e-4

train_loop(
    model, optimizer,
    X_train_NN, y_train_NN, f_train_NN,
    X_test_NN, y_test_NN, f_test_NN,
    D, n_checkpoints,
    start_epoch=start_epoch,
    n_epochs=n_epochs,
    batch_size=batch_size,
    model_results_path=MODEL_RESULTS_PGNNIV_PATH,
    device=DEVICE,
    new_lr=second_lr,
)

Starting training from a checkpoint. Epoch 9000.
Epoch 9000, Train loss: 8.327e+02, Test loss: 1.153e+03, MSE(e): 7.252e-05, MSE(pi1): 2.688e-03, MSE(pi2): 4.618e-05, MSE(pi3): 8.062e-04
Epoch 9100, Train loss: 6.275e+02, Test loss: 9.711e+02, MSE(e): 5.204e-05, MSE(pi1): 2.831e-03, MSE(pi2): 3.602e-05, MSE(pi3): 7.882e-04
Epoch 9200, Train loss: 6.154e+02, Test loss: 9.405e+02, MSE(e): 5.085e-05, MSE(pi1): 2.899e-03, MSE(pi2): 3.525e-05, MSE(pi3): 7.790e-04
Epoch 9300, Train loss: 5.932e+02, Test loss: 9.180e+02, MSE(e): 4.864e-05, MSE(pi1): 2.891e-03, MSE(pi2): 3.389e-05, MSE(pi3): 7.784e-04
Epoch 9400, Train loss: 5.759e+02, Test loss: 9.012e+02, MSE(e): 4.692e-05, MSE(pi1): 2.887e-03, MSE(pi2): 3.285e-05, MSE(pi3): 7.778e-04
Epoch 9500, Train loss: 5.618e+02, Test loss: 8.877e+02, MSE(e): 4.552e-05, MSE(pi1): 2.885e-03, MSE(pi2): 3.202e-05, MSE(pi3): 7.772e-04
Epoch 9600, Train loss: 5.500e+02, Test loss: 8.762e+02, MSE(e): 4.435e-05, MSE(pi1): 2.884e-03, MSE(pi2): 3.133e-05, MSE(p