In [1]:
import os
import torch
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from collections import Counter

# Imports de la libreria propia
from vecopsciml.kernels.derivative import DerivativeKernels
from vecopsciml.utils import TensOps

# Imports de las funciones creadas para este programa
from utils.folders import create_folder
from utils.load_data import load_data
from trainers.train import train_loop

In [2]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

print(f"Using device: {DEVICE}")

Using device: cuda


In [3]:
# Build the paths for the data file and the results folders.
# The repository root defaults to the original author's checkout; set the
# PGML_ROOT_PATH environment variable to run the notebook from another location
# without editing this cell.
ROOT_PATH = os.environ.get(
    'PGML_ROOT_PATH',
    r'/home/rmunoz/Escritorio/rmunozTMELab/Physically-Guided-Machine-Learning',
)
DATA_PATH = os.path.join(ROOT_PATH, r'data/non_linear/non_linear_decomposition.pkl')
RESULTS_FOLDER_PATH = os.path.join(ROOT_PATH, r'results/non_linear')
# Both model folders live inside the results folder, so derive them from it.
MODEL_RESULTS_AE_PATH = os.path.join(RESULTS_FOLDER_PATH, 'model_autoencoder_AE')
MODEL_RESULTS_PGNNIV_PATH = os.path.join(RESULTS_FOLDER_PATH, 'model_autoencoder_NN')


# Create the folders that are needed (a message is printed for those that already exist).
create_folder(RESULTS_FOLDER_PATH)
create_folder(MODEL_RESULTS_AE_PATH)
create_folder(MODEL_RESULTS_PGNNIV_PATH)

Folder already exists at: /home/rmunoz/Escritorio/rmunozTMELab/Physically-Guided-Machine-Learning/results/non_linear
Folder already exists at: /home/rmunoz/Escritorio/rmunozTMELab/Physically-Guided-Machine-Learning/results/non_linear/model_autoencoder_AE
Folder already exists at: /home/rmunoz/Escritorio/rmunozTMELab/Physically-Guided-Machine-Learning/results/non_linear/model_autoencoder_NN


In [4]:
# Load dataset
# The result is indexed by string keys in the following cells (step sizes and the train/validation arrays).
# NOTE(review): DATA_PATH points to a .pkl file; if load_data unpickles it, only load files
# from a trusted source — confirm against utils/load_data.py.
dataset = load_data(DATA_PATH)

Data successfully loaded from: /home/rmunoz/Escritorio/rmunozTMELab/Physically-Guided-Machine-Learning/data/non_linear/non_linear_decomposition.pkl


In [5]:
# Grid step sizes stored alongside the dataset, one per axis.
dx, dy = dataset['x_step_size'], dataset['y_step_size']

# Convolutional filters used to differentiate fields on the two-dimensional grid.
# NOTE(review): the meaning of the third constructor argument (0) is not visible
# from this notebook — confirm in vecopsciml.kernels.derivative.
D = DerivativeKernels(dx, dy, 0).grad_kernels_two_dimensions()

## División de los datos

In [6]:
# Training arrays as tensors. unsqueeze(1) inserts a singleton dimension at position 1
# (presumably the channel axis expected by the convolutional layers — TODO confirm).
# torch.Tensor(...) always yields the default floating dtype, whereas torch.tensor(...)
# keeps the dtype of the source data.
# NOTE(review): f_* is cast to float32 explicitly but K_* is not, so K may stay in the
# dataset's dtype (e.g. float64) — confirm this is intended.
X_train = torch.Tensor(dataset['X_train']).unsqueeze(1)
y_train = torch.Tensor(dataset['y_train']).unsqueeze(1)
K_train = torch.tensor(dataset['k_train']).unsqueeze(1)
f_train = torch.tensor(dataset['f_train']).unsqueeze(1).to(torch.float32)

# Validation arrays. The targets are wrapped in TensOps with gradient tracking enabled;
# none of the validation tensors is moved to DEVICE in this cell.
X_val = torch.Tensor(dataset['X_val']).unsqueeze(1)
y_val = TensOps(torch.Tensor(dataset['y_val']).unsqueeze(1).requires_grad_(True), space_dimension=2, contravariance=0, covariance=0)
K_val = TensOps(torch.tensor(dataset['k_val']).unsqueeze(1).requires_grad_(True), space_dimension=2, contravariance=0, covariance=0)
f_val = TensOps(torch.tensor(dataset['f_val']).to(torch.float32).unsqueeze(1).requires_grad_(True), space_dimension=2, contravariance=0, covariance=0)

print("Train dataset length:", len(X_train))
print("Validation dataset length:", len(X_val))

Train dataset length: 8000
Validation dataset length: 2000


In [7]:
# A quarter of the training samples is reserved for the autoencoder; the rest goes to the PGNNIV.
N_data_AE = len(X_train)//4
N_data_NN = len(X_train) - N_data_AE
# Proportion of samples assigned to the PGNNIV (kept for reference; the split below uses the counts).
prop_data_NN = 1 - N_data_AE/(N_data_NN + N_data_AE)

print("Dataset length for the autoencoder:", N_data_AE)
print("Dataset length for the PGNNIV:", N_data_NN)

# Pass absolute sample counts rather than the float proportion: train_test_split rounds a
# float test_size up, so floating-point error in the proportion can shift the split by one
# sample for some dataset sizes. With integer counts the subsets always have exactly the
# sizes printed above (the "train" outputs are the autoencoder subset, the "test" outputs
# the PGNNIV subset).
X_AE, X_NN, y_AE, y_NN, K_AE, K_NN, f_AE, f_NN = train_test_split(X_train, y_train, K_train, f_train, train_size=N_data_AE, test_size=N_data_NN, random_state=42)

Dataset length for the autoencoder: 2000
Dataset length for the PGNNIV: 6000


#### Datos para el autoencoder

In [8]:
# Split the autoencoder subset 80/20 and wrap each part as a TensOps on DEVICE.
# Gradient tracking is switched on before the device transfer, exactly as the targets
# are prepared elsewhere in this notebook.
y_train_AE, y_test_AE = (
    TensOps(part.requires_grad_(True).to(DEVICE), space_dimension=2, contravariance=0, covariance=0)
    for part in train_test_split(y_AE, test_size=0.2, random_state=42)
)

#### Datos para la PGNNIV

In [9]:
# 80/20 split of the PGNNIV subset; all four arrays are split with the same random state.
(X_train_NN, X_test_NN,
 y_train_NN, y_test_NN,
 K_train_NN, K_test_NN,
 f_train_NN, f_test_NN) = train_test_split(X_NN, y_NN, K_NN, f_NN, test_size=0.2, random_state=42)

# Inputs stay plain tensors and are only moved to the compute device.
X_train_NN, X_test_NN = X_train_NN.to(DEVICE), X_test_NN.to(DEVICE)

# Targets: gradient tracking enabled, moved to DEVICE and wrapped in TensOps.
y_train_NN, y_test_NN = (
    TensOps(target.requires_grad_(True).to(DEVICE), space_dimension=2, contravariance=0, covariance=0)
    for target in (y_train_NN, y_test_NN)
)

# K and f: moved to DEVICE and wrapped in TensOps, without enabling gradient tracking.
K_train_NN, K_test_NN, f_train_NN, f_test_NN = (
    TensOps(field.to(DEVICE), space_dimension=2, contravariance=0, covariance=0)
    for field in (K_train_NN, K_test_NN, f_train_NN, f_test_NN)
)

## Autoencoder

In [10]:
from models import Autoencoder
from trainers.eval import loss_function_autoencoder
from utils.checkpoints import load_checkpoint, save_checkpoint
from utils.checkpoints import load_results

In [11]:
def train_autoencoder_epoch(model, optimizer, X_train, y_train):
    """Perform a single optimisation step of the autoencoder.

    Despite its name, each call performs one forward/backward pass on the data
    it receives (one batch when called from the training loop).

    Args:
        model: Module to train; it is switched to training mode.
        optimizer: Optimizer whose gradients are reset and which is stepped once.
        X_train: Input passed to ``model``.
        y_train: Target passed, together with the prediction, to
            ``loss_function_autoencoder``.

    Returns:
        The loss returned by ``loss_function_autoencoder`` (not detached).
    """
    model.train()
    optimizer.zero_grad()

    batch_loss = loss_function_autoencoder(y_train, model(X_train))

    # The graph is retained after the backward pass.
    # NOTE(review): presumably required because the inputs share autograd history
    # across batches — confirm before removing, as it keeps extra memory alive.
    batch_loss.backward(retain_graph=True)
    optimizer.step()

    return batch_loss


def test_autoencoder_epoch(model, X_test, y_test):
    y_pred = model(X_test)
    loss = loss_function_autoencoder(y_test, y_pred)

    return loss

In [12]:
def autoencoder_train_loop(model, optimizer, X_train, y_train, X_test, y_test, start_epoch, n_epochs, batch_size, i_checkpoint, model_results_path, device, lr_updated=None):
    """Train the autoencoder in mini-batches, logging losses and saving checkpoints.

    Args:
        model: Autoencoder to train.
        optimizer: Optimizer attached to ``model``.
        X_train: Training inputs, sliced along the first dimension into batches.
        y_train: Training targets as a TensOps; ``values``, ``space_dim`` and
            ``order`` are read to build each batch.
        X_test: Test inputs, evaluated as a whole once per epoch.
        y_test: Test targets matching ``X_test``.
        start_epoch: First epoch to run. When greater than 0, the model,
            optimizer and loss histories are restored with ``load_checkpoint``.
        n_epochs: Epoch index at which training stops (exclusive).
        batch_size: Number of samples per batch; the last batch may be smaller.
        i_checkpoint: A checkpoint is saved every ``i_checkpoint`` epochs.
        model_results_path: Folder used by ``load_checkpoint``/``save_checkpoint``.
        device: Device each batch is moved to.
        lr_updated: Optional learning rate applied to every parameter group
            after resuming from a checkpoint; ignored when starting from scratch.

    Raises:
        ValueError: If ``X_train`` contains no samples.

    Notes:
        The recorded train loss is the sum of the batch losses of the epoch
        divided by the number of training samples, and the test loss is divided
        by the number of test samples. This assumes ``loss_function_autoencoder``
        returns a sum over samples — TODO confirm. Histories restored from older
        checkpoints may hold train losses computed from the last batch only.
    """

    if start_epoch > 0:
        print(f'Starting training from a checkpoint. Epoch {start_epoch}.')

        resume_epoch = start_epoch
        model, optimizer, lists = load_checkpoint(model, optimizer, resume_epoch, model_results_path)
        train_total_loss_list = lists['train_total_loss_list']
        test_total_loss_list = lists['test_total_loss_list']

        if lr_updated is not None:
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_updated

    else:
        print("Starting training from the beginning")

        train_total_loss_list = []
        test_total_loss_list = []

    N_train = X_train.shape[0]
    N_test = X_test.shape[0]

    if N_train == 0:
        raise ValueError("X_train is empty: there is nothing to train on.")

    if start_epoch >= n_epochs:
        # Without this guard the final save below would reference an epoch index
        # that was never assigned.
        print(f'Nothing to train: start_epoch ({start_epoch}) >= n_epochs ({n_epochs}).')
        return

    # Logging cadence: every epoch for short runs, sparser for longer ones.
    log_every = 1 if n_epochs < 100 else (10 if n_epochs <= 1000 else 1000)

    for epoch_i in range(start_epoch, n_epochs):

        epoch_loss_sum = 0.0
        for batch_start in range(0, N_train, batch_size):
            batch_stop = batch_start + batch_size
            X_batch = X_train[batch_start:batch_stop].to(device)
            y_batch = TensOps(y_train.values[batch_start:batch_stop].to(device), space_dimension=y_train.space_dim, contravariance=y_train.order[0], covariance=y_train.order[1])

            epoch_loss_sum += train_autoencoder_epoch(model, optimizer, X_batch, y_batch).item()

        # Evaluate once per epoch, after the last optimisation step, instead of after every batch.
        loss_test = test_autoencoder_epoch(model, X_test, y_test).item()

        train_loss_epoch = epoch_loss_sum/N_train
        test_loss_epoch = loss_test/N_test

        train_total_loss_list.append(train_loss_epoch)
        test_total_loss_list.append(test_loss_epoch)

        if epoch_i % log_every == 0:
            print(f'Epoch {epoch_i}, Train loss: {train_loss_epoch:.3e}, Test loss: {test_loss_epoch:.3e}')

        if epoch_i % (i_checkpoint) == 0:
            save_checkpoint(model, optimizer, epoch_i, model_results_path, train_total_loss_list=train_total_loss_list, test_total_loss_list=test_total_loss_list)

    # Final save, flagged as the end of training, at the last epoch that was run.
    save_checkpoint(model, optimizer, epoch_i, model_results_path, end_flag=True, train_total_loss_list=train_total_loss_list, test_total_loss_list=test_total_loss_list)


In [13]:
# Input and output of the autoencoder both have the shape of a single training sample.
autoencoder_input_shape = autoencoder_output_shape = y_train_AE.values[0].shape
latent_space_dim = 20

# Training schedule for the autoencoder.
start_epoch, n_epochs = 0, 100_000
batch_size, i_checkpoint = 64, 10_000

In [14]:
# The same fields act as autoencoder input (raw tensor) and target (TensOps wrapper).
# NOTE(review): this rebinds X_train / y_train, which until here held the full training
# tensors built from the dataset; re-running the earlier split cells after this one
# would operate on the autoencoder subset instead.
X_train, y_train = y_train_AE.values, y_train_AE
X_test, y_test = y_test_AE.values, y_test_AE

In [15]:
# Autoencoder training from scratch. Disabled: left commented out, presumably because
# previously saved results are loaded from MODEL_RESULTS_AE_PATH in a later cell.
# model = Autoencoder(input_size=autoencoder_input_shape, encoding_dim=latent_space_dim, output_size=autoencoder_output_shape, device=DEVICE)
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# autoencoder_train_loop(model, optimizer, X_train, y_train, X_test, y_test,  
#                        start_epoch, n_epochs, batch_size, i_checkpoint, MODEL_RESULTS_AE_PATH, DEVICE)

In [16]:
# Resuming the autoencoder training from the checkpoint saved at epoch 90000 (disabled).
# NOTE(review): new_lr is assigned but never forwarded to autoencoder_train_loop; pass
# lr_updated=new_lr for the new learning rate to take effect after loading the checkpoint.
# start_epoch = 90000
# n_epochs = 150000
# batch_size = 64
# i_checkpoint = 10000
# new_lr = 5e-5

# autoencoder_train_loop(model, optimizer, X_train, y_train, X_test, y_test,  
#                        start_epoch, n_epochs, batch_size, i_checkpoint, MODEL_RESULTS_AE_PATH, DEVICE)

In [17]:
# Build an autoencoder and optimizer with the training configuration, then hand both to
# load_results together with the autoencoder results folder; its three return values
# replace the freshly built objects.
autoencoder = Autoencoder(
    input_size=autoencoder_input_shape,
    encoding_dim=latent_space_dim,
    output_size=autoencoder_output_shape,
    device=DEVICE,
)
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=1e-4)

autoencoder, optimizer, lists = load_results(
    autoencoder,
    optimizer,
    MODEL_RESULTS_AE_PATH,
    map_location=torch.device('cpu'),
)

In [18]:
# Expose the two halves of the autoencoder under their own names.
encoder, decoder = autoencoder.encoder, autoencoder.decoder

## Red neuronal general

In [19]:
# Size of the latent vector produced by the predictive network.
latent_space_dim = 20
# Per-sample shapes of the network input and of the target field.
nn_input_shape, nn_output_shape = X_train_NN[0].shape, y_train_NN.values[0].shape

In [20]:
import torch
import torch.nn as nn

from vecopsciml.utils import TensOps
from vecopsciml.operators.zero_order import Mx, My

class HiddenStatePGNNIV(nn.Module):
    """PGNNIV whose predictive branch outputs a latent code that a given decoder expands.

    The predictive network maps the flattened input to a vector of size
    ``latent_space_output``; ``decoder_model`` turns that vector into the
    predicted field ``u_pred``. The explanatory network then takes
    ``My(Mx(u_pred))`` and produces ``K_pred``.

    Args:
        input_size: Shape of one input sample (an int is also accepted and is
            taken as the number of input features).
        latent_space_output: Size of the latent vector fed to the decoder.
        explanatory_output_size: Shape of one output sample; entries 1 and 2
            are read, and each is reduced by one to size the explanatory output.
        decoder_model: Callable mapping the latent vector to ``u_pred``. If it
            is an ``nn.Module`` it is registered as a submodule, so its
            parameters are included in ``self.parameters()``.
        device: Device on which the layers are created and to which the input
            is moved in ``forward``.
        **kwargs: Accepted for call-site compatibility; not used.
    """

    def __init__(self, input_size, latent_space_output, explanatory_output_size, decoder_model, device, **kwargs):
        super(HiddenStatePGNNIV, self).__init__()

        self.input = input_size
        self.latent_space_output = latent_space_output
        self.output_expl = explanatory_output_size

        self.hidden_units_pred = 10
        self.hidden_units_exp = 15
        self.filters_exp = 10

        self.decoder = decoder_model

        self.device = device

        # Number of features after flattening one input sample, as a plain int so that
        # nn.Linear stores an integer in_features rather than a tensor.
        if isinstance(self.input, int):
            n_input_features = self.input
        else:
            n_input_features = 1
            for dim in self.input:
                n_input_features *= int(dim)

        # Predictive network
        self.flatten_layer_pred = nn.Flatten(start_dim=1, end_dim=-1)
        self.hidden1_layer_pred = nn.Linear(n_input_features, self.hidden_units_pred).to(self.device)
        self.hidden2_layer_pred = nn.Linear(self.hidden_units_pred, self.hidden_units_pred).to(self.device)
        self.output_layer_pred = nn.Linear(self.hidden_units_pred, self.latent_space_output).to(self.device)

        # Explanatory network (used in forward to compute K_pred from the decoded field)
        self.conv1_exp = nn.Conv2d(in_channels=1, out_channels=self.filters_exp, kernel_size=1).to(self.device)
        self.flatten_layer_exp = nn.Flatten().to(self.device)
        # Lazy layer: its input size is inferred on the first forward pass.
        self.hidden1_layer_exp = nn.LazyLinear(self.hidden_units_exp).to(self.device)
        self.hidden2_layer_exp = nn.Linear(self.hidden_units_exp, self.hidden_units_exp).to(self.device)
        self.output_layer_exp = nn.Linear(self.hidden_units_exp, self.filters_exp * (self.output_expl[1] - 1) * (self.output_expl[2] - 1)).to(self.device)
        self.conv2_exp = nn.Conv2d(in_channels=self.filters_exp, out_channels=1, kernel_size=1).to(self.device)

    def forward(self, X):
        """Compute the predicted field and the explanatory output for a batch.

        Args:
            X: Batch of inputs; it is moved to ``self.device`` and flattened
                from dimension 1 onwards.

        Returns:
            Tuple ``(u_pred, K_pred)``: the decoder output and the output of
            the explanatory network.
        """

        X = X.to(self.device)

        # Predictive network
        X = self.flatten_layer_pred(X)
        X = torch.sigmoid(self.hidden1_layer_pred(X))
        X = torch.sigmoid(self.hidden2_layer_pred(X))
        output_predictive_net = self.output_layer_pred(X)

        # Use the decoder given at construction time, not a notebook-level global.
        u_pred = self.decoder(output_predictive_net)
        # NOTE(review): Mx/My presumably shrink each spatial dimension by one, consistent
        # with the (output_expl[1] - 1, output_expl[2] - 1) reshape below — confirm in vecopsciml.
        um_pred = My(Mx(TensOps(u_pred, space_dimension=2, contravariance=0, covariance=0))).values

        # Explanatory network
        x = torch.sigmoid(self.conv1_exp(um_pred))
        x = self.flatten_layer_exp(x)
        x = torch.sigmoid(self.hidden1_layer_exp(x))
        x = torch.sigmoid(self.hidden2_layer_exp(x))
        x = self.output_layer_exp(x)
        x = x.view(x.size(0), self.filters_exp, self.output_expl[1] - 1, self.output_expl[2] - 1)
        K_pred = self.conv2_exp(x)

        return u_pred, K_pred

In [None]:
# Build the model and its optimizer.
# The autoencoder's decoder is handed to the model as decoder_model.
# NOTE(review): `optimizer` reuses the name of the autoencoder optimizer created earlier.
pgnniv_model = HiddenStatePGNNIV(input_size=nn_input_shape, latent_space_output=latent_space_dim, explanatory_output_size=nn_output_shape, decoder_model=decoder, device=DEVICE)
optimizer = torch.optim.Adam(pgnniv_model.parameters(), lr=1e-4)

# Training parameters
# NOTE(review): start_epoch > 0 — the log printed by this cell shows train_loop resuming
# from a checkpoint at this epoch, so that checkpoint presumably must already exist under
# MODEL_RESULTS_PGNNIV_PATH for a fresh "Restart & Run All" to succeed.
start_epoch = 900
n_epochs = 10000

batch_size = 64
n_checkpoints = 1000

# Train the PGNNIV on the NN split; D holds the derivative kernels built earlier.
train_loop(pgnniv_model, optimizer, n_checkpoints,
           X_train_NN, y_train_NN, X_test_NN, y_test_NN, f_train_NN, f_test_NN,
           D=D, start_epoch=start_epoch, n_epochs=n_epochs, batch_size=batch_size, 
           model_results_path=MODEL_RESULTS_PGNNIV_PATH, device=DEVICE,
        )

Start training
Starting from a checkpoint. Epoch 900.
Epoch 900, Train loss: 6.861e+03, Test loss: 1.137e+04, MSE(e): 6.625e-04, MSE(pi1): 1.131e-02, MSE(pi2): 5.967e-04, MSE(pi3): 1.228e-03
Epoch 1000, Train loss: 5.060e+03, Test loss: 8.480e+03, MSE(e): 4.831e-04, MSE(pi1): 1.064e-02, MSE(pi2): 4.208e-04, MSE(pi3): 1.223e-03
Epoch 1100, Train loss: 3.830e+03, Test loss: 6.656e+03, MSE(e): 3.614e-04, MSE(pi1): 9.455e-03, MSE(pi2): 3.006e-04, MSE(pi3): 1.215e-03
Epoch 1200, Train loss: 3.190e+03, Test loss: 5.500e+03, MSE(e): 2.985e-04, MSE(pi1): 8.377e-03, MSE(pi2): 2.369e-04, MSE(pi3): 1.210e-03
Epoch 1300, Train loss: 2.741e+03, Test loss: 4.441e+03, MSE(e): 2.544e-04, MSE(pi1): 8.266e-03, MSE(pi2): 2.003e-04, MSE(pi3): 1.148e-03
Epoch 1400, Train loss: 2.433e+03, Test loss: 3.924e+03, MSE(e): 2.241e-04, MSE(pi1): 8.217e-03, MSE(pi2): 1.783e-04, MSE(pi3): 1.106e-03
Epoch 1500, Train loss: 2.230e+03, Test loss: 3.523e+03, MSE(e): 2.037e-04, MSE(pi1): 8.695e-03, MSE(pi2): 1.632e-04, M