In [211]:
import os
import torch
import GPUtil
from sklearn.model_selection import train_test_split

# Imports de la libreria propia
from vecopsciml.kernels.derivative import DerivativeKernels
from vecopsciml.utils import TensOps

# Imports de las funciones creadas para este programa
from model.baseline_model import BaselineNonlinearModel
from utils.folders import create_folder
from utils.load_data import load_data
from trainers.train import train_loop

In [212]:
import matplotlib.pyplot as plt
from vecopsciml.operators.zero_order import Mx, My

In [213]:
# Experiment identifiers: the dataset name (also used as the data/ and results/
# sub-folder name) and the number of modes, which is appended to the name of
# the model results folder.
data_name = 'non_linear_100_0'
n_modes = 100

# Project root. The default is the original author's checkout, so existing runs
# behave exactly as before; set the PGML_ROOT_PATH environment variable to run
# the notebook from another machine or checkout without editing this cell.
ROOT_PATH = os.environ.get(
    'PGML_ROOT_PATH',
    r'/home/rmunoz/Escritorio/rmunozTMELab/Physically-Guided-Machine-Learning',
)

# Paths derived from the root:
#   DATA_PATH            -> <root>/data/<data_name>/<data_name>.pkl
#   RESULTS_FOLDER_PATH  -> <root>/results/<data_name>
#   MODEL_RESULTS_PATH   -> <root>/results/<data_name>/baseline_model_<n_modes>
DATA_PATH = os.path.join(ROOT_PATH, r'data/', data_name, data_name) + '.pkl'
RESULTS_FOLDER_PATH = os.path.join(ROOT_PATH, r'results/', data_name)
MODEL_RESULTS_PATH = os.path.join(ROOT_PATH, r'results/', data_name, 'baseline_model_') + str(n_modes)

# Create the output folders that are needed (the helper reports when a folder
# already exists).
create_folder(RESULTS_FOLDER_PATH)
create_folder(MODEL_RESULTS_PATH)

Folder already exists at: /home/rmunoz/Escritorio/rmunozTMELab/Physically-Guided-Machine-Learning/results/non_linear_100_0
Folder successfully created at: /home/rmunoz/Escritorio/rmunozTMELab/Physically-Guided-Machine-Learning/results/non_linear_100_0/baseline_model_100


In [214]:
# Load the dataset from DATA_PATH with the project's load_data helper. The
# result is indexed by string keys in the following cells (e.g. 'x_step_size',
# 'X_train', 'X_val').
# NOTE(review): DATA_PATH points at a .pkl file; if load_data unpickles it,
# only load files that come from a trusted source.
dataset = load_data(DATA_PATH)

Data successfully loaded from: /home/rmunoz/Escritorio/rmunozTMELab/Physically-Guided-Machine-Learning/data/non_linear_100_0/non_linear_100_0.pkl


In [215]:
# Grid step sizes stored alongside the data; they parameterise the kernels below.
dx, dy = dataset['x_step_size'], dataset['y_step_size']

# Two-dimensional gradient kernels built by the project's DerivativeKernels helper.
# NOTE(review): the meaning of the third positional argument (0) is not visible
# from this notebook - confirm against the vecopsciml documentation.
D = DerivativeKernels(dx, dy, 0).grad_kernels_two_dimensions()

In [216]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

print(f"Using device: {DEVICE}")

Using device: cuda


In [217]:
def _to_tensops(tensor):
    """Wrap ``tensor`` in a TensOps using the settings shared by every field in this cell."""
    # NOTE(review): contravariance=0 / covariance=0 presumably describes a scalar
    # field on a 2-D domain - confirm against the vecopsciml documentation.
    return TensOps(tensor, space_dimension=2, contravariance=0, covariance=0)


# Read the training arrays as float32 tensors and insert a new axis at position 1.
X, y, K, f = (
    torch.tensor(dataset[key], dtype=torch.float32).unsqueeze(1)
    for key in ('X_train', 'y_train', 'k_train', 'f_train')
)

# Split the training data 70/30 into train/test parts with a fixed random_state,
# so the same split is produced on every run.
X_train, X_test, y_train, y_test, K_train, K_test, f_train, f_test = train_test_split(
    X, y, K, f,
    test_size=0.3,
    random_state=42,
)

# Inputs stay plain tensors; they are only moved to the compute device.
X_train, X_test = X_train.to(DEVICE), X_test.to(DEVICE)

# y, K and f are moved to the device, flagged as requiring gradients and then
# wrapped for the TensOps library.
y_train, y_test = (_to_tensops(part.to(DEVICE).requires_grad_(True)) for part in (y_train, y_test))
K_train, K_test = (_to_tensops(part.to(DEVICE).requires_grad_(True)) for part in (K_train, K_test))
f_train, f_test = (_to_tensops(part.to(DEVICE).requires_grad_(True)) for part in (f_train, f_test))

# Validation data gets the same float32 conversion, extra axis and TensOps wrapping.
# NOTE(review): unlike the train/test tensors, none of the validation tensors is
# moved to DEVICE in this cell - confirm that whatever consumes them handles that.
X_val = torch.tensor(dataset['X_val'], dtype=torch.float32).unsqueeze(1)
y_val, K_val, f_val = (
    _to_tensops(torch.tensor(dataset[key], dtype=torch.float32, requires_grad=True).unsqueeze(1))
    for key in ('y_val', 'k_val', 'f_val')
)

In [218]:
# Predictive network: input and output shapes are read from a single training
# sample; n_modes sets the width of the middle hidden layer.
input_shape, predictive_output = X_train[0].shape, y_train.values[0].shape
predictive_layers = [20, 10, n_modes, 10, 20]

# Explanatory network: its input and output shapes are the per-sample shapes of
# y_train and f_train after applying Mx(My(.)).
# NOTE(review): what the Mx / My operators do to the field is not visible from
# this notebook - confirm against vecopsciml.operators.zero_order.
explanatory_input, explanatory_output = (
    Mx(My(field)).values[0].shape for field in (y_train, f_train)
)
explanatory_layers = [10, 10]

# Remaining hyper-parameter handed to the model constructor.
n_filters_explanatory = 5

In [219]:
# Build the model from the architecture settings, place it on the compute
# device and attach an Adam optimizer to its parameters.
model = BaselineNonlinearModel(
    input_shape,
    predictive_layers,
    predictive_output,
    explanatory_input,
    explanatory_layers,
    explanatory_output,
    n_filters_explanatory,
).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training parameters for the first run, which starts at epoch 0.
start_epoch, n_epochs = 0, 20_000
batch_size, n_checkpoints = 64, 10

train_loop(
    model, optimizer,
    X_train, y_train, f_train,
    X_test, y_test, f_test,
    D, n_checkpoints,
    start_epoch=start_epoch,
    n_epochs=n_epochs,
    batch_size=batch_size,
    model_results_path=MODEL_RESULTS_PATH,
    device=DEVICE,
)

Starting training from scratch.
Epoch 0, Train loss: 1.020e+09, Test loss: 6.466e+09, MSE(e): 1.003e+02, MSE(pi1): 1.639e+03, MSE(pi2): 3.952e+01, MSE(pi3): 4.686e+00
Epoch 100, Train loss: 3.641e+07, Test loss: 4.603e+07, MSE(e): 3.604e+00, MSE(pi1): 1.406e+01, MSE(pi2): 1.446e+00, MSE(pi3): 2.237e+00
Epoch 200, Train loss: 3.069e+07, Test loss: 3.677e+07, MSE(e): 3.052e+00, MSE(pi1): 6.044e+00, MSE(pi2): 1.390e+00, MSE(pi3): 1.041e+00
Epoch 300, Train loss: 3.050e+07, Test loss: 3.654e+07, MSE(e): 3.037e+00, MSE(pi1): 4.633e+00, MSE(pi2): 1.387e+00, MSE(pi3): 7.755e-01
Epoch 400, Train loss: 3.035e+07, Test loss: 3.642e+07, MSE(e): 3.027e+00, MSE(pi1): 3.001e+00, MSE(pi2): 1.386e+00, MSE(pi3): 5.201e-01
Epoch 500, Train loss: 2.667e+07, Test loss: 3.262e+07, MSE(e): 2.479e+00, MSE(pi1): 1.794e+02, MSE(pi2): 1.148e+00, MSE(pi3): 8.705e-01
Epoch 600, Train loss: 6.616e+06, Test loss: 7.614e+06, MSE(e): 6.355e-01, MSE(pi1): 1.908e+01, MSE(pi2): 3.985e-01, MSE(pi3): 6.972e-01
Epoch 700, 

In [220]:
# Training parameters for the second run, which resumes from a checkpoint.
# NOTE(review): start_epoch is hard-coded to 18000 although the previous run
# was configured for 20000 epochs - presumably 18000 is the last checkpoint
# that was saved; confirm against the files in MODEL_RESULTS_PATH.
start_epoch, n_epochs = 18_000, 100_000
batch_size, n_checkpoints = 64, 10

# Learning rate handed to train_loop as new_lr for this run.
second_lr = 0.0001

train_loop(
    model, optimizer,
    X_train, y_train, f_train,
    X_test, y_test, f_test,
    D, n_checkpoints,
    start_epoch=start_epoch,
    n_epochs=n_epochs,
    batch_size=batch_size,
    model_results_path=MODEL_RESULTS_PATH,
    device=DEVICE,
    new_lr=second_lr,
)

Starting training from a checkpoint. Epoch 18000.
Epoch 18000, Train loss: 1.135e+04, Test loss: 1.384e+05, MSE(e): 6.907e-04, MSE(pi1): 1.465e-01, MSE(pi2): 6.330e-04, MSE(pi3): 2.975e-02


Epoch 18100, Train loss: 1.132e+04, Test loss: 1.392e+05, MSE(e): 6.895e-04, MSE(pi1): 1.452e-01, MSE(pi2): 6.327e-04, MSE(pi3): 2.973e-02
Epoch 18200, Train loss: 1.131e+04, Test loss: 1.391e+05, MSE(e): 6.890e-04, MSE(pi1): 1.451e-01, MSE(pi2): 6.323e-04, MSE(pi3): 2.972e-02
Epoch 18300, Train loss: 1.131e+04, Test loss: 1.390e+05, MSE(e): 6.884e-04, MSE(pi1): 1.451e-01, MSE(pi2): 6.319e-04, MSE(pi3): 2.970e-02
Epoch 18400, Train loss: 1.130e+04, Test loss: 1.390e+05, MSE(e): 6.879e-04, MSE(pi1): 1.450e-01, MSE(pi2): 6.314e-04, MSE(pi3): 2.969e-02
Epoch 18500, Train loss: 1.129e+04, Test loss: 1.389e+05, MSE(e): 6.873e-04, MSE(pi1): 1.449e-01, MSE(pi2): 6.310e-04, MSE(pi3): 2.967e-02
Epoch 18600, Train loss: 1.128e+04, Test loss: 1.388e+05, MSE(e): 6.867e-04, MSE(pi1): 1.449e-01, MSE(pi2): 6.304e-04, MSE(pi3): 2.966e-02
Epoch 18700, Train loss: 1.127e+04, Test loss: 1.388e+05, MSE(e): 6.861e-04, MSE(pi1): 1.448e-01, MSE(pi2): 6.299e-04, MSE(pi3): 2.964e-02
Epoch 18800, Train loss: 1.