In [1]:
import sys
import os
project_root = "/Users/linarojas/Desktop/Research/Papers/Combinatorial_Ternary/Hybrid-Experimental-Data-Driven-Workflow"
sys.path.append(sys.path.append(project_root))

In [24]:
import numpy as np
import pandas as pd
import joblib
import torch

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel,ConstantKernel,Matern
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from Data_Extraction.DataPreprocessor import DataPreprocessor

from torch.utils.data import DataLoader, TensorDataset
from Models.VAE.VAE_Model import VAE

Data Structure
1. Simulated data (AFLOW extraction)
2. Data Preprocessing
3. Extract VAE model 
4. VAE latent space (Compositions - Electronic Configuration)

In [None]:
# 1. Simulated Data
input_data = os.path.join(
    project_root,
    "Models",
    "VAE",
    "Input_Data_VAE.pkl"
)

output_data = os.path.join(
    project_root,
    "Models",
    "VAE",
    "Output_TC.pkl"
)
df_input = pd.read_pickle(input_data)
df_output = pd.read_pickle(output_data)

In [16]:
# 2. Data Preprocession
dp = DataPreprocessor()
X_train, X_test, y_train,y_test = dp.split_training(df_input,df_output)
X_train_scaled = dp.fit_transform(X_train)
X_test_scaled = dp.fit_transform(X_test)

In [9]:
# 3. Extract VAE model
device = "cuda" if torch.cuda.is_available() else "cpu"
model_path = os.path.join(
    project_root,
    "Models",
    "VAE",
    "vae_model_AFLOW.pth"
)

model = VAE(
    input_dim = 2204,
    latent_dim = 16,
    hidden_dims = (256,128)
)

state = torch.load(model_path, map_location=torch.device("cpu"))
model.to(device)
model.eval()

VAE(
  (enc): Sequential(
    (0): Linear(in_features=2204, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): ReLU()
  )
  (mu): Linear(in_features=128, out_features=16, bias=True)
  (logvar): Linear(in_features=128, out_features=16, bias=True)
  (dec): Sequential(
    (0): Linear(in_features=16, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=2204, bias=True)
  )
)

In [15]:
# 4. VAE Latent Space

def latent_space (X_variables):
    # --- Convert to tensor ---
    X_np = X_variables
    X = torch.tensor(X_np, dtype=torch.float32)

    # --- DataLoader ---
    dataloader = DataLoader(
        TensorDataset(X),
        batch_size=128,
        shuffle=False
    )

    # --- Extract latent space ---
    latents = []

    model.eval()
    with torch.no_grad():
        for (x,) in dataloader:
            x = x.to(device)

            h = model.enc(x)
            mu = model.mu(h)

            latents.append(mu.cpu())

    X_latent = torch.cat(latents, dim=0).numpy()
    return X_latent

X_train_latent = latent_space(X_train_scaled)

Model GP training
1. Define Kernel
2. Train model (Create line to save it as well)
3. 

In [25]:
# 1. Define Kernel
kernel = (
    ConstantKernel(1.0, (1e-3, 1e3)) *
    Matern(length_scale=1.0, nu=1.5) +
    WhiteKernel(noise_level=1e-3, noise_level_bounds=(1e-5, 1e1))
)

Train the GP 

In [26]:
# 2. Train model
gp = GaussianProcessRegressor(
    kernel=kernel,
    n_restarts_optimizer=10,
    normalize_y=False
)

gp.fit(X_train_latent, y_train)

#joblib.dump(gp, "gp_model.joblib") # Save the model

print(gp.kernel_) # Inspect Kernel

13.5**2 * Matern(length_scale=0.0344, nu=1.5) + WhiteKernel(noise_level=10)




In [27]:
print(gp.kernel_) # Inspect Kernel

13.5**2 * Matern(length_scale=0.0344, nu=1.5) + WhiteKernel(noise_level=10)


Model Accuracy
1. Calculate the testing gp data (Convert testing data first)
2. Apply error metrics

In [28]:
# 1. Convert testing data
X_test_latent = latent_space(X_test_scaled)
y_test_pred, y_std =  gp.predict(X_test_latent, return_std=True)

In [29]:
# 2. Apply error metrics

mse  = mean_squared_error(y_test, y_test_pred)
rmse = np.sqrt(mse)
mae  = mean_absolute_error(y_test, y_test_pred)
r2   = r2_score(y_test, y_test_pred)

print(f"MSE  = {mse:.4f}")
print(f"RMSE = {rmse:.4f}")
print(f"MAE  = {mae:.4f}")
print(f"R²   = {r2:.4f}")

MSE  = 18.2975
RMSE = 4.2776
MAE  = 2.3763
R²   = 0.5890
