### Imports

In [1]:
import numpy as np
from specmf.models import Graph, MultiFidelityModel
from specmf.utils import error_analysis, load_model_config
from specmf.data import load_data
from specmf.plot import *

In [None]:
%%html
<style type='text/css'>
.CodeMirror{
font-size: 14px;
}
</style>

### Data loading

In [None]:
# Dataset identifiers; the one selected below is passed to `load_data`.
dataset_names = [
    "darcy-flow",
    "inclusion-field",
    "inclusion-qoi",
    "beam",
    "cavity",
    "burgers",
]

# Change the index to run the notebook on a different dataset.
dataset_name = dataset_names[0]

# Keyword options forwarded unchanged to `load_data`.
load_options = {"preprocess": True, "normalize": True, "flatten": True}
x_LF, x_HF = load_data(dataset_name, **load_options)

# Report the shapes of the low- and high-fidelity arrays.
print(f"{x_LF.shape=} {x_HF.shape=}")

### Plot data 

In [None]:
# Plot the loaded low- and high-fidelity data for the selected dataset.
plot_data(x_LF, x_HF, dataset_name)

### Multi-fidelity model

#### Create graph and model instances

In [None]:
# Graph settings; every entry is forwarded to `Graph` as a keyword argument.
graph_config = dict(
    metric="euclidean",
    dist_space="ambient",
    n_components=None,
    method="full",
    k_nn=None,
    corr_scale=None,
    k_adj=7,
    p=0.5,
    q=0.5,
)
# Graph built on the low-fidelity data.
g_LF = Graph(data=x_LF, **graph_config)

# Model settings; every entry is forwarded to `MultiFidelityModel`.
model_config = dict(sigma=0.01, method="full")
model = MultiFidelityModel(**model_config)

#### Perform spectral clustering

In [None]:
# `n_HF` is passed to `model.cluster`; the returned `inds_train` are later used
# to select the high-fidelity training rows.
# NOTE(review): presumably `n_HF` is the number of clusters / training samples
# and `labels` holds one cluster label per sample — confirm against specmf.
n_HF = 100
inds_train, labels = model.cluster(g_LF, n_HF)

In [None]:
# Plot a histogram of cluster sizes from the labels returned by `model.cluster`.
plot_cluster_size_hist(labels)

In [None]:
# Plot Laplacian spectrum
# `eigvecs` is not used in this cell; only the eigenvalues are plotted.
# NOTE(review): the second argument (50) is presumably the number of
# eigenvalues to display — confirm against `plot_spectrum`.
eigvals, eigvecs = g_LF.laplacian_eig()
plot_spectrum(eigvals, 50)

#### Split high-fidelity data

In [None]:
# Every sample index that is not a training index becomes a test index.
n_samples = x_HF.shape[0]
is_test = np.ones(n_samples, dtype=bool)
is_test[inds_train] = False
inds_test = np.arange(n_samples)[is_test]

# Slice the high-fidelity data into its training and test parts.
x_HF_train, x_HF_test = x_HF[inds_train, :], x_HF[inds_test, :]

print(f"{x_HF_train.shape=} {x_HF_test.shape=}")

#### Fit and train the model

In [None]:
# True: fit the model in the next cell.
# False: skip fitting and load a stored configuration from
# data/model_configs.yaml instead.
fit_model = True

In [None]:
if fit_model:
    # Fit the model and compute the multi-fidelity data in one call; the
    # loss and kappa histories are returned alongside the results.
    x_MF, C_phi, dPhi, loss_history, kappa_history = model.fit_transform(
        g_LF,
        x_HF_train,
        inds_train,
        maxiter=30,
        step_size=25,
        step_decay_rate=1.2,
        ftol=1e-7,
        verbose=True,
    )
    model.summary()

    # One x-axis entry per recorded loss value.
    iterations = np.arange(len(loss_history))
    plot_loss_and_kappa(iterations, loss_history, kappa_history)

else:
    from pathlib import Path

    # NOTE(review): assumes the working directory is the notebook's folder, so
    # the YAML file lives in the sibling `data` directory — confirm.
    notebook_path = Path.cwd()
    yaml_file_path = notebook_path.parent / 'data/model_configs.yaml'
    model_config = load_model_config(yaml_file_path, dataset_name)

    # Validate before unpacking: `**None` would raise an opaque TypeError, and
    # silently skipping `transform` would leave x_MF / dPhi undefined (or
    # stale from a previous run) for the cells that follow.
    if not model_config:
        raise ValueError(
            f"No model configuration found for dataset {dataset_name!r} "
            f"in {yaml_file_path}"
        )

    # Rebuild the model from the stored configuration and apply it directly.
    model = MultiFidelityModel(**model_config)
    x_MF, C_phi, dPhi = model.transform(g_LF, x_HF_train, inds_train)
    model.summary()

In [None]:
# Histogram of the `dPhi` values returned by the model.
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
ax.hist(dPhi, bins=20)
ax.set_xlabel("Variance", fontsize=14)
ax.set_ylabel("Frequency", fontsize=14)
ax.set_title("Variance histogram", fontsize=18)
ax.tick_params(axis="both", labelsize=12)
ax.grid(True, linestyle="--", linewidth=0.5)
# Call last: tight_layout only accounts for artists that exist when it runs,
# so the title and tick-label sizes must be set beforehand.
plt.tight_layout()

### Results

In [None]:
# Error analysis for the unseen test dataset (rows not in `inds_train`)
error_analysis(x_LF[inds_test], x_MF[inds_test], x_HF_test)

In [None]:
# Error analysis for the whole dataset (this includes the high-fidelity
# training rows selected by `inds_train`)
error_analysis(x_LF, x_MF, x_HF)

In [None]:
# Shared normalisation: mean row-wise norm of the high-fidelity data, plus a
# small 1e-3 offset.
hf_scale = np.mean(np.linalg.norm(x_HF, axis=1)) + 1e-3

# Row-wise distance of the low- and multi-fidelity data from the
# high-fidelity data.
lf_residual = np.linalg.norm(x_LF - x_HF, axis=1)
mf_residual = np.linalg.norm(x_MF - x_HF, axis=1)

# Per-sample errors expressed as a percentage of the shared scale.
E_LF = 100 * lf_residual / hf_scale
E_MF = 100 * mf_residual / hf_scale

plot_distributions(E_LF, E_MF, bins_LF=50, bins_MF=50, mask=None)

In [None]:
# Compare low-, multi- and high-fidelity data for the selected dataset;
# `dPhi` and the training indices (as centroid indices) are passed along.
plot_mf_comparison(x_LF, x_MF, x_HF, dataset_name, dPhi=dPhi, inds_centroids=inds_train)