### Imports

In [None]:
import matplotlib.pyplot as plt
import numpy as np

from specmf.data import load_data
from specmf.models import Graph, MultiFidelityModel
from specmf.plot import (
    plot_cluster_size_hist,
    plot_data,
    plot_distributions,
    plot_loss_and_kappa,
    plot_mf_comparison,
    plot_spectrum,
)
from specmf.utils import error_analysis, load_model_config

In [None]:
%%html
<style type='text/css'>
/* Font size applied to elements with the CodeMirror class. */
.CodeMirror {
    font-size: 14px;
}
</style>

### Data loading

In [None]:
# Dataset identifier; reused further down for plotting and config lookup.
dataset_name = "elasticity-traction"

# Options forwarded to `load_data`. The normalization variables are requested
# so that results can later be mapped back with the stored mean and scale.
load_options = dict(
    preprocess=True,
    normalize=True,
    return_normalization_vars=True,
    flatten=True,
)
x_LF, x_HF, normalization_vars = load_data(dataset_name, **load_options)

print(f"{x_LF.shape=}", f"{x_HF.shape=}")

### Plot data 

In [None]:
# Plot the loaded low- and high-fidelity arrays for this dataset.
plot_data(x_LF, x_HF, dataset_name)

### Multi-fidelity model

#### Create graph and model instances

In [None]:
# Settings forwarded to `Graph`. They live in a dict so the exact same
# configuration can be reused for other graphs built in this notebook.
graph_config = dict(
    metric='euclidean',
    dist_space='ambient',
    n_components=None,
    method='full',
    k_nn=None,
    corr_scale=None,
    k_adj=7,
    p=0.5,
    q=0.5,
)

# Graph built on the low-fidelity data.
g_LF = Graph(data=x_LF, **graph_config)

#### Perform spectral clustering

In [None]:
# Value passed to `cluster`; presumably the number of clusters, i.e. the number
# of high-fidelity training samples — confirm against `Graph.cluster`.
n_HF = 150
# `inds_train` is used below to select the high-fidelity training rows;
# `labels` is used for the cluster-size histogram.
inds_train, labels = g_LF.cluster(n_HF)

In [None]:
# Histogram of cluster sizes, derived from the clustering labels.
plot_cluster_size_hist(labels)

In [None]:
# Plot Laplacian spectrum
# Only `eigvals` is consumed in this cell; `eigvecs` is kept in the namespace.
eigvals, eigvecs = g_LF.laplacian_eig()
# Second argument is 50 — presumably the number of eigenvalues shown;
# confirm against `plot_spectrum`.
plot_spectrum(eigvals, 50)

#### Fit and train the model

##### Split high-fidelity data

In [None]:
# Every sample that was not selected for training becomes a test sample.
n_samples = x_HF.shape[0]
is_test = np.ones(n_samples, dtype=bool)
is_test[inds_train] = False
inds_test = np.flatnonzero(is_test)

x_HF_train, x_HF_test = x_HF[inds_train, :], x_HF[inds_test, :]

print(f"{x_HF_train.shape=}", f"{x_HF_test.shape=}")

In [None]:
# Switch read by the fitting cell: True fits the model with `fit_transform`,
# False loads a stored configuration and only calls `transform`.
fit_model = True

# Create the model
model_config = {
    'sigma': 0.01,
    'method': 'full'
}
model = MultiFidelityModel(**model_config)

In [None]:
# Produce the multi-fidelity estimates `x_MF` together with `C_phi` and `dPhi`,
# either by fitting the model here or by reusing a stored configuration.
if fit_model:
    # Fit and transform in one call; the loss and kappa histories are returned
    # as well and plotted below.
    x_MF, C_phi, dPhi, loss_history, kappa_history = model.fit_transform(
        g_LF,
        x_HF_train,
        inds_train,
        maxiter=50,
        verbose=False,
    )
    model.summary()
    plot_loss_and_kappa(loss_history, kappa_history)

# NOTE: this branch replaces both `model_config` and `model`, and it does not
# define `loss_history` / `kappa_history`.
else:
    from pathlib import Path

    # The YAML path is resolved relative to the current working directory
    # (two levels up), so it depends on where the kernel was started.
    notebook_path = Path.cwd()
    yaml_file_path = notebook_path.parent.parent / 'data/model_configs.yaml'
    model_config = load_model_config(yaml_file_path, dataset_name)
    
    model = MultiFidelityModel(**model_config)

    x_MF, C_phi, dPhi = model.transform(g_LF, x_HF_train, inds_train)
    model.summary()

In [None]:
# Histogram of `dPhi`, labelled as variance on the x-axis.
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
ax.hist(dPhi, bins=20)
ax.set_xlabel("Variance", fontsize=14)
ax.set_ylabel("Frequency", fontsize=14)
ax.set_title("Variance histogram for Soft body traction data", fontsize=18)
ax.tick_params(axis="both", labelsize=12)
ax.grid(True, linestyle="--", linewidth=0.5)
# Run the layout pass last so it accounts for the final title and tick-label
# sizes; calling it earlier computes margins for text that is not there yet.
fig.tight_layout()

### Results

In [None]:
# Error analysis for the unseen test dataset
error_analysis(x_LF[inds_test], x_MF[inds_test], x_HF_test, component_wise=True,)

In [None]:
# Error analysis for the whole dataset (training and test samples together)
error_analysis(x_LF, x_MF, x_HF, component_wise=True,)

In [None]:
# Error analysis for the whole dataset in ambient space.
# Each array is mapped back with the stored scale and mean (x * scale + mean)
# before the fidelities are compared.
X_mean, X_scale = normalization_vars['X_mean'], normalization_vars['X_scale']

X_LF, X_HF, X_MF = (
    x_norm * X_scale + X_mean for x_norm in (x_LF, x_HF, x_MF)
)

eLF, eMF = error_analysis(X_LF, X_MF, X_HF, component_wise=True, return_values=True)

In [None]:
# Collapse the returned error values to a single mean per fidelity,
# rounded to two decimals.
print(f"Average LF error across all components: {np.round(np.mean(eLF), 2)}")
print(f"Average MF error across all components: {np.round(np.mean(eMF), 2)}")

In [None]:
# One error-distribution figure per output component, titled with its symbol.
comps = [r"$f_1$", r"$f_2$", r"$f_3$", r"$f_4$", r"$\sigma_{yy}^{\mathrm{max}}$"]
for i, comp in enumerate(comps):
    ref = X_HF[:, i:i+1]
    # Both errors share one normalization: the mean magnitude of the
    # high-fidelity component, padded by 1e-3 to avoid dividing by zero.
    denom = np.mean(np.linalg.norm(ref, axis=1)) + 1e-3
    E_LF = 100 * np.linalg.norm(X_LF[:, i:i+1] - ref, axis=1) / denom
    E_MF = 100 * np.linalg.norm(X_MF[:, i:i+1] - ref, axis=1) / denom

    _, axs = plot_distributions(E_LF, E_MF, bins_LF=40, bins_MF=50, mask=None, return_axs=True)
    axs[0].set_title(comp, y=0.8, fontsize=26,)

In [None]:
# Compare low-, multi- and high-fidelity data, passing the training indices
# as `inds_centroids`.
# NOTE(review): the normalized arrays (x_*) are passed to the X_* parameters,
# not the rescaled X_* arrays — confirm this is intended.
plot_mf_comparison(
    dataset_name=dataset_name,
    X_LF=x_LF,
    X_MF=x_MF,
    X_HF=x_HF,
    inds_centroids=inds_train,
)

### Visualize uncertainty of multi-fidelity estimates

In [None]:
import matplotlib.ticker as ticker


def fmt(x, pos):
    """Format a tick value as LaTeX scientific notation.

    The value is rendered with two decimals in the mantissa, e.g.
    ``12345.678`` becomes ``$1.23 \\times 10^{4}$``.

    Args:
        x: Numeric tick value.
        pos: Tick position; accepted so the function matches the
            ``(value, position)`` formatter signature, but not used.

    Returns:
        The formatted math-text string.
    """
    mantissa, _, exponent = f'{x:.2e}'.partition('e')
    # int() drops the sign padding and leading zeros ("+04" -> 4).
    return rf'${mantissa} \times 10^{{{int(exponent)}}}$'

In [None]:
import umap


# Build a graph on the multi-fidelity estimates with the same configuration
# as the low-fidelity graph.
g_MF = Graph(data=x_MF, **graph_config)
# Use the eigenvectors of *this* graph. Previously the result was bound to a
# misspelled name, so the low-fidelity `eigvecs` from an earlier cell were
# embedded instead.
_, eigvecs_MF = g_MF.laplacian_eig()
# Keep the real part of the first `n_HF` eigenvectors as embedding features.
X = np.real(eigvecs_MF[:, :n_HF])


# 2-D UMAP embedding; `random_state` is fixed so the layout is repeatable.
X_umap = umap.UMAP(
    n_components=2,
    n_neighbors=30,
    min_dist=1,
    init='random',
    random_state=42,
).fit_transform(X)

In [None]:
# UMAP embedding coloured by `dPhi`, with the training points overlaid in red.
fig, ax = plt.subplots(1, 1, figsize=(12, 9.5))

# `ax1` is the scatter artist (not an Axes); it feeds the colorbar below.
ax1 = ax.scatter(X_umap[:, 0], X_umap[:, 1], s=15, c=dPhi, vmin=0.02)
ax.scatter(X_umap[inds_train, 0], X_umap[inds_train, 1], s=30, c='r', marker='o')

# Colorbar ticks are rendered through `fmt`.
cb = fig.colorbar(ax1, ax=ax, format=ticker.FuncFormatter(fmt))
cb.ax.tick_params(labelsize=16)

# Ticks are removed from both axes.
ax.set(xticks=[], yticks=[])
ax.set_title('UMAP of Elastic Body data (Case 1)', fontsize=26, pad=10)