In [None]:
import sys
sys.path.append('../')

import numpy as np
import ase.io
import utilities.feature_space_measures
# feature measures
from utilities.feature_space_measures import (compute_gfre,
                                              compute_pointwise_gfre,
                                              compute_gfrd,
                                              compute_lfre,
                                              compute_pointwise_lfre)
# utils
from utilities.feature_space_measures import (standardize_features,
                                              standardize_kernel,
                                              generate_train_test_idx)
import matplotlib as mpl
import matplotlib.pyplot as plt
from rascal.representations import SphericalInvariants

# Global matplotlib styling applied to every figure of this notebook.
mpl.rcParams.update({
    'lines.linewidth': 3,
    'axes.linewidth': 2,
    'axes.labelsize': 28,
    'legend.fontsize': 20,
    'axes.titlesize': 30,
    'xtick.labelsize': 22,
    'ytick.labelsize': 22,
})

These are the hyperparameters for the representations that we use in all examples. The `soap_features` are symmetrized 3-body atom correlation function (SOAP) features, while the `bispectrum_features` are symmetrized 4-body atom correlation function (bispectrum) features.

In [None]:
# Scalar settings used by both representations.
radial_basis = "GTO"
cutoff = 4
max_radial = 2
max_angular = 2
smearing_sigma = 0.5
gaussian_sigma_type = "Constant"
cutoff_smooth_width = 0.5
normalize = False

# Every entry except "soap_type" is identical for the two representations,
# so the common part is written down once and copied into each dict.
_shared_hyperparameters = {
    "radial_basis": radial_basis,
    "interaction_cutoff": cutoff,
    "max_radial": max_radial,
    "max_angular": max_angular,
    "gaussian_sigma_constant": smearing_sigma,
    "gaussian_sigma_type": gaussian_sigma_type,
    "cutoff_smooth_width": cutoff_smooth_width,
    "normalize": normalize,
}

# Power spectrum settings (used for the `soap_features`).
soap_hyperparameters = {"soap_type": "PowerSpectrum", **_shared_hyperparameters}
# Bispectrum settings (used for the `bispectrum_features`).
bispectrum_hyperparameters = {"soap_type": "BiSpectrum", **_shared_hyperparameters}

def compute_features(feature_hypers, frames):
    """Compute ``SphericalInvariants`` features for a collection of frames.

    Parameters
    ----------
    feature_hypers : dict
        Keyword arguments forwarded to the ``SphericalInvariants`` constructor.
    frames
        Structures passed to ``SphericalInvariants.transform``.

    Returns
    -------
    The feature array produced by ``get_features`` on the transformed frames.
    """
    calculator = SphericalInvariants(**feature_hypers)
    transformed_frames = calculator.transform(frames)
    return transformed_frames.get_features(calculator)

### Example 1: GFRE/GFRD body order comparison

In [None]:
# Read the dataset with the index string ":10".
chemical_shift_frames = ase.io.read("../datasets/CSD-1000R.xyz", ":10")

soap_features = compute_features(soap_hyperparameters, chemical_shift_frames)
# The bispectrum features are additionally bound to the name `features`.
bispectrum_features = features = compute_features(bispectrum_hyperparameters, chemical_shift_frames)

# Index 0 is the 3-body (SOAP) feature set, index 1 the 4-body (bispectrum) one.
# Entry [row, col] holds measure(feature_sets[row], feature_sets[col]).
feature_sets = (soap_features, bispectrum_features)

gfre_matrix = np.zeros((2,2))
print("Computing GFRE...")
for row, source_features in enumerate(feature_sets):
    for col, target_features in enumerate(feature_sets):
        gfre_matrix[row, col] = compute_gfre(source_features, target_features)
print("Computing GFRE finished.")

gfrd_matrix = np.zeros((2,2))
print("Computing GFRD...")
for row, source_features in enumerate(feature_sets):
    for col, target_features in enumerate(feature_sets):
        gfrd_matrix[row, col] = compute_gfrd(source_features, target_features)
print("Computing GFRD finished.")

In [None]:
fig, axes = plt.subplots(1,2, figsize=(16,4))
# Left panel: GFRE matrix, right panel: GFRD matrix.
pcm1 = axes[0].imshow(gfre_matrix)
pcm2 = axes[1].imshow(gfrd_matrix)

body_order_labels = ["3-body","4-body"]
panel_titles = ("GFRE(F, F')", "GFRD(F, F')")
# Both panels share the same axis labels and tick labels; only the title differs.
for ax, panel_title in zip(axes, panel_titles):
    ax.set_ylabel("F")
    ax.set_xlabel("F'")
    ax.set_title(panel_title)
    ax.set_xticks([0,1])
    ax.set_xticklabels(body_order_labels)
    ax.set_yticks([0,1])
    ax.set_yticklabels(body_order_labels)

# One colorbar per panel, attached to the corresponding image.
plt.colorbar(pcm1, ax=axes[0], label="GFRE")
plt.colorbar(pcm2, ax=axes[1], label="GFRD")
plt.show()

It can be seen that the 4-body features reconstruct the 3-body features, albeit with significant distortion, better than the 3-body features reconstruct the 4-body features.

## Example 2: Pointwise GFRE with RBF RKHS features

Now we compute the RKHS features for the kernel 
\begin{align}
k_E^{\textrm{RBF}}(\mathbf{x},\mathbf{x}') = \exp(-\gamma \|\mathbf{x}-\mathbf{x}'\|^2),\quad \gamma\in\mathbb{R}_+ \label{eq:k-rbf}
\end{align}

In [None]:
def compute_standardized_rbf_rkhs_features(features, gamma, train_idx):
    """Build standardized RKHS features of an RBF kernel evaluated on ``features``.

    The features are standardized, the kernel ``exp(-gamma * ||x - x'||^2)``
    is evaluated between all pairs of rows, the kernel is standardized and
    then factorized with an SVD. Only the leading components whose cumulative
    share of the summed singular values stays below 99% are returned; at least
    one component is always kept.

    Parameters
    ----------
    features : numpy.ndarray
        2D array with one sample per row.
    gamma : float
        Prefactor of the squared distance in the exponent of the RBF kernel.
    train_idx
        Forwarded unchanged to ``standardize_features`` and
        ``standardize_kernel`` (presumably the indices of the training
        samples -- confirm against those helpers).

    Returns
    -------
    numpy.ndarray
        The selected columns of ``U`` scaled by the square root of the
        corresponding singular values, one row per row of the kernel.
    """
    features = standardize_features(features, train_idx)
    # ||x - x'||^2 = ||x||^2 + ||x'||^2 - 2 <x, x'>; the squared norms are computed once
    squared_norms = np.sum(features ** 2, axis=1)
    squared_distance = ( squared_norms[:, np.newaxis]
                         + squared_norms[np.newaxis, :] - 2 * features.dot(features.T) )
    kernel = np.exp(-gamma * squared_distance)
    kernel = standardize_kernel(kernel, train_idx)
    # np.linalg.svd returns the singular values already sorted in descending order
    U, S, _ = np.linalg.svd(kernel)
    # retain the features associated with the largest singular values which together
    # contribute less than 99% of the total; the threshold has to be applied
    # to the cumulative sum, not to the individual ratios
    cumulative_ratio = np.cumsum(S) / np.sum(S)
    select_idx = np.where(cumulative_ratio < 0.99)[0]
    if select_idx.size == 0:
        # the leading component alone reaches the threshold: keep it so that
        # the returned feature matrix is never empty
        select_idx = np.array([0])
    # scaling the columns by broadcasting is equivalent to multiplying with diag(sqrt(S))
    return U[:, select_idx] * np.sqrt(S[select_idx])

# we use the default cross validation args from the paper to generate the training idx
my_cross_validation_kwargs = utilities.feature_space_measures.DEFAULT_CROSS_VALIDATION_KWARGS
# summed length of all frames, i.e. the number of samples to split
nb_samples = sum(len(frame) for frame in chemical_shift_frames)
train_idx, test_idx = generate_train_test_idx(
    nb_samples,
    my_cross_validation_kwargs['train_test_split'],
    my_cross_validation_kwargs['train_ratio'],
    my_cross_validation_kwargs['seed'],
)
# prefactor of the squared distance in the RBF kernel
gamma = 0.1
soap_rbf_features = compute_standardized_rbf_rkhs_features(soap_features, gamma, train_idx)

In [None]:
print("Computing pointwise GFRE...")
# plain 3-body features -> 4-body features
soap_to_bispectrum_pointwise_gfre = compute_pointwise_gfre(
    soap_features, bispectrum_features, my_cross_validation_kwargs)
# RBF RKHS features of the 3-body features -> 4-body features
soap_rbf_to_bispectrum_pointwise_gfre = compute_pointwise_gfre(
    soap_rbf_features, bispectrum_features, my_cross_validation_kwargs)
print("Computing pointwise GFRE finished.")
# Collapse each set of pointwise errors into one number:
# the norm divided by the square root of the number of entries.
soap_to_bispectrum_gfre, soap_rbf_to_bispectrum_gfre = (
    np.linalg.norm(pointwise_errors)/np.sqrt(len(pointwise_errors))
    for pointwise_errors in (soap_to_bispectrum_pointwise_gfre,
                             soap_rbf_to_bispectrum_pointwise_gfre)
)

In [None]:
fig, axes = plt.subplots(1,1, figsize=(12,7))

# Histograms of the pointwise errors (the first one uses the default color).
histogram_specs = (
    (soap_to_bispectrum_pointwise_gfre,
     {"label": "pointwise GFRE(3-body, 4-body)"}),
    (soap_rbf_to_bispectrum_pointwise_gfre,
     {"color": 'r', "label": "pointwise GFRE(3-body RBF, 4-body)"}),
)
for pointwise_errors, hist_kwargs in histogram_specs:
    axes.hist(pointwise_errors, alpha=0.8, **hist_kwargs)

# Vertical lines mark the aggregated GFRE of each feature pair.
vertical_line_specs = (
    (soap_to_bispectrum_gfre, 'darkblue', "GFRE(3-body, 4-body)"),
    (soap_rbf_to_bispectrum_gfre, 'darkred', "GFRE(3-body RBF, 4-body)"),
)
for gfre_value, line_color, line_label in vertical_line_specs:
    axes.axvline(gfre_value, color=line_color, label=line_label)

axes.set_title(f"3-body vs 4-body RBF gamma={gamma} comparison")
axes.set_xlabel("pointwise GFRE")
axes.set_ylabel("number of samples")
axes.legend()
plt.show()

It can be seen that the RBF features improved the reconstruction of the 4-body features.

### Example 3: LFRE on the degenerate manifold dataset

In [None]:
degenerate_manifold_frames = ase.io.read("../datasets/manif-minus-plus.extxyz", ":")
for frame in degenerate_manifold_frames:
    # give every structure the same cell (three equal lengths of 15) and center the atoms in it
    frame.cell = np.ones(3)*15
    frame.center()
    # overwrite all atomic numbers with 1 so that every atom has the same species
    frame.numbers = np.ones(len(frame.numbers))

# select methane environments: only every 5th row of the feature matrices is kept
print("Computing features...")
soap_features = compute_features(soap_hyperparameters, degenerate_manifold_frames )[::5]
# the bispectrum features are additionally bound to the name `features`
bispectrum_features = features = compute_features(bispectrum_hyperparameters, degenerate_manifold_frames)[::5]
print("Computing features finished.")


# value passed as the third argument of compute_pointwise_lfre
nb_local_envs = 20
print("Computing pointwise LFRE...")
soap_to_bispectrum_pointwise_lfre = compute_pointwise_lfre(soap_features, bispectrum_features, nb_local_envs)
bispectrum_to_soap_pointwise_lfre = compute_pointwise_lfre(bispectrum_features, soap_features, nb_local_envs)
print("Computing pointwise LFRE finished.")

# each aggregate is normalized by the length of its own pointwise array
soap_to_bispectrum_lfre = np.linalg.norm(soap_to_bispectrum_pointwise_lfre)/np.sqrt(len(soap_to_bispectrum_pointwise_lfre))
bispectrum_to_soap_lfre = np.linalg.norm(bispectrum_to_soap_pointwise_lfre)/np.sqrt(len(bispectrum_to_soap_pointwise_lfre))
print(f"LFRE(3-body, 4-body) = {soap_to_bispectrum_lfre}")
print(f"LFRE(4-body, 3-body) = {bispectrum_to_soap_lfre}")

In [None]:
fig, axes = plt.subplots(1,2, constrained_layout=True, figsize=(16,7.5))
# upper end of the color scale shared by both panels
vmax = 0.4
# 9 x 9 grid on which the pointwise values are drawn
X, Y = np.meshgrid(np.linspace(0.7,0.9,9), np.linspace(-0.1,0.1,9))

# NOTE(review): the slice [81:] takes the entries after the first 81 samples while
# the titles say "X minus" -- confirm that this is the intended part of the dataset.
panels = (
    (axes[0], soap_to_bispectrum_pointwise_lfre, "X minus LFRE(3-body, 4-body)"),
    (axes[1], bispectrum_to_soap_pointwise_lfre, "X minus LFRE(4-body, 3-body)"),
)
contour_sets = []
for ax, pointwise_lfre, panel_title in panels:
    grid_values = pointwise_lfre[81:].reshape(9,9).T
    contour_sets.append(ax.contourf(X, Y, grid_values, vmin=0, vmax=vmax))
    ax.set_ylabel("v/π")
    ax.set_xlabel("u/π")
    ax.set_title(panel_title)

# the shared colorbar is built from the left panel
pcm = contour_sets[0]
fig.colorbar(pcm, ax=axes, label="", location="bottom")
plt.show()

It can be seen that the samples corresponding to the degenerate line at v/π = 0 cannot be reconstructed well locally. The result stands out more for a higher number of basis functions.