# Optimal radial basis construction

This notebook discusses the construction of a data-driven optimal radial basis for expanding the atom-centered neighbor density, and walks through a practical example of its application. 
See [REF] for a discussion of the idea and implementation.

In [None]:
from ase.io import read
from copy import deepcopy
import numpy as np
import matplotlib.pyplot as plt

from rascal.representations import SphericalExpansion, SphericalInvariants
from rascal.utils import (get_radial_basis_covariance, get_radial_basis_pca, 
                          get_radial_basis_projections, get_optimal_radial_basis_hypers )
from rascal.utils import radial_basis

# Load the structures

In [None]:
import urllib.request
# Source data: a collection of distorted ethanol molecules from the ANI-1 dataset
# (see https://github.com/isayev/ANI1_dataset) with energies and forces computed using DFTB+
# (see https://www.dftbplus.org/).
# The URL embeds a commit hash of the example-data repository.
url = 'https://raw.githubusercontent.com/cosmo-epfl/librascal-example-data/833b4336a7daf471e16993158322b3ea807b9d3f/inputs/molecule_conformers_dftb.xyz'
# Download the file from `url`. No target filename is given, so `urlretrieve`
# stores it in a temporary location and returns the local path
# (e.g. '/tmp/tmpb48zma.txt'), kept here in `structures_fn`, plus the headers.
structures_fn, headers = urllib.request.urlretrieve(url)
# Last expression of the cell: display the local path of the downloaded file.
structures_fn

In [None]:
# Number of structures to read from the downloaded file
N = 100

# Read the first N frames; the index string evaluates to ':100' for N = 100
frames = read(structures_fn, f":{N}")

# Demonstrate the optimization

First, we compute the density expansion coefficients on a representative dataset.

In [None]:
# Hyperparameters of the large primitive DVR basis (30 radial functions) whose
# expansion coefficients are analysed in the following cells.
spherical_expansion_hypers = dict(
    interaction_cutoff=3,
    max_radial=30,
    max_angular=8,
    gaussian_sigma_constant=0.3,
    gaussian_sigma_type="Constant",
    cutoff_smooth_width=0.5,
    radial_basis="DVR",
)

# Calculator for the density expansion with the hyperparameters above
spex = SphericalExpansion(**spherical_expansion_hypers)

In [None]:
%%time
# Compute the density expansion coefficients of all frames and retrieve them
# organised per species (see `get_features_by_species`).
feats = spex.transform(frames).get_features_by_species(spex)

Rotation-invariant covariance matrices are computed separately for each species and angular channel;
their principal components are then computed.

In [None]:
%%time
# Covariance of the expansion coefficients; according to the accompanying text
# there is one matrix per species and angular channel.
cov = get_radial_basis_covariance(spex, feats)

In [None]:
%%time
# Principal component analysis of the covariances. The results are indexed in
# later cells by species tuple, e.g. (1,), and then by angular channel.
p_val, p_vec = get_radial_basis_pca(cov)

We can visualize how quickly the principal-component eigenvalues decay. Hint: it's FAST!

In [None]:
# Eigenvalue spectra of the radial covariance matrices.
# `p_val` is keyed by species tuple ((1,) is hydrogen, (6,) is carbon) and then
# indexed by the angular channel l. Colour encodes l (red: l=0, blue: l=3) and
# line style encodes the species (solid: H, dashed: C). The legend labels are
# written to match the data that is actually plotted.
plt.loglog(p_val[(1,)][0], 'r', label="H, l=0")
plt.loglog(p_val[(1,)][3], 'b', label="H, l=3")
plt.loglog(p_val[(6,)][0], 'r--', label="C, l=0")
plt.loglog(p_val[(6,)][3], 'b--', label="C, l=3")
plt.ylim(1e-12,1e-4)
plt.xlabel("n")
# raw string: "\l" is not a valid escape sequence in a normal string literal
plt.ylabel(r"$\lambda$")
plt.legend()

The principal components can be used as projectors to compute a contracted basis. 10 components are (way) more than enough!

In [None]:
# Build the projection matrices from the principal vectors; per the text above,
# the second argument (10) is the number of components retained.
p_mat = get_radial_basis_projections(p_vec, 10)

In [None]:
# Same setup as the primitive DVR expansion, but with only 10 radial channels:
# the projection matrices contract the radial basis ("RadialDimReduction"),
# and a spline optimization with the requested accuracy is enabled.
spherical_expansion_optimal_hypers = dict(
    interaction_cutoff=3,
    max_radial=10,
    max_angular=8,
    gaussian_sigma_constant=0.3,
    gaussian_sigma_type="Constant",
    cutoff_smooth_width=0.5,
    radial_basis="DVR",
    optimization=dict(
        RadialDimReduction=dict(projection_matrices=p_mat),
        Spline=dict(accuracy=1e-8),
    ),
)

# Calculator that evaluates the contracted expansion
spex_optimal = SphericalExpansion(**spherical_expansion_optimal_hypers)

Evaluation is much faster because the contracted features are computed directly.

In [None]:
%%time
# Expansion coefficients in the contracted basis (timed for comparison with
# the evaluation of the primitive basis above).
feats_optimal = spex_optimal.transform(frames).get_features(spex_optimal)

We can also see what these functions look like "in real space".

In [None]:
# Radial grid on which the basis functions are tabulated
r_grid = np.linspace(1e-5, 3.9, num=1000)

# Primitive DVR functions on the grid; the three hyperparameters are passed
# positionally in the order (max_radial, interaction_cutoff, gaussian_sigma_constant).
dvr = radial_basis.radial_basis_functions_dvr(
    r_grid,
    *(
        spherical_expansion_hypers[key]
        for key in ("max_radial", "interaction_cutoff", "gaussian_sigma_constant")
    ),
)

In [None]:
# Combine the primitive DVR functions with the first 10 principal vectors of
# the species key (1,) (hydrogen), angular channel index 0.
p_dvr_h = p_vec[(1,)][0,:,:10].T @ dvr

In [None]:
# Primitive DVR functions (dotted), each multiplied by r
for y in dvr:
    plt.plot(r_grid, r_grid * y, linestyle=":")

# First three contracted functions (solid), also multiplied by r
for component, fmt in zip(p_dvr_h[:3], ("k", "b", "r")):
    plt.plot(r_grid, r_grid * component, fmt)

plt.ylim(-5, 5)

... and this works equally well with GTOs (and the optimal functions are the same!)

In [None]:
# Switch the primitive basis to 20 GTOs. This modifies the hyperparameter
# dictionary in place and replaces spex / feats / cov / p_val / p_vec.
spherical_expansion_hypers["radial_basis"] = "GTO"
spherical_expansion_hypers["max_radial"] = 20
spex = SphericalExpansion(**spherical_expansion_hypers)

# Same pipeline as for the DVR basis: coefficients -> covariance -> PCA
feats = spex.transform(frames).get_features_by_species(spex)
cov = get_radial_basis_covariance(spex, feats)
p_val, p_vec = get_radial_basis_pca(cov)

In [None]:
# Primitive GTO functions on the radial grid; arguments are passed positionally
# in the order (max_radial, interaction_cutoff).
gto = radial_basis.radial_basis_functions_gto(
    r_grid,
    *(spherical_expansion_hypers[key] for key in ("max_radial", "interaction_cutoff")),
)

# Contract with the first 10 principal vectors of species key (1,) (hydrogen),
# angular channel index 0.
hydrogen_l0_vectors = p_vec[(1,)][0, :, :10]
p_gto_h = hydrogen_l0_vectors.T @ gto

In [None]:
# Primitive GTO functions (dotted), each multiplied by r
for y in gto:
    plt.plot(r_grid, r_grid * y, linestyle=":")

# First three contracted functions (solid); the third is plotted with its
# sign flipped.
# NOTE(review): unlike the DVR plot, these curves are not multiplied by
# r_grid - confirm that this is intentional.
for component, fmt in ((p_gto_h[0], "k"), (p_gto_h[1], "b"), (-p_gto_h[2], "r")):
    plt.plot(r_grid, component, fmt)

plt.ylim(-5, 5)

# Streamlined helper function

In [None]:
# Hyperparameters of the target power-spectrum representation
# (8 GTO radial channels, unnormalized features).
soap_hypers = dict(
    soap_type="PowerSpectrum",
    interaction_cutoff=3,
    max_radial=8,
    max_angular=5,
    gaussian_sigma_constant=0.3,
    gaussian_sigma_type="Constant",
    cutoff_smooth_width=0.5,
    radial_basis="GTO",
    normalize=False,
)

In [None]:
# One-call helper: derive optimal-radial-basis hypers for `frames`, starting
# from a primitive basis with `expanded_max_radial=20` functions.
# NOTE: the result overwrites `soap_hypers`, so re-running this cell feeds the
# already-optimized hypers back into the helper.
soap_hypers = get_optimal_radial_basis_hypers(soap_hypers, frames, expanded_max_radial=20)

In [None]:
# Spherical-invariants calculator built from the current `soap_hypers`.
soap_optimal = SphericalInvariants(**soap_hypers)

### In this example notebook we show how to compute spherical invariants for optimized radial basis functions.

In [None]:
import copy
import numpy as np

from rascal.representations import SphericalExpansion, SphericalInvariants
import ase

def construct_dimer_frames(begin_range=1, end_range=2, step_size=0.01):
    """
    Construct a series of ase "CH" dimer frames with increasing bond length.

    The first atom (C) sits at the origin and the second atom (H) on the z
    axis. The interatomic distance starts at `begin_range` and grows in
    uniform increments of `step_size` without exceeding `end_range`.

    Parameters
    ----------
    begin_range : float
        Distance between the two atoms in the first frame.
    end_range : float
        Upper bound for the distance; must be larger than `begin_range`.
    step_size : float
        Positive increment of the distance between consecutive frames.

    Returns
    -------
    list of ase.Atoms
        Non-periodic frames, one per distance, in increasing order.

    Raises
    ------
    AssertionError
        If `begin_range` is not smaller than `end_range`.
    ValueError
        If `step_size` is not positive.
    """
    assert begin_range < end_range
    if step_size <= 0:
        raise ValueError("step_size must be positive")

    # A plain int() truncation loses the last frame whenever the quotient is
    # rounded just below an integer (e.g. 0.3 / 0.1 == 2.9999999999999996),
    # hence the small tolerance before taking the floor.
    nb_dimers = int(np.floor((end_range - begin_range) / step_size + 1e-9))

    # large enough cell
    box_length = end_range + 50

    frames = []
    for i in range(nb_dimers + 1):
        # computed from the index to avoid accumulating round-off errors
        distance = begin_range + i * step_size
        frames.append(
            ase.Atoms(
                "CH",
                positions=[[0, 0, 0], [0, 0, distance]],
                cell=[box_length, box_length, box_length],
                pbc=[False, False, False],
            )
        )
    return frames


# Dimer frames with distances from 1 to 2 in steps of 0.01.
# From here on `frames` refers to these dimers and no longer to any frames
# loaded earlier in the notebook.
frames = construct_dimer_frames(
    begin_range=1,
    end_range=2,
    step_size=0.01,
)




In [None]:
# Total number of atomic environments in the dataset
n_environments = sum(len(frame) for frame in frames)
# Size of the contracted (optimized) radial basis ...
max_radial = 5
# ... and of the primitive basis it is built from
expanded_max_radial = 20
max_angular = 0
# Sorted unique atomic numbers present in the dataset; concatenating first
# keeps this valid for frames with different numbers of atoms.
species = [
    int(sp) for sp in np.unique(np.concatenate([frame.numbers for frame in frames]))
]
n_species = len(species)
gaussian_sigma = 0.5
cutoff = 5

spherical_expansion_hypers = {
    "interaction_cutoff": cutoff,
    "max_radial": expanded_max_radial,
    "max_angular": max_angular,
    "gaussian_sigma_constant": gaussian_sigma,
    "gaussian_sigma_type": "Constant",
    "cutoff_smooth_width": 0.5,
    "radial_basis": "DVR",
}

# compute projection matrices
spherical_expansion_calculator = SphericalExpansion(**spherical_expansion_hypers)
spherical_expansion_coefficients = spherical_expansion_calculator.transform(
    frames
).get_features(spherical_expansion_calculator)

# NOTE(review): `compute_invariant_covariance_matrices` and
# `compute_projection_matrix` are neither defined nor imported in the visible
# part of this notebook - confirm where they are supposed to come from.
invariant_covariance_matrices = compute_invariant_covariance_matrices(
    spherical_expansion_coefficients, n_species, expanded_max_radial, max_angular
)

# One list of projection matrices (one entry per angular channel) per species,
# keyed by atomic number. The loop variable is deliberately not called
# `species`, so that the list of species is not overwritten.
projection_matrices = {}
for a_idx, atomic_number in enumerate(species):
    projection_matrices[atomic_number] = []
    for l in range(max_angular + 1):
        projection_matrices[atomic_number].append(
            compute_projection_matrix(
                invariant_covariance_matrices[a_idx, l], max_radial
            ).tolist()
        )

### Compute the hydrogen channel for the carbon environments


In [None]:
def _hydrogen_channel_every_other_env(hypers):
    """
    Build a SphericalInvariants calculator from `hypers`, evaluate it on
    `frames` and return `(calculator, features)`, where the features are the
    (1,) species block restricted to every second row, starting with the first.
    """
    calculator = SphericalInvariants(**hypers)
    features_by_species = calculator.transform(frames).get_features_by_species(
        calculator
    )
    # presumably rows 0, 2, 4, ... are the carbon-centered environments of the
    # "CH" frames - confirm the row ordering
    return calculator, features_by_species[(1,)][0::2]


# Any soap_type can be used with RadialDimReduction,
# we choose RadialSpectrum to make radial plots
spherical_invariant_hypers = copy.deepcopy(spherical_expansion_hypers)
spherical_invariant_hypers["soap_type"] = "RadialSpectrum"
spherical_invariant_hypers["normalize"] = False

# 1) large primitive basis, taken as ground truth
spherical_invariant_hypers["max_radial"] = expanded_max_radial
(
    full_radial_spectrum_calculator,
    full_radial_spectrum,
) = _hydrogen_channel_every_other_env(spherical_invariant_hypers)

# 2) small primitive basis
spherical_invariant_hypers["max_radial"] = max_radial
(
    radial_spectrum_calculator,
    radial_spectrum,
) = _hydrogen_channel_every_other_env(spherical_invariant_hypers)

# 3) small basis obtained by the optimized projection from
#    expanded_max_radial to max_radial basis functions
optimization = {
    "RadialDimReduction": {"projection_matrices": projection_matrices},
    "Spline": {"accuracy": 1e-8},
}
spherical_invariant_hypers["optimization"] = optimization
(
    optimized_radial_spectrum_calculator,
    optimized_radial_spectrum,
) = _hydrogen_channel_every_other_env(spherical_invariant_hypers)

### 

### We compute the real space reconstruction of the radial spectrum for the hydrogen channel of the carbon environments

In [None]:
# Radial grid for the real-space reconstruction
radial_grid = np.linspace(0.2, 5, 100)

# some random carbon environment
i_env = 50

# R_n(r) on radial grid, for the large and for the small primitive DVR basis.
# The function is reached through the `radial_basis` module imported at the
# top of the notebook; the bare name is never imported.
expanded_radial_basis_functions = radial_basis.radial_basis_functions_dvr(
    radial_grid, expanded_max_radial, cutoff, gaussian_sigma
)
radial_basis_functions = radial_basis.radial_basis_functions_dvr(
    radial_grid, max_radial, cutoff, gaussian_sigma
)


# Real-space reconstruction: coefficients times basis functions on the grid
real_space_full_radial_spectrum = full_radial_spectrum @ expanded_radial_basis_functions
real_space_radial_spectrum = radial_spectrum @ radial_basis_functions
# Compute optimized radial basis function R_q for hydrogen channel and angular channel = 0
optimized_radial_basis_functions = (
    np.array(projection_matrices[1][0]) @ expanded_radial_basis_functions
)
real_space_optimized_radial_spectrum = (
    optimized_radial_spectrum @ optimized_radial_basis_functions
)

In [None]:
import matplotlib.pyplot as plt

# One entry per curve: (reconstructed spectrum, extra line style, legend label).
# The label prefixes are raw strings because "\s" is not a valid escape
# sequence in a normal string literal; the resulting text is unchanged.
reconstructions = [
    (
        real_space_radial_spectrum,
        {"color": "b"},
        r"standard DVR: $\sum_n^{" + str(max_radial) + "} <H n00|ρ_C> R_n(r)$",
    ),
    (
        real_space_optimized_radial_spectrum,
        {"color": "g"},
        r"optimized DVR: $\sum_q^{" + str(max_radial) + "} <H q00|ρ_C> R_q(r)$",
    ),
    (
        real_space_full_radial_spectrum,
        {"color": "r", "linestyle": "--"},
        r"full DVR: $\sum_n^{" + str(expanded_max_radial) + "} <H n00|ρ_C> R_n(r)$",
    ),
]

# Plot the reconstruction of the selected environment for each basis
for spectrum, line_style, label in reconstructions:
    plt.plot(radial_grid, spectrum[i_env], lw=4, label=label, **line_style)

plt.title(
    "Comparison for DVR vs optimized DVR\n for radial spectrum real space reconstruction"
)
plt.legend()
plt.show()
plt.close()

#### The full DVR real space radial spectrum can be better reconstructed with optimized DVR than with the standard DVR.