# 3: Analyse Results and Visualize Predictions

## Training Analysis

In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np

import rholearn.io
from rholearn.analysis import compile_loss_data, average_losses
from rholearn.plots import loss_vs_epoch, learning_curve, save_fig_mpltex

In [None]:
# Define paths to the various important directories. The default root below is
# a machine-specific absolute path; set the RHOLEARN_AZOSWITCH_ROOT environment
# variable to point the notebook at a different copy of the azoswitch example
# without editing this cell.
root_dir = os.environ.get(
    "RHOLEARN_AZOSWITCH_ROOT",
    "/Users/joe.abbott/Documents/phd/code/qml/rho_learn/docs/example/azoswitch",
)
data_dir = os.path.join(root_dir, "data", "partitions")
run_dir = os.path.join(root_dir, "simulations", "02_nonlinear")

# Directory that plots and visualizations are saved to. check_or_create_dir is
# expected to create it if it does not exist yet (behaviour defined in
# rholearn.io).
plot_dir = os.path.join(run_dir, "plots")
rholearn.io.check_or_create_dir(plot_dir)

In [None]:
# Exercise and subset indices whose loss data is gathered from run_dir
exercises, subsets = [0], [0]

# Gather the train and test losses for that selection, then pass each through
# average_losses to obtain the mean losses used by the plots below
train, test = compile_loss_data(run_dir, exercises, subsets)
mean_train, mean_test = average_losses(train), average_losses(test)

In [None]:
# Log-log plot of loss vs epoch: ax[0] is labelled as the train loss and ax[1]
# as the test loss
fig, ax = loss_vs_epoch([mean_train, mean_test], sharey=False)

# Format: set the final figure size and add the labels and legend first...
fig.set_figheight(7)
fig.set_figwidth(12)
ax[0].set_ylabel(r"train loss / Ha")
ax[1].set_ylabel(r"test loss / Ha")
ax[1].legend(labels=[f"subset {s}" for s in np.sort(list(mean_test.keys()))])

# ...and only then compute the tight layout. tight_layout() fits the subplot
# spacing to the figure size and the decorations present at the time of the
# call, so it has to run after the resize and after the labels are set.
fig.tight_layout()

# Save
save_fig_mpltex(fig, os.path.join(plot_dir, "loss_vs_epoch"))

In [None]:
# Log-log learning curve plot of loss vs training set size
point = "final"  # take the final epoch loss, as opposed to "best" (i.e. lowest)
fig, ax = learning_curve(
    [mean_train, mean_test],
    # Only the first training subset size is used, matching the single subset
    # compiled above
    np.load(os.path.join(data_dir, "subset_sizes_train.npy"))[:1],
    point=point,
)

# Format: add the label and legend before computing the tight layout, so that
# the layout accounts for them
ax.set_ylabel(point + r" loss")
ax.legend(labels=["train", "test"])
fig.tight_layout()

# Save
save_fig_mpltex(fig, os.path.join(plot_dir, "learning_curve"))

## Validation Structure

In [None]:
from rholearn.io import (
    load_tensormap_to_torch,
    load_torch_object,
    unpickle_dict,
)

# The settings dict is stored as a pickle file directly inside the run directory
settings_path = os.path.join(run_dir, "settings.pickle")
settings = unpickle_dict(settings_path)

Load the lambda-SOAP TensorMap for the validation structure and its QM (i.e. the
target) electron density. Then, load the best model and make a prediction on the
validation structure. 

In [None]:
# Load the TensorMaps for the validation structure: the model input (in_val)
# and the target output (out_val). The torch settings stored with the run are
# forwarded as keyword arguments.
in_val = load_tensormap_to_torch(
    os.path.join(data_dir, "in_val.npz"), **settings["torch"]
)
out_val = load_tensormap_to_torch(
    os.path.join(data_dir, "out_val.npz"), **settings["torch"]
)

# Select which trained model to evaluate. Only exercise 0 / subset 0 is
# compiled in the training analysis above, so that is the model loaded here.
# If more exercises/subsets are trained, inspect the analysis plots and set
# these two indices to the combination with the lowest test loss (in principle
# the weights of multiple models could also be averaged).
best_exercise = 0
best_subset = 0
model = load_torch_object(
    os.path.join(
        run_dir,
        f"exercise_{best_exercise}",
        f"subset_{best_subset}",
        "model.pt",
    ),
    device=settings["torch"]["device"],
    torch_obj_str="model",
)

# Make a prediction on the validation structure
out_val_pred = model(in_val)

Make a parity plot of the predicted electron density coefficients against the
target ones, coloured by their $\lambda$ value, $\lambda \in \{0, \ldots, 5\}$.

In [None]:
from rholearn.plots import parity_plot

# Target vs predicted coefficients, coloured by the "spherical_harmonics_l" key
fig, ax = parity_plot(out_val, out_val_pred, color_by="spherical_harmonics_l")

# Shared axis limits; `lim` is reused by the species-coloured plot further down
lim = [1e-6, 1e2]
ax.set(
    xlim=lim,
    ylim=lim,
    aspect="equal",
    xlabel="target density coefficient value",
    ylabel="predicted density coefficient value",
)
ax.legend()

We can also colour the plot according to the elemental species of each atomic
centre. The species label is the atomic number: 1 for hydrogen, 6 for carbon,
7 for nitrogen, 8 for oxygen, and 16 for sulfur.

In [None]:
# Same parity plot as before, but coloured by the "species_center" key. The
# axis limits `lim` are the ones defined for the previous parity plot.
fig, ax = parity_plot(out_val, out_val_pred, color_by="species_center")
ax.set(
    xlim=lim,
    ylim=lim,
    aspect="equal",
    xlabel="target density coefficient value",
    ylabel="predicted density coefficient value",
)
ax.legend()

## Visualization of the Electron Density

In [None]:
from azoswitch_utils import drop_structure_label
from rholearn.utils import delta_tensor, rename_tensor

# Key names that follow the Q-Stack naming convention
new_keys_names = ["spherical_harmonics_l", "element"]

# Drop the structure label from the target and predicted tensors, then rename
# their keys.
# NOTE(review): out_val and out_val_pred are rebound to the transformed
# tensors, so re-running this cell (or the parity-plot cells above) operates on
# already-transformed data. Confirm that is acceptable, or use new names.
out_val, out_val_pred = (
    rename_tensor(drop_structure_label(tensor), keys_names=new_keys_names)
    for tensor in (out_val, out_val_pred)
)

# Delta electron density between the ML prediction and the QM target, which
# helps to visualize where the ML model isn't working so well. absolute=True is
# passed, so presumably the magnitude of the difference is returned (see
# delta_tensor for the exact definition).
out_val_delta = delta_tensor(input=out_val_pred, target=out_val, absolute=True)

In [None]:
from qstack import compound

# NOTE(review): `val_xyz` (the file name of the validation structure inside
# data/xyz) is not defined in any visible cell of this notebook, so this cell
# raises a NameError on a fresh kernel. Define it before this point.
xyz_path = os.path.join(root_dir, "data", "xyz", val_xyz)

# Build a molecule object using Q-Stack. The basis used in density fitting for
# this particular dataset was ccpvqz jkfit.
molecule = compound.xyz_to_mol(xyz_path, basis="ccpvqz jkfit")

Vectorize the density coefficients and convert them to cube file format using Q-Stack

In [None]:
from qstack import equio
from qstack.fields import density2file

# Output cube file name -> TensorMap of coefficients it is generated from
# (target, prediction, and their delta, in that order)
cube_tensors = {
    "out_val.cube": out_val,
    "out_val_pred.cube": out_val_pred,
    "out_val_delta.cube": out_val_delta,
}

# Vectorize the coefficients from each of the TensorMaps
cube_coeffs = {
    filename: equio.tensormap_to_vector(molecule, tensor)
    for filename, tensor in cube_tensors.items()
}

# Number of grid points (along each of x, y, z) to represent the electron
# density on
n = 90
grid_kwargs = dict(nx=n, ny=n, nz=n, resolution=None)

# Convert the basis function coefficients to cube files in the plot directory
for filename, coeffs in cube_coeffs.items():
    density2file.coeffs_to_cube(
        molecule, coeffs, os.path.join(plot_dir, filename), **grid_kwargs
    )

In [None]:
import py3Dmol

# Visualize the target, predicted, and delta densities, one viewer per cube file
for filename in ["out_val.cube", "out_val_pred.cube", "out_val_delta.cube"]:
    # Read the cube file once, closing the file handle when done
    with open(os.path.join(plot_dir, filename), "r") as cube_file:
        cube_data = cube_file.read()

    # The first positional argument of py3Dmol.view is a remote download query,
    # not a local file path, so the viewer is created empty and the data is
    # passed in explicitly instead.
    v = py3Dmol.view()

    # Load the atoms stored in the cube file so that the line style below has a
    # model to apply to
    v.addModel(cube_data, "cube")
    v.setStyle({"line": {}})

    # Isosurface of the volumetric density data
    v.addVolumetricData(
        cube_data,
        "cube",
        {"isoval": 0.01, "color": "blue", "opacity": 0.8},
    )
    v.zoomTo()
    v.show()