In [1]:
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from pathlib import Path
import seaborn as sns
import mpl_scatter_density  # This import registers the 'scatter_density' projection.
from matplotlib.colors import LinearSegmentedColormap
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator, FormatStrFormatter, MaxNLocator

In [2]:
def calculate_rmse(predictions, targets):
    """
    Calculate the Root Mean Square Error (RMSE) between predictions and targets.

    Both inputs are converted to float64 arrays before subtracting, so plain
    Python sequences are accepted and integer inputs (unsigned ones in
    particular) cannot wrap around in the difference or its square.

    Args:
        predictions (array-like): Predicted values.
        targets (array-like): True values; must broadcast against
            ``predictions``.

    Returns:
        float: The RMSE value.
    """
    predictions = np.asarray(predictions, dtype=np.float64)
    targets = np.asarray(targets, dtype=np.float64)
    residuals = predictions - targets
    return np.sqrt(np.mean(residuals ** 2))

def calculate_mae(predictions, targets):
    """
    Calculate the Mean Absolute Error (MAE) between predictions and targets.

    Both inputs are converted to float64 arrays before subtracting, so plain
    Python sequences are accepted and unsigned-integer inputs cannot wrap
    around (which would turn a difference of -1 into a huge positive value).

    Args:
        predictions (array-like): Predicted values.
        targets (array-like): True values; must broadcast against
            ``predictions``.

    Returns:
        float: The MAE value.
    """
    predictions = np.asarray(predictions, dtype=np.float64)
    targets = np.asarray(targets, dtype=np.float64)
    return np.mean(np.abs(predictions - targets))

def calculate_r2(predictions, targets):
    """
    Calculate the R-squared (R²) between predictions and targets.

    R² is computed as ``1 - SS_res / SS_tot``, where ``SS_res`` is the sum of
    squared residuals and ``SS_tot`` is the sum of squared deviations of the
    targets from their mean. Inputs are converted to float64 arrays so that
    sequences are accepted and integer arithmetic cannot overflow.

    Args:
        predictions (array-like): Predicted values.
        targets (array-like): True values; must broadcast against
            ``predictions``.

    Returns:
        float: The R² value. For constant targets (``SS_tot == 0``) the ratio
        is undefined; following scikit-learn's ``force_finite`` convention the
        result is 1.0 for a perfect prediction and 0.0 otherwise. Empty input
        yields ``nan``.
    """
    predictions = np.asarray(predictions, dtype=np.float64)
    targets = np.asarray(targets, dtype=np.float64)

    # No data: R² is undefined; return nan without triggering NumPy warnings.
    if targets.size == 0:
        return float("nan")

    ss_res = np.sum((targets - predictions) ** 2)
    ss_tot = np.sum((targets - np.mean(targets)) ** 2)

    # Constant targets would divide by zero; return a finite value instead.
    if ss_tot == 0:
        return 1.0 if ss_res == 0 else 0.0

    return 1 - (ss_res / ss_tot)


In [3]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.ticker import MaxNLocator

def plot_density(X, Y, title, xlabel, ylabel, plot_name, axis_limits):
    """
    Save a square density-scatter correlation plot of Y against X.

    The points are drawn with the 'scatter_density' projection (registered by
    importing ``mpl_scatter_density``), a dashed red y = x reference line is
    added, and a colorbar is placed to the right of the plot. The figure is
    written to ``plot_name`` and then closed; nothing is returned.

    Args:
        X: Values plotted on the horizontal axis.
        Y: Values plotted on the vertical axis.
        title (str): Axes title.
        xlabel (str): Label of the x axis.
        ylabel (str): Label of the y axis.
        plot_name: Output destination handed to ``Figure.savefig``.
        axis_limits (tuple): ``(low, high)`` limits applied to both axes.
    """
    # Custom colormap: a density of exactly 0 maps to white, anything above
    # it jumps straight onto the coloured ramp.
    white_viridis = LinearSegmentedColormap.from_list('white_viridis', [
        (0, '#ffffff'),
        (1e-20, '#440053'),
        (0.2, '#404388'),
        (0.4, '#2a788e'),
        (0.6, '#21a784'),
        (0.8, '#78d151'),
        (1, '#fde624')
    ], N=256)

    # Unpack the limits before creating the figure so a malformed
    # axis_limits cannot leave an open figure behind.
    low, high = axis_limits

    # Create the figure and main axis (with the special scatter_density projection).
    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw={'projection': 'scatter_density'})
    try:
        # Reserve space on the right for an external colorbar.
        fig.subplots_adjust(right=0.85)

        # Force fixed limits BEFORE plotting (this may help the density projection).
        ax.set_xlim(low, high)
        ax.set_ylim(low, high)

        # Plot the density scatter.
        density = ax.scatter_density(X, Y, cmap=white_viridis, dpi=30)

        # Re-apply fixed limits to override any autoscaling done by scatter_density.
        ax.set_xlim(low, high)
        ax.set_ylim(low, high)

        # Force the data area to be strictly square.
        # If available (Matplotlib >=3.3) use set_box_aspect; otherwise fall back.
        try:
            ax.set_box_aspect(1)
        except AttributeError:
            ax.set_aspect('equal', adjustable='box')

        # Draw the y = x diagonal; fall back to an explicit segment across the
        # fixed range when Axes.axline is not available.
        try:
            ax.axline((low, low), slope=1, color='r', linestyle='--', linewidth=2)
        except AttributeError:
            ax.plot([low, high], [low, high], 'r--', linewidth=2)

        # Use MaxNLocator to compute (roughly) 5 "nice" ticks based on the fixed
        # limits, and share them between both axes.
        tick_locator = MaxNLocator(nbins=5)
        ticks = tick_locator.tick_values(low, high)
        ax.set_xticks(ticks)
        ax.set_yticks(ticks)
        formatter = FormatStrFormatter('%g')
        ax.xaxis.set_major_formatter(formatter)
        ax.yaxis.set_major_formatter(formatter)

        # Set axis labels and title.
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_title(title)

        # Make the plot box lines thicker and the major ticks longer and thicker.
        for side in ('top', 'right', 'bottom', 'left'):
            ax.spines[side].set_linewidth(3)
        ax.tick_params(axis='both', which='major', length=8, width=2)

        # Add an external colorbar aligned with the main axes position.
        pos = ax.get_position()  # Bbox in figure coordinates.
        cbar_gap = 0.02    # gap between main plot and colorbar (figure fraction)
        cbar_width = 0.03  # width of the colorbar (figure fraction)
        cbar_ax = fig.add_axes([pos.x1 + cbar_gap, pos.y0, cbar_width, pos.height])
        cbar = fig.colorbar(density, cax=cbar_ax)
        cbar.ax.tick_params(width=2, length=8)
        cbar.set_label('Number of points per pixel')

        # Make the colorbar outline thicker.
        cbar.outline.set_linewidth(3)

        # Save this specific figure rather than whatever pyplot considers current.
        fig.savefig(plot_name, bbox_inches='tight', dpi=300)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

In [4]:
def calculate_metrics_for_energies_and_forces(E_pred_npy, E_target_npy, force_pred_npy, force_target_npy,
                                              energy_plot_name, force_plot_name,
                                              energy_axis_limits=(-20, 20), force_axis_limits=(-100, 100)):
    """
    Calculate RMSE, MAE, and R² for energies and forces from NPY arrays, produce separate
    density correlation plots for energy and force, and save them as two different figures.

    The six metrics are also printed, one per line.

    Args:
    E_pred_npy (numpy.ndarray): Array containing predicted energies.
    E_target_npy (numpy.ndarray): Array containing target energies.
    force_pred_npy (numpy.ndarray): Array containing predicted forces.
    force_target_npy (numpy.ndarray): Array containing target forces.
    energy_plot_name (str): File name for saving the energy correlation plot.
    force_plot_name (str): File name for saving the force correlation plot.
    energy_axis_limits (tuple): (low, high) limits for both axes of the energy plot.
    force_axis_limits (tuple): (low, high) limits for both axes of the force plot.

    Returns:
    tuple: (energy_rmse, energy_mae, energy_r2, force_rmse, force_mae, force_r2)
    """
    # Compute metrics
    energy_rmse = calculate_rmse(E_pred_npy, E_target_npy)
    energy_mae = calculate_mae(E_pred_npy, E_target_npy)
    energy_r2 = calculate_r2(E_pred_npy, E_target_npy)

    force_rmse = calculate_rmse(force_pred_npy, force_target_npy)
    force_mae = calculate_mae(force_pred_npy, force_target_npy)
    force_r2 = calculate_r2(force_pred_npy, force_target_npy)

    print(f"Energy RMSE: {energy_rmse}")
    print(f"Energy MAE: {energy_mae}")
    print(f"Energy R²: {energy_r2}")
    print(f"Force RMSE: {force_rmse}")
    print(f"Force MAE: {force_mae}")
    print(f"Force R²: {force_r2}")

    # Energy density correlation plot (targets on x, predictions on y)
    plot_density(E_target_npy, E_pred_npy, 'Energy Density Correlation',
                 'True Energies', 'Predicted Energies', energy_plot_name,
                 axis_limits=energy_axis_limits)

    # Force density correlation plot (targets on x, predictions on y)
    plot_density(force_target_npy, force_pred_npy, 'Force Density Correlation',
                 'True Forces', 'Predicted Forces', force_plot_name,
                 axis_limits=force_axis_limits)

    return energy_rmse, energy_mae, energy_r2, force_rmse, force_mae, force_r2


#### 1) Load arrays with alanine dipeptide (AD) data

In [12]:
# Directory holding the energy (E) and force (F) arrays for every method.
# File names suggest kcal/mol and kcal/mol/A units -- TODO confirm.
path_to_data = Path("data/AD/thermal_MD_10k")

# HIPNN (hippynn) predictions
E_hippynn, F_hippynn = (
    np.load(path_to_data / fname)
    for fname in ('ad_E_hippynn_kcal_mol.npy', 'ad_F_hippynn_kcal_mol_A.npy')
)

# QM (DFT) reference data
E_QM, F_QM = (
    np.load(path_to_data / fname)
    for fname in ('ad_E_QM_kcal_mol.npy', 'ad_F_QM_kcal_mol_A.npy')
)

# TorchANI predictions
E_ani, F_ani = (
    np.load(path_to_data / fname)
    for fname in ('ad_E_torchani_kcal_mol.npy', 'ad_F_torchani_kcal_mol_A.npy')
)

# AMBER-14 force-field data
E_amber, F_amber = (
    np.load(path_to_data / fname)
    for fname in ('ad_E_amber_kcal_mol.npy', 'ad_F_amber_kcal_mol_A.npy')
)

# SAGE data (presumably the second force field -- verify)
E_sage, F_sage = (
    np.load(path_to_data / fname)
    for fname in ('ad_E_sage_kcal_mol.npy', 'ad_F_sage_kcal_mol_A.npy')
)

#### 2) Check that all energies are relative and make sense

In [13]:
# Display the HIPNN energies as a quick visual sanity check.
E_hippynn

array([-7.02381591,  6.40162057,  9.98584577, ...,  0.02945462,
        5.6417183 ,  3.36939794])

In [16]:
# Display the TorchANI energies as a quick visual sanity check.
E_ani

array([-7.94667023,  4.56330896,  9.08792428, ..., -0.69785468,
        3.83245517,  2.40544772])

In [18]:
# Display the SAGE energies as a quick visual sanity check.
E_sage

array([-8.71370626,  5.63151488,  8.36693758, ...,  0.73437764,
        7.95960959,  7.91668771])

# Alanine dipeptide

### Calculate MAE, RMSE and R² for all data arrays (ML vs QM, ML vs ML, FF vs FF etc)

1) hippynn vs QM
2) ani1x vs QM
3) amber14 vs QM
4) sage vs QM
5) hippynn vs ani1x
6) sage vs amber

In [19]:
cd data/AD/thermal_MD_10k/

/vast/projects/ml4chem/NikitaFedik/DATA/ml-tps-ad-az/data/AD/thermal_MD_10k


In [21]:
# HIPNN predictions scored against the QM reference.
calculate_metrics_for_energies_and_forces(
    E_pred_npy=E_hippynn,
    E_target_npy=E_QM,
    force_pred_npy=F_hippynn,
    force_target_npy=F_QM,
    energy_plot_name="E - hippynn vs QM.png",
    force_plot_name="F - hippynn vs QM.png",
)


Energy RMSE: 0.7213936941053319
Energy MAE: 0.601858551956262
Energy R²: 0.9836636588045312
Force RMSE: 1.1427880552625351
Force MAE: 0.8016578102236134
Force R²: 0.9972808720014851


(0.7213936941053319,
 0.601858551956262,
 0.9836636588045312,
 1.1427880552625351,
 0.8016578102236134,
 0.9972808720014851)

In [22]:
# TorchANI predictions scored against the QM reference.
calculate_metrics_for_energies_and_forces(
    E_pred_npy=E_ani,
    E_target_npy=E_QM,
    force_pred_npy=F_ani,
    force_target_npy=F_QM,
    energy_plot_name="E - ANI vs QM.png",
    force_plot_name="F - ANI vs QM.png",
)


Energy RMSE: 0.7936402518211633
Energy MAE: 0.6284503373053274
Energy R²: 0.9802276866540939
Force RMSE: 2.73560082473229
Force MAE: 1.9143588289415634
Force R²: 0.9844186989404786


(0.7936402518211633,
 0.6284503373053274,
 0.9802276866540939,
 2.73560082473229,
 1.9143588289415634,
 0.9844186989404786)

In [23]:
# AMBER values scored against the QM reference.
calculate_metrics_for_energies_and_forces(
    E_pred_npy=E_amber,
    E_target_npy=E_QM,
    force_pred_npy=F_amber,
    force_target_npy=F_QM,
    energy_plot_name="E - AMBER vs QM.png",
    force_plot_name="F - AMBER vs QM.png",
)

Energy RMSE: 3.666025333192128
Energy MAE: 2.8579214816631735
Energy R²: 0.5781082476130772
Force RMSE: 11.574142670604223
Force MAE: 7.925241230060404
Force R²: 0.7210823933297708


(3.666025333192128,
 2.8579214816631735,
 0.5781082476130772,
 11.574142670604223,
 7.925241230060404,
 0.7210823933297708)

In [24]:
# SAGE values scored against the QM reference.
calculate_metrics_for_energies_and_forces(
    E_pred_npy=E_sage,
    E_target_npy=E_QM,
    force_pred_npy=F_sage,
    force_target_npy=F_QM,
    energy_plot_name="E - SAGE vs QM.png",
    force_plot_name="F - SAGE vs QM.png",
)

Energy RMSE: 3.8626893750419335
Energy MAE: 3.0239076443407606
Energy R²: 0.531629339942961
Force RMSE: 18.463967610942618
Force MAE: 13.461596405164078
Force R²: 0.29017984571494293


(3.8626893750419335,
 3.0239076443407606,
 0.531629339942961,
 18.463967610942618,
 13.461596405164078,
 0.29017984571494293)

In [25]:
# TorchANI compared with HIPNN (HIPNN plays the "target" role here).
# NOTE(review): the file names spell "HIPPN" -- possibly a typo for "HIPNN";
# left unchanged in case other tooling expects these exact names.
calculate_metrics_for_energies_and_forces(
    E_pred_npy=E_ani,
    E_target_npy=E_hippynn,
    force_pred_npy=F_ani,
    force_target_npy=F_hippynn,
    energy_plot_name="E - ANI vs HIPPN.png",
    force_plot_name="F - ANI vs HIPPN.png",
)

Energy RMSE: 1.027208666606758
Energy MAE: 0.8338911678763193
Energy R²: 0.970677078869283
Force RMSE: 2.7693284872075536
Force MAE: 1.9073441372702316
Force R²: 0.9840398362163625


(1.027208666606758,
 0.8338911678763193,
 0.970677078869283,
 2.7693284872075536,
 1.9073441372702316,
 0.9840398362163625)

In [26]:
# SAGE compared with AMBER (AMBER plays the "target" role here).
calculate_metrics_for_energies_and_forces(
    E_pred_npy=E_sage,
    E_target_npy=E_amber,
    force_pred_npy=F_sage,
    force_target_npy=F_amber,
    energy_plot_name="E - SAGE vs AMBER.png",
    force_plot_name="F - SAGE vs AMBER.png",
)

Energy RMSE: 2.2882505398714583
Energy MAE: 1.8113194674184225
Energy R²: 0.8884857816757292
Force RMSE: 15.274210535993536
Force MAE: 10.674600473383975
Force R²: 0.5714468769533771


(2.2882505398714583,
 1.8113194674184225,
 0.8884857816757292,
 15.274210535993536,
 10.674600473383975,
 0.5714468769533771)