In [26]:
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from pathlib import Path
import seaborn as sns
import mpl_scatter_density  # This import registers the 'scatter_density' projection.
from matplotlib.colors import LinearSegmentedColormap
import numpy as np
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'seaborn'

In [3]:
def calculate_rmse(predictions, targets):
    """
    Calculate the Root Mean Square Error (RMSE) between predictions and targets.

    Both inputs are passed through ``np.asanyarray`` first, so plain Python
    lists, tuples and scalars are accepted in addition to NumPy arrays
    (array subclasses are passed through unchanged). The inputs are combined
    with ordinary NumPy broadcasting and the mean is taken over every element.

    Args:
        predictions (array-like): Predicted values.
        targets (array-like): True values.

    Returns:
        float: The RMSE value.
    """
    # Coerce up front: subtracting two Python lists/tuples would raise TypeError.
    residuals = np.asanyarray(predictions) - np.asanyarray(targets)
    return np.sqrt(np.mean(residuals ** 2))

def calculate_mae(predictions, targets):
    """
    Calculate the Mean Absolute Error (MAE) between predictions and targets.

    Both inputs are passed through ``np.asanyarray`` first, so plain Python
    lists, tuples and scalars are accepted in addition to NumPy arrays
    (array subclasses are passed through unchanged). The inputs are combined
    with ordinary NumPy broadcasting and the mean is taken over every element.

    Args:
        predictions (array-like): Predicted values.
        targets (array-like): True values.

    Returns:
        float: The MAE value.
    """
    # Coerce up front: subtracting two Python lists/tuples would raise TypeError.
    residuals = np.asanyarray(predictions) - np.asanyarray(targets)
    return np.mean(np.abs(residuals))

def calculate_r2(predictions, targets):
    """
    Calculate the R-squared (R²) between predictions and targets.

    R² is computed as ``1 - SS_res / SS_tot`` where ``SS_res`` is the sum of
    squared residuals and ``SS_tot`` is the sum of squared deviations of the
    targets from their mean.

    Both inputs are passed through ``np.asanyarray`` first, so plain Python
    lists and tuples are accepted in addition to NumPy arrays.

    When the targets have zero variance (``SS_tot == 0``) the ratio is
    undefined; instead of dividing by zero this returns 1.0 if the
    predictions match the targets exactly and 0.0 otherwise.

    Args:
        predictions (array-like): Predicted values.
        targets (array-like): True values.

    Returns:
        float: The R² value.
    """
    predictions = np.asanyarray(predictions)
    targets = np.asanyarray(targets)

    ss_res = np.sum((targets - predictions) ** 2)
    ss_tot = np.sum((targets - np.mean(targets)) ** 2)

    # Constant targets: avoid 0/0 (nan) or x/0 (inf) and the RuntimeWarning
    # that comes with them.
    if ss_tot == 0:
        return 1.0 if ss_res == 0 else 0.0

    return 1 - (ss_res / ss_tot)


In [4]:
def calculate_metrics_for_energies_and_forces(E_pred_npy, E_target_npy, force_pred_npy, force_target_npy, energy_plot_name, force_plot_name):
    """
    Calculate RMSE, MAE, and R² for energies and forces, print them, and save one
    density correlation plot per quantity (targets on the x axis, predictions on
    the y axis) as two separate figures.

    Args:
        E_pred_npy (numpy.ndarray): Array containing predicted energies.
        E_target_npy (numpy.ndarray): Array containing target energies.
        force_pred_npy (numpy.ndarray): Array containing predicted forces.
        force_target_npy (numpy.ndarray): Array containing target forces.
        energy_plot_name (str): File name for saving the energy correlation plot.
        force_plot_name (str): File name for saving the force correlation plot.

    Returns:
        tuple: (energy_rmse, energy_mae, energy_r2, force_rmse, force_mae, force_r2)
    """
    # Kept function-local on purpose: importing mpl_scatter_density registers
    # the 'scatter_density' projection used below, and the local import makes
    # sure that has happened even if the notebook's import cell stopped early.
    import mpl_scatter_density  # noqa: F401
    from matplotlib.colors import LinearSegmentedColormap

    # Compute metrics
    energy_rmse = calculate_rmse(E_pred_npy, E_target_npy)
    energy_mae = calculate_mae(E_pred_npy, E_target_npy)
    energy_r2 = calculate_r2(E_pred_npy, E_target_npy)

    force_rmse = calculate_rmse(force_pred_npy, force_target_npy)
    force_mae = calculate_mae(force_pred_npy, force_target_npy)
    force_r2 = calculate_r2(force_pred_npy, force_target_npy)

    print(f"Energy RMSE: {energy_rmse}")
    print(f"Energy MAE: {energy_mae}")
    print(f"Energy R²: {energy_r2}")
    print(f"Force RMSE: {force_rmse}")
    print(f"Force MAE: {force_mae}")
    print(f"Force R²: {force_r2}")

    # Custom colormap: the first stop (0) is white and the next stop (1e-20)
    # jumps straight to dark purple, so only the very bottom of the range is
    # drawn white; the remaining stops step through to yellow.
    white_viridis = LinearSegmentedColormap.from_list('white_viridis', [
        (0, '#ffffff'),
        (1e-20, '#440053'),
        (0.2, '#404388'),
        (0.4, '#2a788e'),
        (0.6, '#21a784'),
        (0.8, '#78d151'),
        (1, '#fde624'),
    ], N=256)

    def plot_density(X, Y, title, xlabel, ylabel, plot_name):
        """Draw a scatter-density plot of Y against X with a y = x guide and save it to plot_name."""
        fig = plt.figure(figsize=(8, 8))
        try:
            ax = fig.add_subplot(111, projection='scatter_density')

            # One shared range for both axes so the y = x diagonal runs
            # corner to corner.
            overall_min = min(np.min(X), np.min(Y))
            overall_max = max(np.max(X), np.max(Y))

            density = ax.scatter_density(X, Y, cmap=white_viridis, dpi=30)

            # Limits are applied after scatter_density, as in the original
            # ordering, so they are the ones in effect when the figure is saved.
            ax.set_xlim(overall_min, overall_max)
            ax.set_ylim(overall_min, overall_max)

            # Diagonal reference line (perfect agreement)
            ax.plot([overall_min, overall_max], [overall_min, overall_max], 'r--', alpha=0.5, linewidth=2)

            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel)
            ax.set_title(title)
            ax.set_aspect('equal', adjustable='box')

            fig.colorbar(density, label='Number of points per pixel', shrink=0.8)
            fig.tight_layout()
            fig.savefig(plot_name, bbox_inches='tight', dpi=300)
        finally:
            # Always release the figure, even if plotting or saving failed,
            # so repeated calls do not accumulate open figures.
            plt.close(fig)

    # Energy density correlation plot
    plot_density(E_target_npy, E_pred_npy, 'Energy Density Correlation',
                 'True Energies', 'Predicted Energies', energy_plot_name)

    # Force density correlation plot
    plot_density(force_target_npy, force_pred_npy, 'Force Density Correlation',
                 'True Forces', 'Predicted Forces', force_plot_name)

    return energy_rmse, energy_mae, energy_r2, force_rmse, force_mae, force_r2


### 1) load arrays with data

In [6]:
# Directory holding every .npy array used below (relative to the working
# directory at the time this cell runs).
path_to_data = Path("data/AZ/thermal_MD_10k/")

# --- HIPNN (hippynn) ---
E_hippynn = np.load(path_to_data.joinpath('az_E_hippynn_kcal_mol.npy'))
F_hippynn = np.load(path_to_data.joinpath('az_F_hippynn_kcal_mol_A.npy'))

# --- QM (DFT) reference ---
E_QM = np.load(path_to_data.joinpath('az_E_QM_kcal_mol.npy'))
F_QM = np.load(path_to_data.joinpath('az_F_QM_kcal_mol_A.npy'))

# --- TorchANI ---
E_ani = np.load(path_to_data.joinpath('az_E_torchani_kcal_mol.npy'))
F_ani = np.load(path_to_data.joinpath('az_F_torchani_kcal_mol_A.npy'))

# --- AMBER-14 ---
E_amber = np.load(path_to_data.joinpath('az_E_amber_kcal_mol.npy'))
F_amber = np.load(path_to_data.joinpath('az_F_amber_kcal_mol_A.npy'))

# --- SAGE ---
E_sage = np.load(path_to_data.joinpath('az_E_sage_kcal_mol.npy'))
F_sage = np.load(path_to_data.joinpath('az_F_sage_kcal_mol_A.npy'))

### Check that all energies are relative and make sense

# azobenzene

### Calculate MAE, RMSE and R² for all data arrays (ML vs QM, ML vs ML, FF vs FF etc)

1) hippynn vs QM
2) ani1x vs QM
3) amber14 vs QM
4) sage vs QM
5) hippynn vs ani1x
6) sage vs amber

In [7]:
# Switch into the data directory so the plots saved by the cells below land
# next to the .npy files. The explicit %cd form does not rely on automagic.
# NOTE: the load cell above uses a path relative to the original working
# directory, so it must be run before this cell.
%cd data/AZ/thermal_MD_10k/

/vast/projects/ml4chem/NikitaFedik/DATA/ml-tps-ad-az/data/AZ/thermal_MD_10k


In [8]:
# hippynn predictions scored against the QM reference
calculate_metrics_for_energies_and_forces(
    E_pred_npy=E_hippynn,
    E_target_npy=E_QM,
    force_pred_npy=F_hippynn,
    force_target_npy=F_QM,
    energy_plot_name="E - hippynn vs QM.png",
    force_plot_name="F - hippynn vs QM.png",
)


Energy RMSE: 0.9656671237943625
Energy MAE: 0.7727854684553248
Energy R²: 0.9839895143754854
Force RMSE: 2.0369794633693763
Force MAE: 1.3360739167707776
Force R²: 0.9921067988620822


  vmin = self._density_vmin(array)
  vmax = self._density_vmax(array)


(0.9656671237943625,
 0.7727854684553248,
 0.9839895143754854,
 2.0369794633693763,
 1.3360739167707776,
 0.9921067988620822)

In [9]:
# ANI predictions scored against the QM reference
calculate_metrics_for_energies_and_forces(
    E_pred_npy=E_ani,
    E_target_npy=E_QM,
    force_pred_npy=F_ani,
    force_target_npy=F_QM,
    energy_plot_name="E - ANI vs QM.png",
    force_plot_name="F - ANI vs QM.png",
)


Energy RMSE: 1.726801857085822
Energy MAE: 1.4391225564917316
Energy R²: 0.9488041654433794
Force RMSE: 5.071338804248245
Force MAE: 2.944867483306301
Force R²: 0.9510756437113436


(1.726801857085822,
 1.4391225564917316,
 0.9488041654433794,
 5.071338804248245,
 2.944867483306301,
 0.9510756437113436)

In [10]:
# AMBER values scored against the QM reference
calculate_metrics_for_energies_and_forces(
    E_pred_npy=E_amber,
    E_target_npy=E_QM,
    force_pred_npy=F_amber,
    force_target_npy=F_QM,
    energy_plot_name="E - AMBER vs QM.png",
    force_plot_name="F - AMBER vs QM.png",
)

Energy RMSE: 4.419304957436206
Energy MAE: 3.5435198641630152
Energy R²: 0.6646814683945835
Force RMSE: 22.211120281178754
Force MAE: 11.833300274406376
Force R²: 0.06152912990638837


(4.419304957436206,
 3.5435198641630152,
 0.6646814683945835,
 22.211120281178754,
 11.833300274406376,
 0.06152912990638837)

In [11]:
# SAGE values scored against the QM reference
calculate_metrics_for_energies_and_forces(
    E_pred_npy=E_sage,
    E_target_npy=E_QM,
    force_pred_npy=F_sage,
    force_target_npy=F_QM,
    energy_plot_name="E - SAGE vs QM.png",
    force_plot_name="F - SAGE vs QM.png",
)

Energy RMSE: 3.386266226072238
Energy MAE: 2.664944317074759
Energy R²: 0.8031244379484879
Force RMSE: 19.53807221842546
Force MAE: 13.724532421944929
Force R²: 0.2738216616614363


(3.386266226072238,
 2.664944317074759,
 0.8031244379484879,
 19.53807221842546,
 13.724532421944929,
 0.2738216616614363)

In [12]:
# ML vs ML: ANI is passed as the prediction, hippynn as the target
calculate_metrics_for_energies_and_forces(
    E_pred_npy=E_ani,
    E_target_npy=E_hippynn,
    force_pred_npy=F_ani,
    force_target_npy=F_hippynn,
    energy_plot_name="E - ANI vs HIPPN.png",
    force_plot_name="F - ANI vs HIPPN.png",
)

Energy RMSE: 1.2675967708537021
Energy MAE: 1.0099048850381769
Energy R²: 0.968266964367811
Force RMSE: 4.679064066971086
Force MAE: 2.760894931517121
Force R²: 0.9582806822249598


(1.2675967708537021,
 1.0099048850381769,
 0.968266964367811,
 4.679064066971086,
 2.760894931517121,
 0.9582806822249598)

In [13]:
# FF vs FF: SAGE is passed as the prediction, AMBER as the target
calculate_metrics_for_energies_and_forces(
    E_pred_npy=E_sage,
    E_target_npy=E_amber,
    force_pred_npy=F_sage,
    force_target_npy=F_amber,
    energy_plot_name="E - SAGE vs AMBER.png",
    force_plot_name="F - SAGE vs AMBER.png",
)

Energy RMSE: 3.0726373535774547
Energy MAE: 2.476236434343668
Energy R²: 0.7773327218059674
Force RMSE: 16.74457134127208
Force MAE: 11.648209567150543
Force R²: 0.7121296408501689


(3.0726373535774547,
 2.476236434343668,
 0.7773327218059674,
 16.74457134127208,
 11.648209567150543,
 0.7121296408501689)