In [1]:
import ase
from ase import Atoms
from ase.io import read,write
import numpy as np
from pyace import PyACECalculator
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import glob
from itertools import cycle
from quippy.potential import Potential

mpl.rcParams['figure.dpi'] = 150

In [5]:
potname='../../models/SiH-ACE-25.yaml'
pot = PyACECalculator(potname)

In [13]:
def get_pred_en(row, model_calc):
    atom=row['ase_atoms']
    atom.calc=model_calc
    en=atom.get_potential_energy(force_consistent=True)
    return en

def get_pred_force(row, model_calc):
    atom=row['ase_atoms']
    atom.calc=model_calc
    forces=atom.get_forces()
    return forces

def get_nb_atoms(atom):
    return len(atom)

In [7]:
### Internal validation set

In [None]:
def umap_config(row):
    config=row["config"]
    if config=='cbulk' or config=='SnB':
        umap_config='crystalline'
    elif config=='amorph' or config=='ahighT':
        umap_config='amorphous'
    elif config=='cluster' or config=='sp':
        umap_config='cluster'
    elif config=='metabulk' or config=='high-p':
        umap_config='high-p'
    elif config=='a-surface' or config=='c-surface':
        umap_config='surface'
    elif config=='liq':
        umap_config='liquid'
    elif config=='mol':
        umap_config='molecule'
    else:
        print(config)
    return umap_config


In [11]:
df=pd.read_pickle('../../data/datasets/combined-to-Ite6-filtered-Fmag-50-Emax-0-min-Si-1.6-SiH-1.0-test.pkl.gzip', compression='gzip')
df['umap_config']=df.apply(umap_config, axis=1)


In [12]:
for config in df.umap_config.unique():
    print(config)
    df_tmp=df[df.umap_config==config]
    print(df_tmp.iteration.unique())
    print(len(df_tmp.ase_atoms.values))

    print(f'tot nb atoms {np.sum(df_tmp.nb_atoms)}')
    print(f'Si nb atoms {np.sum(df_tmp.nSi.values)}')
    print(f'H nb atoms {np.sum(df_tmp.nH.values)}')

crystalline
[0 1]
323
tot nb atoms 13806
Si nb atoms 13065
H nb atoms 741
amorphous
[0 1 6]
510
tot nb atoms 71517
Si nb atoms 67789
H nb atoms 3728
liquid
[0 1 2 5 6]
176
tot nb atoms 17721
Si nb atoms 14304
H nb atoms 3417
high-p
[0 4]
180
tot nb atoms 4152
Si nb atoms 4152
H nb atoms 0
molecule
[0 4]
111
tot nb atoms 1407
Si nb atoms 257
H nb atoms 1150
cluster
[2 3 6]
97
tot nb atoms 625
Si nb atoms 200
H nb atoms 425
surface
[3]
14
tot nb atoms 1397
Si nb atoms 699
H nb atoms 698


In [18]:
df['ace_pred']=df.apply(lambda row: get_pred_en(row, pot), axis=1)

In [21]:
df['ace_pred_f']=df.apply(lambda row: get_pred_force(row, pot), axis=1)

In [22]:
rmse_by_config = (
    df.groupby('umap_config')
    .apply(lambda g: np.sqrt(((g['e_corrected_per_atom'] - g['ace_pred'])**2).mean())*10**3)
    .reset_index(name='Energy_RMSE')
)
rmse_by_config

Unnamed: 0,umap_config,Energy_RMSE
0,amorphous,863060.898654
1,cluster,21234.351808
2,crystalline,377012.507478
3,high-p,192813.847716
4,liquid,581852.711001
5,molecule,45158.550155
6,surface,472659.944986


In [23]:
config_types=df.umap_config.unique()

for config_type in config_types:
    try:
        df_tmp=df[df['umap_config']==config_type]
        frames=df_tmp.ase_atoms.values.tolist()

        numbers=[atoms.numbers for atoms in df_tmp.ase_atoms]
        numbers=[x for xs in numbers for x in xs]
        numbers=np.asarray(numbers)

        dft_forces=np.concatenate(df_tmp.forces.values)

        dft_forces_Si=dft_forces[numbers==14]
        dft_forces_H=dft_forces[numbers==1]
        dft_forces_Si = dft_forces_Si.ravel()
        dft_forces_H = dft_forces_H.ravel()

        dft_forces = dft_forces.ravel()
        ace_forces=df_tmp.ace_pred_f.values
        ace_forces=[x for xs in ace_forces for x in xs]
        ace_forces=np.asarray(ace_forces)

        forces_si = ace_forces[numbers == 14]
        forces_h = ace_forces[numbers == 1]

        ace_forces = np.concatenate(ace_forces).ravel()
        forces_si = np.concatenate(forces_si).ravel()
        forces_h = np.concatenate(forces_h).ravel()

        rmse_f_Si = np.sqrt(np.mean((dft_forces_Si - forces_si)**2)) * 1e3
        rmse_f_H = np.sqrt(np.mean((dft_forces_H - forces_h)**2)) * 1e3
        rmse_f = np.sqrt(np.mean((dft_forces - ace_forces)**2)) * 1e3

        print(config_type)
        print(f'rmse_f_Si: {rmse_f_Si}')
        print(f'rmse_f_H: {rmse_f_H}')
        print(f'rmse_f: {rmse_f}')
    
    except:
        print(config_type)
        print(rmse_f_Si)


crystalline
rmse_f_Si: 108.29329521739754
rmse_f_H: 207.38650451999638
rmse_f: 115.78601125235559
amorphous
rmse_f_Si: 119.15029302973838
rmse_f_H: 226.4761387900535
rmse_f: 127.00567860041373
liquid
rmse_f_Si: 222.8858702050028
rmse_f_H: 447.30186014050673
rmse_f: 280.49726171888744
high-p
222.8858702050028
molecule
rmse_f_Si: 822.3390714559953
rmse_f_H: 441.6326306948997
rmse_f: 531.916226897414
cluster
rmse_f_Si: 801.5029181121326
rmse_f_H: 605.1511806038761
rmse_f: 674.2341016443656
surface
rmse_f_Si: 248.02539428256108
rmse_f_H: 177.50119189698574
rmse_f: 215.69046226642988


In [24]:
frames=df.ase_atoms.values.tolist()

numbers=[atoms.numbers for atoms in df.ase_atoms]
numbers=[x for xs in numbers for x in xs]
numbers=np.asarray(numbers)

dft_forces=np.concatenate(df.forces.values)

dft_forces_Si=dft_forces[numbers==14]
dft_forces_H=dft_forces[numbers==1]
dft_forces_Si = dft_forces_Si.ravel()
dft_forces_H = dft_forces_H.ravel()

dft_forces = dft_forces.ravel()
ace_forces=df.ace_pred_f.values
ace_forces=[x for xs in ace_forces for x in xs]
ace_forces=np.asarray(ace_forces)

forces_si = ace_forces[numbers == 14]
forces_h = ace_forces[numbers == 1]

ace_forces = np.concatenate(ace_forces).ravel()
forces_si = np.concatenate(forces_si).ravel()
forces_h = np.concatenate(forces_h).ravel()

rmse_f_Si = np.sqrt(np.mean((dft_forces_Si - forces_si)**2)) * 1e3
rmse_f_H = np.sqrt(np.mean((dft_forces_H - forces_h)**2)) * 1e3
rmse_f = np.sqrt(np.mean((dft_forces - ace_forces)**2)) * 1e3

print(rmse_f_Si)
print(rmse_f_H)
print(rmse_f)

150.93374248047158
358.9146222322402
180.32976194611936


In [25]:
### GAP18 validation set

In [28]:
df=pd.read_pickle('../../data/datasets/SCAN_Si-GAP-18_10pct.pkl.gzip',compression="gzip")
df['nb_atoms']=df['ase_atoms'].apply(get_nb_atoms)
df["e_corrected_per_atom"] = df["energy_corrected"] / df["nb_atoms"]

In [29]:
df[f'ACE25_energy'] = df.apply(lambda row: get_pred_en(row, pot), axis=1)/df['nb_atoms']
df[f'ACE25_forces'] = df.apply(lambda row: get_pred_force(row, pot), axis=1)

rmse = np.sqrt(((df['e_corrected_per_atom'] - df['ACE25_energy'])**2).mean())
print(f'Energy RMSE ACE25: {rmse*10**3} meV/atom')

rmse = np.sqrt(((df['forces'] - df['ACE25_forces']).apply(np.square)).apply(np.mean).mean())
print(f'Force RMSE ACE25: {rmse*10**3} meV/Å')

Energy RMSE ACE25: 24.495010531515657 meV/atom
Force RMSE ACE25: 126.5232983204147 meV/Å


In [30]:
### GAP22 validation set

In [31]:
df=pd.read_pickle('../../data/datasets/SCAN-SiH-GAP-22_training.pkl.gzip',compression="gzip")

In [33]:
df['nb_atoms']=df['ase_atoms'].apply(get_nb_atoms)
df["e_corrected_per_atom"] = df["energy_corrected"] / df["nb_atoms"]
df['ace_energy']=df.apply(lambda row: get_pred_en(row, pot), axis=1)/df['nb_atoms']


In [34]:
df['ace_pred_f']=df.apply(lambda row: get_pred_force(row, pot), axis=1)

In [35]:
rmse = np.sqrt(((df['e_corrected_per_atom'] - df['ace_energy'])**2).mean())
rmse*10**3


8.721521312670665

In [36]:
frames=df.ase_atoms.values.tolist()

numbers=[atoms.numbers for atoms in df.ase_atoms]
numbers=[x for xs in numbers for x in xs]
numbers=np.asarray(numbers)

dft_forces=np.concatenate(df.forces.values)

dft_forces_Si=dft_forces[numbers==14]
dft_forces_H=dft_forces[numbers==1]
dft_forces_Si = dft_forces_Si.ravel()
dft_forces_H = dft_forces_H.ravel()

dft_forces = dft_forces.ravel()
ace_forces=df.ace_pred_f.values
ace_forces=[x for xs in ace_forces for x in xs]
ace_forces=np.asarray(ace_forces)

forces_si = ace_forces[numbers == 14]
forces_h = ace_forces[numbers == 1]

ace_forces = np.concatenate(ace_forces).ravel()
forces_si = np.concatenate(forces_si).ravel()
forces_h = np.concatenate(forces_h).ravel()

rmse_f_Si = np.sqrt(np.mean((dft_forces_Si - forces_si)**2)) * 1e3
rmse_f_H = np.sqrt(np.mean((dft_forces_H - forces_h)**2)) * 1e3
rmse_f = np.sqrt(np.mean((dft_forces - ace_forces)**2)) * 1e3

print(rmse_f_Si)
print(rmse_f_H)
print(rmse_f)

149.0479666993789
301.6358023535402
164.460569864475
