In [1]:
import pickle
import numpy as np
import pandas as pd
import random
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

In [3]:
df_val_id = pd.read_csv("../val_sets_with_anomalies/val_id_30k_anomalies.csv", index_col=0)
df_ood_cat = pd.read_csv("../val_sets_with_anomalies/val_ood_cat_30k_anomalies.csv", index_col=0)
df_ood_ads = pd.read_csv("../val_sets_with_anomalies/val_ood_ads_30k_anomalies.csv", index_col=0)
df_ood_both = pd.read_csv("../val_sets_with_anomalies/val_ood_both_30k_anomalies.csv", index_col=0)

# GemNet-OC

### We do this random sampling to make sure that we comparing the metrics of the same number of systems before and after removing the anomalies.

In [12]:
random.seed(10)
df_val_id_sample = df_val_id.iloc[random.sample(range(0,len(df_val_id)), len(df_val_id.query("surface_anomalies!=1")))]
df_ood_cat_sample = df_ood_cat.iloc[random.sample(range(0,len(df_ood_cat)), len(df_ood_cat.query("surface_anomalies!=1")))]
df_ood_ads_sample = df_ood_ads.iloc[random.sample(range(0,len(df_ood_ads)), len(df_ood_ads.query("surface_anomalies!=1")))]
df_ood_both_sample = df_ood_both.iloc[random.sample(range(0,len(df_ood_both)), len(df_ood_both.query("surface_anomalies!=1")))]
# val id
val_id_dict = {"all":np.mean(np.abs(df_val_id_sample["val_id_goc"] - df_val_id_sample["ref_energies"])),
              "remove ads anomaly":np.mean(np.abs(df_val_id.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["val_id_goc"] \
            - df_val_id.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["ref_energies"])),}
# val ood cat
val_ood_cat_dict = {"all":np.mean(np.abs(df_ood_cat_sample["val_ood_cat_goc"] - df_ood_cat_sample["ref_energies"])),
              "remove ads anomaly":np.mean(np.abs(df_ood_cat.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["val_ood_cat_goc"] \
            - df_ood_cat.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["ref_energies"])),}
# val ood ads
val_ood_ads_dict = {"all":np.mean(np.abs(df_ood_ads_sample["val_ood_ads_goc"] - df_ood_ads_sample["ref_energies"])),
              "remove ads anomaly":np.mean(np.abs(df_ood_ads.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["val_ood_ads_goc"] \
            - df_ood_ads.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["ref_energies"])),}
# val ood both 
val_ood_both_dict = {"all":np.mean(np.abs(df_ood_both_sample["val_ood_both_goc"] - df_ood_both_sample["ref_energies"])),
              "remove ads anomaly":np.mean(np.abs(df_ood_both.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["val_ood_both_goc"] \
            - df_ood_both.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["ref_energies"])),}

In [13]:
# Create DataFrame
data = {
    "val_id": [round(val_id_dict["all"], 3), round(val_id_dict["remove ads anomaly"], 3)],
    "ood_ads": [round(val_ood_ads_dict["all"], 3), round(val_ood_ads_dict["remove ads anomaly"], 3)],
    "ood_cat": [round(val_ood_cat_dict["all"], 3), round(val_ood_cat_dict["remove ads anomaly"], 3)],
    "ood_both": [round(val_ood_both_dict["all"], 3), round(val_ood_both_dict["remove ads anomaly"], 3)]
}

index = ["all", "remove ads anomaly"]

df = pd.DataFrame(data, index=index)
df['mean'] = df.mean(axis=1)
df.round(3)

Unnamed: 0,val_id,ood_ads,ood_cat,ood_both,mean
all,0.164,0.191,0.286,0.353,0.248
remove ads anomaly,0.151,0.179,0.252,0.319,0.225


## eSCN

In [None]:
random.seed(10)
df_val_id_sample = df_val_id.iloc[random.sample(range(0,len(df_val_id)), len(df_val_id.query("surface_anomalies!=1")))]
df_ood_cat_sample = df_ood_cat.iloc[random.sample(range(0,len(df_ood_cat)), len(df_ood_cat.query("surface_anomalies!=1")))]
df_ood_ads_sample = df_ood_ads.iloc[random.sample(range(0,len(df_ood_ads)), len(df_ood_ads.query("surface_anomalies!=1")))]
df_ood_both_sample = df_ood_both.iloc[random.sample(range(0,len(df_ood_both)), len(df_ood_both.query("surface_anomalies!=1")))]
# val id
val_id_dict = {"all":np.mean(np.abs(df_val_id_sample["val_id_escn"] - df_val_id_sample["ref_energies"])),
              "remove ads anomaly":np.mean(np.abs(df_val_id.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["val_id_escn"] \
            - df_val_id.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["ref_energies"])),}
# val ood cat
val_ood_cat_dict = {"all":np.mean(np.abs(df_ood_cat_sample["val_ood_cat_escn"] - df_ood_cat_sample["ref_energies"])),
              "remove ads anomaly":np.mean(np.abs(df_ood_cat.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["val_ood_cat_escn"] \
            - df_ood_cat.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["ref_energies"])),}
# val ood ads
val_ood_ads_dict = {"all":np.mean(np.abs(df_ood_ads_sample["val_ood_ads_escn"] - df_ood_ads_sample["ref_energies"])),
              "remove ads anomaly":np.mean(np.abs(df_ood_ads.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["val_ood_ads_escn"] \
            - df_ood_ads.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["ref_energies"])),}
# val ood both 
val_ood_both_dict = {"all":np.mean(np.abs(df_ood_both_sample["val_ood_both_escn"] - df_ood_both_sample["ref_energies"])),
              "remove ads anomaly":np.mean(np.abs(df_ood_both.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["val_ood_both_escn"] \
            - df_ood_both.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["ref_energies"])),}

In [9]:
# Create DataFrame
data = {
    "val_id": [round(val_id_dict["all"], 3), round(val_id_dict["remove ads anomaly"], 3)],
    "ood_ads": [round(val_ood_ads_dict["all"], 3), round(val_ood_ads_dict["remove ads anomaly"], 3)],
    "ood_cat": [round(val_ood_cat_dict["all"], 3), round(val_ood_cat_dict["remove ads anomaly"], 3)],
    "ood_both": [round(val_ood_both_dict["all"], 3), round(val_ood_both_dict["remove ads anomaly"], 3)]
}

index = ["all", "remove ads anomaly"]

df = pd.DataFrame(data, index=index)
df['mean'] = df.mean(axis=1)
df.round(3)

Unnamed: 0,val_id,ood_ads,ood_cat,ood_both,mean
all,0.159,0.172,0.257,0.317,0.226
remove ads anomaly,0.146,0.156,0.226,0.281,0.202


## Equiformer-V2

In [14]:
random.seed(10)
df_val_id_sample = df_val_id.iloc[random.sample(range(0,len(df_val_id)), len(df_val_id.query("surface_anomalies!=1")))]
df_ood_cat_sample = df_ood_cat.iloc[random.sample(range(0,len(df_ood_cat)), len(df_ood_cat.query("surface_anomalies!=1")))]
df_ood_ads_sample = df_ood_ads.iloc[random.sample(range(0,len(df_ood_ads)), len(df_ood_ads.query("surface_anomalies!=1")))]
df_ood_both_sample = df_ood_both.iloc[random.sample(range(0,len(df_ood_both)), len(df_ood_both.query("surface_anomalies!=1")))]
# val id
val_id_dict = {"all":np.mean(np.abs(df_val_id_sample["val_id_eqv2"] - df_val_id_sample["ref_energies"])),
              "remove ads anomaly":np.mean(np.abs(df_val_id.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["val_id_eqv2"] \
            - df_val_id.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["ref_energies"])),}
# val ood cat
val_ood_cat_dict = {"all":np.mean(np.abs(df_ood_cat_sample["val_ood_cat_eqv2"] - df_ood_cat_sample["ref_energies"])),
              "remove ads anomaly":np.mean(np.abs(df_ood_cat.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["val_ood_cat_eqv2"] \
            - df_ood_cat.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["ref_energies"])),}
# val ood ads
val_ood_ads_dict = {"all":np.mean(np.abs(df_ood_ads_sample["val_ood_ads_eqv2"] - df_ood_ads_sample["ref_energies"])),
              "remove ads anomaly":np.mean(np.abs(df_ood_ads.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["val_ood_ads_eqv2"] \
            - df_ood_ads.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["ref_energies"])),}
# val ood both 
val_ood_both_dict = {"all":np.mean(np.abs(df_ood_both_sample["val_ood_both_eqv2"] - df_ood_both_sample["ref_energies"])),
              "remove ads anomaly":np.mean(np.abs(df_ood_both.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["val_ood_both_eqv2"] \
            - df_ood_both.query("adsorbate_dissociated_anomalies!=1&adsorbate_desorbed_anomalies!=1")["ref_energies"])),}

In [15]:
# Create DataFrame
data = {
    "val_id": [round(val_id_dict["all"], 3), round(val_id_dict["remove ads anomaly"], 3)],
    "ood_ads": [round(val_ood_ads_dict["all"], 3), round(val_ood_ads_dict["remove ads anomaly"], 3)],
    "ood_cat": [round(val_ood_cat_dict["all"], 3), round(val_ood_cat_dict["remove ads anomaly"], 3)],
    "ood_both": [round(val_ood_both_dict["all"], 3), round(val_ood_both_dict["remove ads anomaly"], 3)]
}

index = ["all", "remove ads anomaly"]

df = pd.DataFrame(data, index=index)
df['mean'] = df.mean(axis=1)
df.round(3)

Unnamed: 0,val_id,ood_ads,ood_cat,ood_both,mean
all,0.159,0.172,0.257,0.317,0.226
remove ads anomaly,0.146,0.156,0.226,0.281,0.202


: 