# 0.1 Importing the DACs on N-doped carbon dataset

## Notebook Setup: Imports and Configuration


In [1]:
# ─────────────────────────────
# Standard Library Imports
# ─────────────────────────────
import os
import sys

# ─────────────────────────────
# Adjust sys.path for Local Modules
# ─────────────────────────────
src_dir = os.path.abspath(os.path.join('..', '..', 'src'))
if src_dir not in sys.path:
    sys.path.append(src_dir)

# ─────────────────────────────
# Project-Specific Imports
# ─────────────────────────────
from settings import *
from utils import *
from vis import *

## Import the DACs on N-doped carbon data

In [2]:
# Define input and output directories
dft_dir = '../../../data/raw'
dacs_energies_out_dir = '../../../data/external/dacs_energies_out'

In [3]:
# Load cohesive energy data for individual metals
metal_df = pd.read_csv(os.path.join(dft_dir, 'metal_data.csv'))

# Load DFT total energy data for various DACs on N-doped carbon structures
total_energy_dacs_df = pd.read_csv(os.path.join(dft_dir, 'total_energy_dacs.csv'))

# Load DFT total energy data for pristine DACs local coordination environment (no adsorbates)
total_energy_sacs_dc_prist_df = pd.read_csv(os.path.join(dft_dir, 'total_energy_sacs_dc_prist.csv'))

# Load DFT total energy data for single metal on DACs local coordination
total_energy_sacs_df = pd.read_csv(os.path.join(dft_dir, 'total_energy_sacs_dc.csv'))


### Cohesive metal dataset

In [4]:
metal_df.shape

(14, 26)

In [5]:
print(", ".join(metal_df.columns))

('metal, Ecoh, atomic_mass, vdw_radius, r_cov_sb, r_cov_db, '
 'dipole_polarizability, ionic_radii_crystals, d_center_sp, Paul_electroneg, '
 'MB_electroneg, electron_affinity, covalent_radius, atomic_number, '
 'Ion_energ_I, Ion_energ_II, Zung_radius, Coh_radius, Waber_radius, '
 'mied_param_h, mied_param_phi, HOMO , LUMO, mag_moment_bulk_d,  E_Fermi, '
 'E_Fermi2')


In [6]:
metal_df.sample()

Unnamed: 0,metal,Ecoh,atomic_mass,vdw_radius,r_cov_sb,r_cov_db,dipole_polarizability,ionic_radii_crystals,d_center_sp,Paul_electroneg,...,Zung_radius,Coh_radius,Waber_radius,mied_param_h,mied_param_phi,HOMO,LUMO,mag_moment_bulk_d,E_Fermi,E_Fermi2
7,Ni,-5.868789,58.6934,197,110,101,49.0,0.49,-2.07977,1.91,...,0.96,1.22,1.139,1.75,5.2,-3.8312,-3.8116,32.738905,7,7.1


In [7]:
metal_df.describe().T.round(2)


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Ecoh,14.0,-6.27,3.17,-11.84,-8.43,-6.37,-3.97,-1.23
atomic_mass,14.0,114.83,55.57,55.84,64.0,104.66,170.78,196.97
vdw_radius,14.0,208.29,7.26,196.0,201.75,210.5,213.0,218.0
r_cov_sb,14.0,121.36,7.44,110.0,116.5,122.5,125.0,136.0
r_cov_db,14.0,116.86,11.92,101.0,110.5,115.0,119.25,144.0
dipole_polarizability,14.0,50.81,12.14,26.14,46.12,51.5,56.5,72.0
ionic_radii_crystals,14.0,0.62,0.09,0.39,0.58,0.64,0.68,0.78
d_center_sp,14.0,-3.87,2.08,-9.0,-4.04,-3.27,-2.64,-1.91
Paul_electroneg,14.0,2.03,0.23,1.65,1.88,2.06,2.2,2.4
MB_electroneg,14.0,1.64,0.35,1.07,1.41,1.74,1.9,2.08


### DACs on N-doped carbon total energy dataset

In [8]:
total_energy_dacs_df.shape

(1218, 2)

In [9]:
print(", ".join(total_energy_dacs_df.columns))

'system, tot_energy'


In [10]:
total_energy_dacs_df.sample()

Unnamed: 0,system,tot_energy
556,Pt_Fe_N_din6_as_c1_2,-635.278


In [11]:
total_energy_dacs_df.head() 


Unnamed: 0,system,tot_energy
0,Ag_Fe_C_din4_x2_c6,-629.086
1,Au_Fe_C_din4_x2_c6,-630.74
2,Cd_Fe_C_din4_x2_c6,-627.199
3,Co_Fe_C_din4_x2_c6,-635.247
4,Cu_Fe_C_din4_x2_c6,-631.95


In [12]:
total_energy_dacs_df.describe().T.round(2)


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
tot_energy,1218.0,-630.55,8.54,-640.75,-636.55,-633.98,-629.15,-606.83


### Pristine DACs on N-doped carbon total energy dataset

In [13]:
total_energy_dacs_df.shape

(1218, 2)

In [14]:
print(", ".join(total_energy_sacs_dc_prist_df.columns))

'system, tot_energy'


In [15]:
total_energy_sacs_dc_prist_df.sample()

Unnamed: 0,system,tot_energy
41,N_din6_as_c3_123,-615.893


In [16]:
total_energy_sacs_dc_prist_df.describe().T.round(2)


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
tot_energy,74.0,-612.78,7.47,-618.51,-616.86,-616.05,-614.6,-593.77


### Single atom on DACs local coordination enviroment total energies dataset

In [17]:
total_energy_sacs_df.shape

(1232, 2)

In [18]:
print(", ".join(total_energy_sacs_df.columns))

'system, tot_energy'


In [19]:
total_energy_sacs_df.sample()

Unnamed: 0,system,tot_energy
46,Cu_N_din4_x2_c0,-621.457


In [20]:
total_energy_sacs_df.describe().T.round(2)


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
tot_energy,1232.0,-621.21,7.91,-630.64,-626.64,-623.78,-620.29,-589.7


## Extract Structural Features for DACs on N-doped carbon total energy dataset

This cell parses the `system` string column in the DAC total energy dataset to extract key structural and compositional features. 
New columns are created for metal identifiers (`M1`, `M2`), heteroatoms, cavity types, and engineered cavity labels used for analysis. 
This enables downstream grouping, filtering, or visualization based on atomic configuration.


In [21]:
total_energy_dacs_df['M1'] = total_energy_dacs_df['system'].str[:2]
total_energy_dacs_df['M2'] = total_energy_dacs_df['system'].str[3:5]
total_energy_dacs_df['heteroatom'] = total_energy_dacs_df['system'].str[6]
total_energy_dacs_df['basic_cavity'] = total_energy_dacs_df['system'].str[8:12]
total_energy_dacs_df['cavity'] = total_energy_dacs_df['system'].str[6:]
total_energy_dacs_df['cavity_v2'] = total_energy_dacs_df['system'].str[6:]
#total_energy_dacs_df['cavity_3'] = total_energy_dacs_df['cavity'].str.extract(r'(.*c.)')
total_energy_dacs_df['cavity_3'] = total_energy_dacs_df['cavity'].str.extract(r'c(\d)')
total_energy_dacs_df['M1_cavity'] = total_energy_dacs_df['system'].str[:2] + '_' + total_energy_dacs_df['heteroatom'] +'_' + total_energy_dacs_df['system'].str[8:]
# Create a new column 'cavity_4' based on the values in 'cavity'
total_energy_dacs_df['cavity_4'] = total_energy_dacs_df['cavity'].apply(
    lambda x: 'din4_x2' if 'din4_x2' in x else 
              ('din6_s' if 'din6_s' in x else 
              ('din6_as' if 'din6_as' in x else 
               'other'))  # Default value if none of the specified values are found
)


total_energy_dacs_df['M2_cavity'] = total_energy_dacs_df['system'].str[3:5] + '_' + total_energy_dacs_df['heteroatom'] +'_' + total_energy_dacs_df['system'].str[8:]


### Column Descriptions

| Column         | Description |
|----------------|-------------|
| `system`       | Unique identifier for each DAC structure, encoding metal atoms, heteroatom, and cavity configuration. |
| `tot_energy`   | Total DFT-calculated energy of the DAC system (in eV). |
| `M1`           | Symbol of the first metal atom (first two characters of `system`). |
| `M2`           | Symbol of the second metal atom (characters 4–5 of `system`). |
| `heteroatom`   | The non-metal heteroatom (e.g., N, O, S) present in the cavity (character 6 of `system`). |
| `basic_cavity` | Basic label for the cavity configuration (characters 8–11 of `system`). |
| `cavity`       | Substring of `system` starting from the heteroatom, describing full cavity configuration. |
| `cavity_v2`    | Duplicate of `cavity` (used for flexible processing or alternate mappings). |
| `cavity_3`     | Numeric identifier of the cavity (extracted from pattern `c#` in the `cavity` string). |
| `M1_cavity`    | Concatenation of `M1`, `heteroatom`, and `basic_cavity` to create a composite identifier for M1-cavity configuration. |
| `cavity_4`     | Simplified cavity label categorizing into `din4_x2`, `din6_s`, `din6_as`, or `other`, based on string patterns. |
| `M2_cavity`    | Concatenation of `M2`, `heteroatom`, and `basic_cavity` to create a composite identifier for M2-cavity configuration. |


In [22]:
total_energy_sacs_df.shape

(1232, 2)

In [23]:
print(", ".join(total_energy_dacs_df.columns))

('system, tot_energy, M1, M2, heteroatom, basic_cavity, cavity, cavity_v2, '
 'cavity_3, M1_cavity, cavity_4, M2_cavity')


In [24]:
total_energy_dacs_df.sample()

Unnamed: 0,system,tot_energy,M1,M2,heteroatom,basic_cavity,cavity,cavity_v2,cavity_3,M1_cavity,cavity_4,M2_cavity
375,Rh_Fe_N_din4_x2_c4_c,-636.131,Rh,Fe,N,din4,N_din4_x2_c4_c,N_din4_x2_c4_c,4,Rh_N_din4_x2_c4_c,din4_x2,Fe_N_din4_x2_c4_c


In [25]:
total_energy_dacs_df.describe().T.round(2)


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
tot_energy,1218.0,-630.55,8.54,-640.75,-636.55,-633.98,-629.15,-606.83


## Extract Structural Features for pristine DACs on N-doped carbon total energy dataset

In [26]:
total_energy_sacs_dc_prist_df['heteroatom'] = total_energy_sacs_dc_prist_df['system'].str[0]
total_energy_sacs_dc_prist_df['basic_cavity'] = total_energy_sacs_dc_prist_df['system'].str[2:6]


### Column Descriptions

| Column         | Description |
|----------------|-------------|
| `system`       | Unique identifier for each pristine DAC system, encoding the heteroatom and cavity structure. |
| `tot_energy`   | Total DFT-calculated energy of the pristine DAC system (in eV). |
| `heteroatom`   | The non-metal heteroatom present in the cavity (first character of `system`). |
| `basic_cavity` | Basic cavity configuration label extracted from characters 2 to 5 of `system`. |


In [27]:
total_energy_sacs_dc_prist_df.shape

(74, 4)

In [28]:
print(", ".join(total_energy_sacs_dc_prist_df.columns))

'system, tot_energy, heteroatom, basic_cavity'


In [29]:
total_energy_sacs_dc_prist_df.sample()

Unnamed: 0,system,tot_energy,heteroatom,basic_cavity
45,N_din6_as_c3_164,-617.328,N,din6


In [30]:
total_energy_sacs_dc_prist_df.describe().T.round(2)


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
tot_energy,74.0,-612.78,7.47,-618.51,-616.86,-616.05,-614.6,-593.77


## Extract Structural Features for single atoms on DACs on N-doped carbon total energy dataset

In [31]:
total_energy_sacs_df['metal'] = total_energy_sacs_df['system'].str[:2]
total_energy_sacs_df['heteroatom'] = total_energy_sacs_df['system'].str[3]
total_energy_sacs_df['basic_cavity'] = total_energy_sacs_df['system'].str[5:9]
total_energy_sacs_df['cavity'] = total_energy_sacs_df['system'].str[3:]

### Column Descriptions

| Column         | Description |
|----------------|-------------|
| `system`       | Unique identifier for each SAC system, encoding the metal, heteroatom, and cavity configuration. |
| `tot_energy`   | Total DFT-calculated energy of the SAC system (in eV). |
| `metal`        | The single metal atom in the SAC, extracted from the first two characters of `system`. |
| `heteroatom`   | The non-metal heteroatom coordinated to the metal atom, extracted from the 4th character of `system`. |
| `basic_cavity` | Basic cavity structure, typically 4 characters long, extracted from positions 5 to 8 in `system`. |
| `cavity`       | Full cavity description including heteroatom and structure, extracted from character 4 onward in `system`. |


In [32]:
total_energy_sacs_df.shape

(1232, 6)

In [33]:
print(", ".join(total_energy_sacs_df.columns))

'system, tot_energy, metal, heteroatom, basic_cavity, cavity'


In [34]:
total_energy_sacs_df.sample()

Unnamed: 0,system,tot_energy,metal,heteroatom,basic_cavity,cavity
177,Pd_C_din4_x2_c6,-621.945,Pd,C,din4,C_din4_x2_c6


In [35]:
total_energy_sacs_df.describe().T.round(2)


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
tot_energy,1232.0,-621.21,7.91,-630.64,-626.64,-623.78,-620.29,-589.7


# Calculating the Energies of the DACs on N-doped carbon dataset

The adsorption energy for the DACs is defined:

$$
E_{\text{ads}} = E_{\text{M1,M2,cc}} - E_{\text{cc}} - E_{\text{M1}} - E_{\text{M2}}
$$

where:\
$E_{\text{M1,M2,cc}}$ is the total energy of the adsorbed metal pair
on the N-doped carbon\
$E_{\text{cc}}$ is the energy of the pristine N-doped
carbon\
$E_{\text{M1}}$ is the cohesive energy of the metal M1 \
$E_{\text{M2}}$ is the cohesive energy of the metal M2

## Merging DFT Energies and Calculating Adsorption Energy

This step merges DFT energy datasets of DAC and SAC systems with cohesive energies of individual metals to compute the adsorption energy (E_dft_M1M2) of heteroatoms on DAC surfaces. The final energy is calculated by subtracting the SAC pristine energy and the cohesive energies of both metals from the total DAC energy. The resulting dataset is saved for further analysis.

In [36]:
# Apply the adjust_names function to the 'cavity' column
total_energy_dacs_df['cavity'] = total_energy_dacs_df['cavity'].apply(adjust_names)

# Merge the dataframes: left on 'cavity', right on 'system'
merged_df = pd.merge(total_energy_dacs_df, total_energy_sacs_dc_prist_df, left_on='cavity', right_on='system', suffixes=('_dacs', '_sacs_pristine'))

# Merge the merged_df with metal_df to get Ecoh for M1
merged_m1_df = pd.merge(merged_df, metal_df[['metal', 'Ecoh']], left_on='M1', right_on='metal', how='left')
# Create a new column 'Ecoh_m1' by assigning the values from the 'Ecoh' column and drop 'metal' column
merged_m1_df.rename(columns={'Ecoh': 'Ecoh_m1'}, inplace=True)
merged_m1_df.drop(columns=['metal'], inplace=True)

# Merge the merged_df with metal_df to get Ecoh for M2
merged_m1_m2_df = pd.merge(merged_m1_df, metal_df[['metal', 'Ecoh']], left_on='M2', right_on='metal', how='left')
# Create a new column 'Ecoh_m2' by assigning the values from the 'Ecoh' column and drop 'metal' column
merged_m1_m2_df.rename(columns={'Ecoh': 'Ecoh_m2'}, inplace=True)
merged_m1_m2_df.drop(columns=['metal'], inplace=True)

# Calculate the E_dft_M1M2 value by subtracting the energy columns and cohesive energies
merged_m1_m2_df['E_dft_M1M2'] = (
    merged_m1_m2_df['tot_energy_dacs'] -
    merged_m1_m2_df['tot_energy_sacs_pristine'] -
    merged_m1_m2_df['Ecoh_m1'] -
    merged_m1_m2_df['Ecoh_m2']
)

# Copy the merged_df to Edft_coh_df for saving
Edft_coh_df = merged_m1_m2_df.copy()

# Define the path where you want to save the CSV file
save_path = os.path.join(dacs_energies_out_dir, 'Edft_coh_df.csv')

# # Save the dataframe to a CSV file
Edft_coh_df.to_csv(save_path, index=False, header=True)


# Define the path where you want to save the pickle file
save_path_pkl = os.path.join(dacs_energies_out_dir, 'Edft_coh_df.pkl')

# Save the dataframe to a pickle file
Edft_coh_df.to_pickle(save_path_pkl)

| **Feature**               | **Description**                                                                                  |
|---------------------------|------------------------------------------------------------------------------------------------|
| `system_dacs`             | Identifier for the DAC system (dual-atom catalyst)                                             |
| `tot_energy_dacs`         | Total DFT energy of the DAC system                                                             |
| `M1`                      | Metal 1 element symbol in the DAC system                                                       |
| `M2`                      | Metal 2 element symbol in the DAC system                                                       |
| `heteroatom_dacs`         | Heteroatom element symbol in the DAC system                                                    |
| `basic_cavity_dacs`       | Basic cavity structure identifier in the DAC system                                           |
| `cavity`                  | Adjusted cavity identifier used for merging                                                    |
| `cavity_v2`               | Alternative cavity identifier (same as `cavity`)                                               |
| `cavity_3`                | Extracted cavity subtype (e.g., numeric pattern)                                               |
| `M1_cavity`               | Combination of M1 metal, heteroatom, and cavity details                                        |
| `cavity_4`                | Categorized cavity type (e.g., `din4_x2`, `din6_s`, `din6_as`, `other`)                        |
| `M2_cavity`               | Combination of M2 metal, heteroatom, and cavity details                                        |
| `system_sacs_pristine`    | Identifier for the pristine SAC system (single-atom catalyst)                                 |
| `tot_energy_sacs_pristine`| Total DFT energy of the pristine SAC system                                                   |
| `heteroatom_sacs_pristine`| Heteroatom element symbol in the pristine SAC system                                          |
| `basic_cavity_sacs_pristine` | Basic cavity structure identifier in the pristine SAC system                             |
| `Ecoh_m1`                 | Cohesive energy of metal M1                                                                   |
| `Ecoh_m2`                 | Cohesive energy of metal M2                                                                   |
| `E_dft_M1M2`              | Calculated adsorption energy: total DAC energy minus pristine SAC energy and cohesive energies |


In [37]:
Edft_coh_df.shape

(1218, 19)

In [38]:
print(", ".join(Edft_coh_df.columns))

('system_dacs, tot_energy_dacs, M1, M2, heteroatom_dacs, basic_cavity_dacs, '
 'cavity, cavity_v2, cavity_3, M1_cavity, cavity_4, M2_cavity, '
 'system_sacs_pristine, tot_energy_sacs_pristine, heteroatom_sacs_pristine, '
 'basic_cavity_sacs_pristine, Ecoh_m1, Ecoh_m2, E_dft_M1M2')


In [39]:
Edft_coh_df.sample()

Unnamed: 0,system_dacs,tot_energy_dacs,M1,M2,heteroatom_dacs,basic_cavity_dacs,cavity,cavity_v2,cavity_3,M1_cavity,cavity_4,M2_cavity,system_sacs_pristine,tot_energy_sacs_pristine,heteroatom_sacs_pristine,basic_cavity_sacs_pristine,Ecoh_m1,Ecoh_m2,E_dft_M1M2
432,Ru_Fe_N_din4_x2_c4_e,-637.866,Ru,Fe,N,din4,N_din4_x2_c4_e,N_din4_x2_c4_e,4,Ru_N_din4_x2_c4_e,din4_x2,Fe_N_din4_x2_c4_e,N_din4_x2_c4_e,-615.582,N,din4,-9.812466,-8.619718,-3.851816


In [40]:
Edft_coh_df.describe().T.round(2)


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
tot_energy_dacs,1218.0,-630.55,8.54,-640.75,-636.55,-633.98,-629.15,-606.83
tot_energy_sacs_pristine,1218.0,-613.32,6.97,-618.51,-616.87,-616.12,-614.85,-593.77
Ecoh_m1,1218.0,-6.27,3.06,-11.84,-8.62,-6.37,-3.88,-1.23
Ecoh_m2,1218.0,-8.62,0.0,-8.62,-8.62,-8.62,-8.62,-8.62
E_dft_M1M2,1218.0,-2.34,2.88,-7.74,-3.69,-2.62,-1.56,21.56


In [41]:
# # Select specific configuration
# print(Edft_coh_df[Edft_coh_df['system_dacs'] == 'Ni_Fe_N_din6_as_c0']['E_dft_M1M2'])

#### Remove not stable cavity M_N_din4_x2_c4_f

In [42]:
Edft_coh_df = Edft_coh_df[~(Edft_coh_df['cavity_v2'] == 'N_din4_x2_c4_f') & ~Edft_coh_df['cavity_v2'].str.endswith('_v2')]#&~(Edft_coh_df['cavity_v2'] == 'N_din6_s_c4_0134')]

In [43]:
Edft_coh_df.shape


(1022, 19)

In [44]:
total_energy_dacs_df.shape

(1218, 12)

## Boxplot of the E_dft_M1M2 categorized by metal

In [45]:
fig = plot_categorical_energy_boxplot(
    df=Edft_coh_df,
    x_col='M1',
    color_map=metal_colors,
    y_col='E_dft_M1M2',
    hover_col='system_dacs',
    save_dir='../../../data/figures/dacs_eda',
    x_title='Metal',
    y_title='E_dft_M1M2 Energy',
    file_name='Ecoh_M1_boxplot_1'
)

fig.show()


## Boxplot of the E_dft_M1M2 categorized by metal - no outliers

In [46]:

# Assuming Edft_coh_df and metal_colors are already defined

# Map colors to the metal types in the dataframe
Edft_coh_df['color'] = Edft_coh_df['M1'].map(metal_colors)


# Detect outliers
outliers_dacs_metal = detect_outliers(Edft_coh_df, 'M1', 'E_dft_M1M2')


# Save the outliers to a CSV file
outliers_path = os.path.join(dacs_energies_out_dir, 'outliers_dacs_metal.csv')
outliers_dacs_metal.to_csv(outliers_path, index=False, header=True)

# Remove outliers from the dataset
Edft_coh_df_no_outliers_metal = Edft_coh_df[~Edft_coh_df.index.isin(outliers_dacs_metal.index)]

fig_no_outliers = plot_categorical_energy_boxplot(
    df=Edft_coh_df_no_outliers_metal,
    x_col='M1',
    color_map=metal_colors,
    y_col='E_dft_M1M2',
    hover_col='system_dacs',
    save_dir='../../../data/figures/dacs_eda',
    x_title='Metal',
    y_title='E_dft_M1M2 Energy',
    file_name='Ecoh_M1_boxplot_no_outliers'
)

fig_no_outliers.show()





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [47]:
metal_summary = Edft_coh_df_no_outliers_metal.groupby('M1')['E_dft_M1M2'].describe().T
print(metal_summary)

M1            Ag         Au         Cd         Co         Cu         Fe  \
count  72.000000  72.000000  72.000000  71.000000  72.000000  73.000000   
mean   -1.588471  -2.033517  -1.957432  -3.132801  -2.728444  -3.056921   
std     1.203699   1.452171   0.982471   1.733206   1.406342   1.868994   
min    -4.606277  -5.091573  -4.379224  -7.418843  -6.101694  -7.516565   
25%    -2.341277  -2.960323  -2.581724  -4.235843  -3.565444  -4.267565   
50%    -1.711277  -2.246073  -1.976724  -3.039843  -2.843194  -2.934565   
75%    -0.727027  -1.070823  -1.269724  -2.021343  -1.812694  -1.792565   
max     0.983723   1.557427   0.130776   0.612157   0.785306   1.875435   

M1            Ir         Ni         Os         Pd         Pt         Rh  \
count  71.000000  72.000000  72.000000  70.000000  69.000000  70.000000   
mean   -2.500340  -3.497772  -1.581266  -2.867765  -3.129836  -2.832718   
std     1.675001   1.689752   1.809329   1.312810   1.494425   1.411221   
min    -6.720199  -7.615

## Boxplot of the E_dft_M1M2 categorized by cavity 

In [48]:
fig_cavity = plot_categorical_energy_boxplot(
    df=Edft_coh_df,
    x_col='cavity_v2',
    color_map=cavity_colors,
    y_col='E_dft_M1M2',
    hover_col='system_dacs',
    x_title='Cavity',
    y_title='E_dft_M1M2 Energy',
    save_dir='../../../data/figures/dacs_eda',
    file_name='Ecoh_M1_boxplot_cavity'
)

fig_cavity.show()


## Boxplot of the E_dft_M1M2 categorized by cavity - no outliers

In [49]:
# Map colors to the metal types in the dataframe
Edft_coh_df['color'] = Edft_coh_df['cavity_v2'].map(cavity_colors)

# Detect outliers
outliers_cavity_dacs = detect_outliers(Edft_coh_df, 'cavity_v2', 'E_dft_M1M2')

# Save the outliers to a CSV file
outliers_path = os.path.join(dacs_energies_out_dir, 'outliers_dacs_cavity.csv')
outliers_cavity_dacs.to_csv(outliers_path, index=False, header=True)

# Remove outliers from the dataset
Edft_coh_df_no_outliers_cav = Edft_coh_df[~Edft_coh_df.index.isin(outliers_cavity_dacs.index)]

fig_no_outliers_cavity = plot_categorical_energy_boxplot(
    df=Edft_coh_df_no_outliers_cav,
    x_col='cavity_v2',
    color_map=cavity_colors,
    y_col='E_dft_M1M2',
    hover_col='system_dacs',
    x_title='Cavity',
    y_title='E_dft_M1M2 Energy',
    save_dir='../../../data/figures/dacs_eda',
    file_name='Ecoh_M1_boxplot_cavity_no_outliers'
)

fig_no_outliers_cavity.show()





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [50]:
cavity_summary = Edft_coh_df_no_outliers_cav.groupby('cavity_v2')['E_dft_M1M2'].describe().T
print(cavity_summary)

cavity_v2  C_din4_x2_c6  C_din6_as_c6  C_din6_s_c6  N_din4_x2_c0  \
count         14.000000     14.000000    14.000000     14.000000   
mean          -4.983974     -2.829402    -3.649545     -5.663545   
std            1.051756      0.597252     0.505006      1.206095   
min           -6.383494     -3.775494    -4.614764     -7.516565   
25%           -5.903412     -3.102024    -3.973739     -6.413299   
50%           -5.024176     -2.916017    -3.768619     -5.731834   
75%           -4.103901     -2.479912    -3.230841     -5.157530   
max           -3.018904     -1.616224    -2.766904     -3.348277   

cavity_v2  N_din4_x2_c1_a  N_din4_x2_c1_b  N_din4_x2_c2_a  N_din4_x2_c2_b  \
count           14.000000       14.000000       12.000000       14.000000   
mean            -2.785474       -5.814688       -2.425565       -2.965974   
std              1.771586        1.236407        0.843153        1.512873   
min             -4.942565       -7.473565       -3.720565       -4.851843   
25

## Data Cleaning: Removing Metal and Cavity Outliers

In [51]:
# Combine indices from outliers_dacs_metal and outliers_dacs_cavity
combined_outliers_indices = outliers_dacs_metal.index.union(outliers_cavity_dacs.index)

Edft_coh_df_iqr = Edft_coh_df[~Edft_coh_df.index.isin(combined_outliers_indices)]

In [52]:
Edft_coh_df_iqr.shape

(983, 20)

In [53]:
print(", ".join(Edft_coh_df_iqr.columns))

('system_dacs, tot_energy_dacs, M1, M2, heteroatom_dacs, basic_cavity_dacs, '
 'cavity, cavity_v2, cavity_3, M1_cavity, cavity_4, M2_cavity, '
 'system_sacs_pristine, tot_energy_sacs_pristine, heteroatom_sacs_pristine, '
 'basic_cavity_sacs_pristine, Ecoh_m1, Ecoh_m2, E_dft_M1M2, color')


In [54]:
Edft_coh_df_iqr.sample()

Unnamed: 0,system_dacs,tot_energy_dacs,M1,M2,heteroatom_dacs,basic_cavity_dacs,cavity,cavity_v2,cavity_3,M1_cavity,cavity_4,M2_cavity,system_sacs_pristine,tot_energy_sacs_pristine,heteroatom_sacs_pristine,basic_cavity_sacs_pristine,Ecoh_m1,Ecoh_m2,E_dft_M1M2,color
1110,Cu_Fe_N_din6_s_c2_15,-611.899,Cu,Fe,N,din6,N_din6_s_c2_15,N_din6_s_c2_15,2,Cu_N_din6_s_c2_15,din6_s,Fe_N_din6_s_c2_15,N_din6_s_c2_15,-598.852,N,din6,-4.234588,-8.619718,-0.192694,blue


In [55]:
Edft_coh_df_iqr.describe().T.round(2)


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
tot_energy_dacs,983.0,-630.48,8.48,-640.75,-636.37,-633.88,-629.16,-606.83
tot_energy_sacs_pristine,983.0,-613.04,7.27,-618.51,-616.87,-616.11,-614.67,-593.77
Ecoh_m1,983.0,-6.24,3.03,-11.84,-8.62,-5.87,-3.88,-1.23
Ecoh_m2,983.0,-8.62,0.0,-8.62,-8.62,-8.62,-8.62,-8.62
E_dft_M1M2,983.0,-2.59,1.59,-7.62,-3.64,-2.56,-1.58,2.56


## Boxplot of the E_dft_M1M2 categorized by metal - no total outliers

In [56]:
fig = plot_categorical_energy_boxplot(
    df=Edft_coh_df_iqr,
    x_col='M1',
    color_map=metal_colors,
    y_col='E_dft_M1M2',
    hover_col='system_dacs',
    x_title='Metal',
    y_title='E_dft_M1M2 Energy',
    save_dir='../../../data/figures/dacs_eda',
    file_name='Ecoh_M1_boxplot_metal_iqr'
)

fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [57]:
metal_summary = Edft_coh_df_iqr.groupby('M1')['E_dft_M1M2'].describe().T
print(metal_summary)


M1            Ag         Au         Cd         Co         Cu         Fe  \
count  70.000000  71.000000  71.000000  71.000000  72.000000  73.000000   
mean   -1.643105  -2.064474  -1.948787  -3.132801  -2.728444  -3.056921   
std     1.175362   1.438382   0.986702   1.733206   1.406342   1.868994   
min    -4.606277  -5.091573  -4.379224  -7.418843  -6.101694  -7.516565   
25%    -2.341277  -2.991073  -2.572724  -4.235843  -3.565444  -4.267565   
50%    -1.733777  -2.261573  -1.960224  -3.039843  -2.843194  -2.934565   
75%    -0.786277  -1.119573  -1.264224  -2.021343  -1.812694  -1.792565   
max     0.983723   1.557427   0.130776   0.612157   0.785306   1.875435   

M1            Ir         Ni         Os         Pd         Pt         Rh  \
count  71.000000  72.000000  64.000000  70.000000  69.000000  70.000000   
mean   -2.500340  -3.497772  -1.750467  -2.867765  -3.129836  -2.832718   
std     1.675001   1.689752   1.812068   1.312810   1.494425   1.411221   
min    -6.720199  -7.615

## Boxplot of the E_dft_M1M2 categorized by cavity - no total outliers

In [58]:
fig = plot_categorical_energy_boxplot(
    df=Edft_coh_df_iqr,
    x_col='cavity_v2',
    color_map=cavity_colors,
    y_col='E_dft_M1M2',
    hover_col='system_dacs',
    x_title='Cavity',
    y_title='E_dft_M1M2 Energy',
    save_dir='../../../data/figures/dacs_eda',
    file_name='Ecoh_M1_boxplot_cavity_iqr'
)

fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [59]:
cavity_summary = Edft_coh_df_iqr.groupby('cavity_v2')['E_dft_M1M2'].describe().T
print(cavity_summary)

cavity_v2  C_din4_x2_c6  C_din6_as_c6  C_din6_s_c6  N_din4_x2_c0  \
count         14.000000     14.000000    14.000000     13.000000   
mean          -4.983974     -2.829402    -3.649545     -5.617447   
std            1.051756      0.597252     0.505006      1.242440   
min           -6.383494     -3.775494    -4.614764     -7.516565   
25%           -5.903412     -3.102024    -3.973739     -6.463460   
50%           -5.024176     -2.916017    -3.768619     -5.643764   
75%           -4.103901     -2.479912    -3.230841     -5.030694   
max           -3.018904     -1.616224    -2.766904     -3.348277   

cavity_v2  N_din4_x2_c1_a  N_din4_x2_c1_b  N_din4_x2_c2_a  N_din4_x2_c2_b  \
count           14.000000       13.000000       12.000000       14.000000   
mean            -2.785474       -5.780140       -2.425565       -2.965974   
std              1.771586        1.279841        0.843153        1.512873   
min             -4.942565       -7.473565       -3.720565       -4.851843   
25

## Dataset with the DACs on N-doped carbon without outliers

Filtered Dataset of DACs on N-Doped Carbon After Outlier Removal Using IQR Method

In [60]:
# Define the path where you want to save the CSV file
save_path_csv = os.path.join(dacs_energies_out_dir, 'Edft_coh_df_iqr.csv')

# Save the dataframe to a CSV file
Edft_coh_df_iqr.to_csv(save_path_csv, index=False, header=True)

# Define the path where you want to save the pickle file
save_path_pkl = os.path.join(dacs_energies_out_dir, 'Edft_coh_df_iqr.pkl')

# Save the dataframe to a pickle file
Edft_coh_df_iqr.to_pickle(save_path_pkl)


| Feature                  | Description                                                                                       |
|--------------------------|-------------------------------------------------------------------------------------------------|
| 'system_dacs'          | Identifier for the dual-atom catalyst (DAC) system                                              |
| 'tot_energy_dacs'      | Total DFT energy of the DAC system                                                              |
| 'M1'                   | Symbol of the first metal atom in the DAC                                                       |
| 'M2'                   | Symbol of the second metal atom in the DAC                                                      |
| 'heteroatom_dacs'      | Heteroatom type present in the DAC system                                                       |
| 'basic_cavity_dacs'    | Basic cavity environment descriptor of the DAC                                                  |
| 'cavity'               | Detailed cavity environment string related to the DAC                                           |
| 'cavity_v2'            | Modified/standardized cavity environment name                                                   |
| 'cavity_3'             | Numeric identifier extracted from cavity description                                            |
| 'M1_cavity'            | Concatenation of M1 metal, heteroatom, and cavity environment                                   |
| 'cavity_4'             | Categorized cavity type based on specific substrings (e.g., 'din4_x2', 'din6_s', 'din6_as', 'other') |
| 'M2_cavity'            | Concatenation of M2 metal, heteroatom, and cavity environment                                   |
| 'system_sacs_pristine' | Identifier for the single-atom catalyst (SAC) pristine system                                   |
| 'tot_energy_sacs_pristine' | Total DFT energy of the pristine SAC system                                               |
| 'heteroatom_sacs_pristine' | Heteroatom type in the pristine SAC system                                               |
| 'basic_cavity_sacs_pristine' | Basic cavity environment descriptor of the pristine SAC                               |
| 'Ecoh_m1'              | Cohesive energy of the first metal (M1)                                                        |
| 'Ecoh_m2'              | Cohesive energy of the second metal (M2)                                                       |
| 'E_dft_M1M2'           | Calculated interaction energy combining DFT energies and cohesive energies                      |
| 'color'                | Color code mapped to the metal type or cavity for visualization                                 |


In [61]:
Edft_coh_df_iqr.shape

(983, 20)

In [62]:
print(", ".join(Edft_coh_df_iqr.columns))

('system_dacs, tot_energy_dacs, M1, M2, heteroatom_dacs, basic_cavity_dacs, '
 'cavity, cavity_v2, cavity_3, M1_cavity, cavity_4, M2_cavity, '
 'system_sacs_pristine, tot_energy_sacs_pristine, heteroatom_sacs_pristine, '
 'basic_cavity_sacs_pristine, Ecoh_m1, Ecoh_m2, E_dft_M1M2, color')


In [63]:
Edft_coh_df_iqr.sample()

Unnamed: 0,system_dacs,tot_energy_dacs,M1,M2,heteroatom_dacs,basic_cavity_dacs,cavity,cavity_v2,cavity_3,M1_cavity,cavity_4,M2_cavity,system_sacs_pristine,tot_energy_sacs_pristine,heteroatom_sacs_pristine,basic_cavity_sacs_pristine,Ecoh_m1,Ecoh_m2,E_dft_M1M2,color
865,Rh_Fe_N_din6_as_c3_236,-635.699,Rh,Fe,N,din6,N_din6_as_c3_236,N_din6_as_c3_236,3,Rh_N_din6_as_c3_236,din6_as,Fe_N_din6_as_c3_236,N_din6_as_c3_236,-615.994,N,din6,-7.850822,-8.619718,-3.23446,green


In [64]:
Edft_coh_df_iqr.describe().T.round(2)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
tot_energy_dacs,983.0,-630.48,8.48,-640.75,-636.37,-633.88,-629.16,-606.83
tot_energy_sacs_pristine,983.0,-613.04,7.27,-618.51,-616.87,-616.11,-614.67,-593.77
Ecoh_m1,983.0,-6.24,3.03,-11.84,-8.62,-5.87,-3.88,-1.23
Ecoh_m2,983.0,-8.62,0.0,-8.62,-8.62,-8.62,-8.62,-8.62
E_dft_M1M2,983.0,-2.59,1.59,-7.62,-3.64,-2.56,-1.58,2.56


## Interaction Energy

The interaction energy between the two metals M1 and M2 is defined:
$$
E_{\text{int}} = E_{\text{M1,M2,cc}} + E_{\text{cc}} - E_{\text{M1/cc}} - E_{\text{M2/cc}}
$$

where:\
$E_{\text{M1,M2,cc}}$ is the total energy of the dual-atom
catalyst with both metals co-adsorbed\
$E_{\text{cc}}$ is the total energy of
the pristine support\
$E_{\text{M1/cc}}$ is total energy of the M1 metal center adsorbed individually into the support\
$E_{\text{M2/cc}}$ is total energy of the M2 metal center adsorbed individually into the support

# Merging Datasets of Single-Atom Adsorption on DAC Local Coordination Environments 

This script merges datasets related to dual-atom catalysts (DACs), pristine supports, and single-atom adsorbed systems. It aligns the local coordination environments of adsorbed metal atoms (M1 and M2) on DACs with their corresponding reference energies, enabling the calculation of interaction energies (Eint) for the adsorbed configurations. The result is a unified dataset suitable for analyzing adsorption behavior and energetics across different catalyst structures.

In [65]:

total_energy_dacs_df['cavity'] = total_energy_dacs_df['cavity'].apply(adjust_names)


# Merge the dataframes: left on 'cavity', right on 'system'
merged_df = pd.merge(total_energy_dacs_df, total_energy_sacs_dc_prist_df, left_on='cavity', right_on='system', suffixes=('_dacs', '_sacs_pristine'))

print(merged_df.shape)

merged_n1_df = pd.merge(merged_df, total_energy_sacs_df[['system', 'tot_energy']].rename(columns={'tot_energy': 'tot_energy_M1'}), left_on='M1_cavity', right_on='system')

# Dropping duplicates
merged_n1_df = merged_n1_df.drop_duplicates()


print(merged_n1_df.shape)

merged_n1_n2_df = pd.merge(merged_n1_df, total_energy_sacs_df[['system', 'tot_energy']].rename(columns={'tot_energy': 'tot_energy_M2'}), left_on='M2_cavity', right_on='system')

# Subtract the 'tot_energy' columns
merged_n1_n2_df['Eint'] = merged_n1_n2_df['tot_energy_dacs'] + merged_n1_n2_df['tot_energy_sacs_pristine'] - merged_n1_n2_df['tot_energy_M1'] - merged_n1_n2_df['tot_energy_M2']

# Dropping duplicates
merged_n1_n2_df = merged_n1_n2_df.drop_duplicates()

print(merged_n1_n2_df.shape)

Eint_df =merged_n1_n2_df

# Define the path where you want to save the CSV file
save_path = os.path.join(dacs_energies_out_dir, 'Eint_df.csv')

# Save the dataframe to a CSV file
Eint_df.to_csv(save_path, index=False, header=True)

(1218, 16)
(1218, 18)
(1218, 21)


In [66]:
Eint_df.shape

(1218, 21)

In [67]:
print(", ".join(Eint_df.columns))

('system_dacs, tot_energy_dacs, M1, M2, heteroatom_dacs, basic_cavity_dacs, '
 'cavity, cavity_v2, cavity_3, M1_cavity, cavity_4, M2_cavity, '
 'system_sacs_pristine, tot_energy_sacs_pristine, heteroatom_sacs_pristine, '
 'basic_cavity_sacs_pristine, system_x, tot_energy_M1, system_y, '
 'tot_energy_M2, Eint')


In [68]:
Eint_df.sample()

Unnamed: 0,system_dacs,tot_energy_dacs,M1,M2,heteroatom_dacs,basic_cavity_dacs,cavity,cavity_v2,cavity_3,M1_cavity,...,M2_cavity,system_sacs_pristine,tot_energy_sacs_pristine,heteroatom_sacs_pristine,basic_cavity_sacs_pristine,system_x,tot_energy_M1,system_y,tot_energy_M2,Eint
280,Ag_Fe_N_din4_x2_c3_c,-629.898,Ag,Fe,N,din4,N_din4_x2_c3_c,N_din4_x2_c3_c,3,Ag_N_din4_x2_c3_c,...,Fe_N_din4_x2_c3_c,N_din4_x2_c3_c,-615.869,N,din4,Ag_N_din4_x2_c3_c,-620.759,Fe_N_din4_x2_c3_c,-626.644,1.636


In [69]:
Eint_df.describe().T.round(2)


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
tot_energy_dacs,1218.0,-630.55,8.54,-640.75,-636.55,-633.98,-629.15,-606.83
tot_energy_sacs_pristine,1218.0,-613.32,6.97,-618.51,-616.87,-616.12,-614.85,-593.77
tot_energy_M1,1218.0,-621.18,7.94,-630.64,-626.64,-623.78,-620.29,-589.7
tot_energy_M2,1218.0,-623.88,7.67,-629.03,-627.71,-627.12,-626.71,-604.1
Eint,1218.0,1.19,2.5,-37.3,0.46,1.24,2.3,9.82


## Boxplot of the Eint categorized by metal type

In [70]:
plot_categorical_energy_boxplot(
    df=Eint_df,
    x_col='M1',
    y_col='Eint',
    color_map=metal_colors,
    hover_col='system_dacs',
    save_dir='../../../data/figures/dacs_eda',
    x_title='Metal Type',
    y_title='Interaction Energy (Eint)',
    file_name='Eint_M1_metal_boxplot'
)


## Boxplot of the Eint categorized by metal type - no outliers

In [71]:
# Map colors to the metal types in the dataframe
Eint_df['color'] = Eint_df['M1'].map(metal_colors)

# Detect outliers
outliers_dacs_metal_Eint = detect_outliers(Eint_df, 'M1', 'Eint')

# Save the outliers to a CSV file
outliers_path = os.path.join(dacs_energies_out_dir, 'outliers_dacs_metal_Eint.csv')
outliers_dacs_metal_Eint.to_csv(outliers_path, index=False, header=True)

# Remove outliers from the dataset
Eint_df_no_outliers_metal = Eint_df[~Eint_df.index.isin(outliers_dacs_metal_Eint.index)]

plot_categorical_energy_boxplot(
    df=Eint_df_no_outliers_metal,
    x_col='M1',
    y_col='Eint',
    color_map=metal_colors,
    hover_col='system_dacs',
    save_dir='../../../data/figures/dacs_eda',
    x_title='Metal Type',
    y_title='Interaction Energy (Eint)',
    file_name='Eint_M1_metal_no_outliers_boxplot'
)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



## Boxplot of the Eint categorized by cavity type

In [72]:
plot_categorical_energy_boxplot(
    df=Eint_df,
    x_col='cavity_v2',
    y_col='Eint',
    color_map=cavity_colors,
    hover_col='system_dacs',
    save_dir='../../../data/figures/dacs_eda',
    x_title='Cavity',
    y_title='E_dft_M1M2 Energy',
    file_name='Eint_M1_cavity_boxplot'
)


## Boxplot of the Eint categorized by cavity type

In [73]:
# Map colors to the metal types in the dataframe
Eint_df['color'] = Eint_df['cavity_v2'].map(cavity_colors)

# Detect outliers
outliers_cavity_dacs_Eint = detect_outliers(Eint_df, 'cavity_v2', 'Eint')

# Save the outliers to a CSV file
outliers_path = os.path.join(dacs_energies_out_dir, 'outliers_dacs_cavity_Eint.csv')
outliers_cavity_dacs_Eint.to_csv(outliers_path, index=False, header=True)

# Remove outliers from the dataset
Eint_df_no_outliers_cav = Eint_df[~Eint_df.index.isin(outliers_cavity_dacs_Eint.index)]


plot_categorical_energy_boxplot(
    df=Eint_df,
    x_col='cavity_v2',
    y_col='Eint',
    color_map=cavity_colors,
    hover_col='system_dacs',
    save_dir='../../../data/figures/dacs_eda',
    x_title='Cavity',
    y_title='E_dft_M1M2 Energy',
    file_name='Eint_M1_cavity_no_outliers_boxplot'
)


## Data Cleaning: Removing Metal and Cavity outliers

In [74]:
# Combine indices from outliers_dacs_metal and outliers_dacs_cavity
combined_outliers_indices = outliers_dacs_metal_Eint.index.union(outliers_cavity_dacs_Eint.index)

Eint_df_iqr = Eint_df[~Eint_df.index.isin(combined_outliers_indices)]

In [75]:
# Define the path where you want to save the CSV file
save_path_csv = os.path.join(dacs_energies_out_dir, 'Eint_df_iqr.csv')

# Save the dataframe to a CSV file
Eint_df_iqr.to_csv(save_path_csv, index=False, header=True)

# Define the path where you want to save the pickle file
save_path_pkl = os.path.join(dacs_energies_out_dir, 'Eint_df_iqr.pkl')

# Save the dataframe to a pickle file
Eint_df_iqr.to_pickle(save_path_pkl)


### 🔍 Feature Description Table

| **Feature Name**         | **Description**                                                                 |
|--------------------------|----------------------------------------------------------------------------------|
| `system_dacs`            | Identifier for the dual-atom catalyst (DAC) system (e.g., M1-M2 bound to cavity). |
| `tot_energy_dacs`        | Total DFT energy of the DAC system with both M1 and M2 adsorbed.                |
| `M1`                     | First metal atom type in the DAC (e.g., Fe, Co, Ni).                             |
| `M2`                     | Second metal atom type in the DAC.                                               |
| `heteroatom_dacs`        | Type of heteroatom(s) present in the DAC support (e.g., N, B, O).               |
| `basic_cavity_dacs`      | Simplified representation of the DAC binding site or cavity.                    |
| `cavity`                 | Original cavity identifier from the DAC structure.                              |
| `cavity_v2`              | Cleaned or renamed version of `cavity` (standardized for analysis/plotting).    |
| `cavity_3`               | Further refined cavity label (possibly grouped or reclassified).                |
| `M1_cavity`              | Identifier of the single-metal site where only M1 is adsorbed.                  |
| `cavity_4`               | Another variant of the cavity name (used for additional grouping or filtering). |
| `M2_cavity`              | Identifier of the single-metal site where only M2 is adsorbed.                  |
| `system_sacs_pristine`   | Identifier for the pristine (metal-free) single-atom catalyst (SAC) system.     |
| `tot_energy_sacs_pristine` | Total DFT energy of the pristine SACS system (without any metal).             |
| `heteroatom_sacs_pristine` | Type of heteroatom(s) in the pristine SACS.                                  |
| `basic_cavity_sacs_pristine` | Simplified representation of the pristine cavity site.                     |
| `system_x`               | Reference to M1-cavity system used in energy comparison.                        |
| `tot_energy_M1`          | Total DFT energy of the system with only M1 adsorbed.                           |
| `system_y`               | Reference to M2-cavity system used in energy comparison.                        |
| `tot_energy_M2`          | Total DFT energy of the system with only M2 adsorbed.                           |
| `Eint`                   | Interaction energy of the full DAC system with both metals, relative to parts.  |
| `color`                  | Color code (used for plotting) mapped from either `M1` or `cavity_v2`.          |


In [76]:
Eint_df_iqr.shape

(1118, 22)

In [77]:
print(", ".join(Eint_df_iqr.columns))

('system_dacs, tot_energy_dacs, M1, M2, heteroatom_dacs, basic_cavity_dacs, '
 'cavity, cavity_v2, cavity_3, M1_cavity, cavity_4, M2_cavity, '
 'system_sacs_pristine, tot_energy_sacs_pristine, heteroatom_sacs_pristine, '
 'basic_cavity_sacs_pristine, system_x, tot_energy_M1, system_y, '
 'tot_energy_M2, Eint, color')


In [78]:
Eint_df_iqr.sample()

Unnamed: 0,system_dacs,tot_energy_dacs,M1,M2,heteroatom_dacs,basic_cavity_dacs,cavity,cavity_v2,cavity_3,M1_cavity,...,system_sacs_pristine,tot_energy_sacs_pristine,heteroatom_sacs_pristine,basic_cavity_sacs_pristine,system_x,tot_energy_M1,system_y,tot_energy_M2,Eint,color
602,Ag_Fe_N_din6_as_c2_13,-629.794,Ag,Fe,N,din6,N_din6_as_c2_13,N_din6_as_c2_13,2,Ag_N_din6_as_c2_13,...,N_din6_as_c2_13,-617.063,N,din6,Ag_N_din6_as_c2_13,-620.198,Fe_N_din6_as_c2_13,-627.178,0.519,green


In [79]:
Eint_df_iqr.describe().T.round(2)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
tot_energy_dacs,1118.0,-630.66,8.49,-640.75,-636.58,-634.04,-629.26,-606.83
tot_energy_sacs_pristine,1118.0,-613.19,7.1,-618.51,-616.83,-616.12,-614.72,-593.77
tot_energy_M1,1118.0,-621.33,7.92,-630.64,-626.72,-623.96,-620.49,-597.69
tot_energy_M2,1118.0,-623.92,7.64,-629.03,-627.71,-627.18,-626.71,-604.1
Eint,1118.0,1.4,1.33,-2.66,0.52,1.25,2.26,5.92


## Adsorption energy calculation (after Fe adsorption)

The adsorption energy for the DACs on N-doped carbon (after Fe adsoprtion)is defined:

$$
E_{\text{DFT}} = E_{\text{total, M1M2}} - E_{\text{pristine}} - E_{\text{M1}} - E_{\text{M2/cavity}}
$$

where\
$E_{\text{total, M1M2}}$ is the total energy of the DAC with the two metals adsorbed\
$E_{\text{pristine}}$ is the total energy of the pristine structure\
$E_{\text{M1}}$ is the cohesive energy of the M1 adsorbate\
$E_{\text{M2/cavity}}$ is the total energy of the M2 adsorbed in the pristine cavity

This code computes the **adsorption energy** (`Edft_fe`) for dual-atom catalysts by assessing the energy change when metal **M1** is added to a site where **M2** is already adsorbed. Also, it adds the features from the SACs on doped carbon.

In [80]:
total_energy_dacs_df['cavity'] = total_energy_dacs_df['cavity'].apply(adjust_names)


# Merge the dataframes: left on 'cavity', right on 'system'
merged_df = pd.merge(total_energy_dacs_df, total_energy_sacs_dc_prist_df, left_on='cavity', right_on='system', suffixes=('_dacs', '_sacs_pristine'))

merged_x1_df = pd.merge(merged_df, metal_df, left_on='M1', right_on='metal')

# Create a new column 'Ecoh_m1' by assigning the values from the 'Ecoh' column
merged_x1_df['Ecoh_m1'] = merged_x1_df['Ecoh']


merged_x1_x2_df = pd.merge(merged_x1_df, total_energy_sacs_df[['system', 'tot_energy']].rename(columns={'tot_energy': 'tot_energy_M2'}), left_on='M2_cavity', right_on='system')

# Subtract the 'tot_energy' columns
merged_x1_x2_df['Edft_fe'] = merged_x1_x2_df['tot_energy_dacs'] - merged_x1_x2_df['Ecoh_m1'] - merged_x1_x2_df['tot_energy_M2']

# Dropping duplicates
merged_x1_x2_df = merged_x1_x2_df.drop_duplicates()


Edft_fe_df =merged_x1_x2_df

# Define the path where you want to save the CSV file
save_path = os.path.join(dacs_energies_out_dir, 'Edft_fe_df.csv')

# Save the dataframe to a CSV file
Edft_fe_df.to_csv(save_path, index=False, header=True)


| **Feature Name**             | **Description**                                                                                   |
|------------------------------|-------------------------------------------------------------------------------------------------|
| `system_dacs`                | Identifier for the dual-atom catalyst (DAC) system, combining M1 and M2 adsorbed on a cavity.   |
| `tot_energy_dacs`            | Total DFT energy of the DAC system with both M1 and M2 metals adsorbed.                         |
| `M1`                        | Symbol or type of the first metal atom in the DAC.                                              |
| `M2`                        | Symbol or type of the second metal atom in the DAC.                                             |
| `heteroatom_dacs`            | Type of heteroatom(s) present in the DAC support structure (e.g., N, B, O).                     |
| `basic_cavity_dacs`          | Simplified or generalized descriptor of the DAC binding site or cavity.                         |
| `cavity`                    | Original cavity identifier associated with the DAC structure.                                  |
| `cavity_v2`                 | Cleaned or standardized version of `cavity` for consistent analysis and plotting.              |
| `cavity_3`                  | Additional refined or grouped cavity classification.                                            |
| `M1_cavity`                 | Identifier for the site or cavity associated with the first metal (M1) adsorbed singly.         |
| `cavity_4`                  | Another variant of cavity categorization used for filtering or grouping.                        |
| `M2_cavity`                 | Identifier for the site or cavity associated with the second metal (M2) adsorbed singly.        |
| `system_sacs_pristine`      | Identifier for pristine single-atom catalyst (SAC) systems without metals adsorbed.             |
| `tot_energy_sacs_pristine`  | Total DFT energy of the pristine SAC system (without metal adsorption).                         |
| `heteroatom_sacs_pristine`  | Heteroatom type(s) present in the pristine SAC support.                                        |
| `basic_cavity_sacs_pristine`| Simplified descriptor of the pristine SAC cavity site.                                         |
| `metal`                     | Metal type or symbol associated with cohesive and atomic properties.                           |
| `Ecoh`                      | Cohesive energy of the metal atom, representing the energy needed to break it into atoms.      |
| `atomic_mass`               | Atomic mass of the metal (in atomic mass units).                                               |
| `vdw_radius`                | Van der Waals radius of the metal atom (in angstroms).                                        |
| `r_cov_sb`                  | Covalent radius for single bonds (in angstroms).                                              |
| `r_cov_db`                  | Covalent radius for double bonds (in angstroms).                                              |
| `dipole_polarizability`    | Dipole polarizability of the metal atom, indicating how easily its electron cloud distorts.    |
| `ionic_radii_crystals`     | Ionic radius of the metal in crystal structures (in angstroms).                               |
| `d_center_sp`               | d-band center position relative to the Fermi level, an electronic descriptor relevant to catalysis. |
| `Paul_electroneg`           | Pauling electronegativity of the metal atom.                                                  |
| `MB_electroneg`             | Mulliken–Brønsted electronegativity, another measure of electronegativity.                    |
| `electron_affinity`         | Electron affinity of the metal atom (energy released when adding an electron).                 |
| `covalent_radius`           | Covalent radius of the metal atom (in angstroms).                                             |
| `atomic_number`             | Atomic number (number of protons) of the metal element.                                       |
| `Ion_energ_I`               | First ionization energy of the metal atom (energy to remove first electron).                   |
| `Ion_energ_II`              | Second ionization energy of the metal atom.                                                   |
| `Zung_radius`               | Zunger effective ionic radius (a specific empirical radius).                                 |
| `Coh_radius`                | Cohn effective atomic radius.                                                                 |
| `Waber_radius`              | Waber atomic radius, another empirical measure of atomic size.                                |
| `mied_param_h`              | Miedema model parameter h, related to enthalpy calculations in alloys.                        |
| `mied_param_phi`            | Miedema model parameter phi, related to electronegativity differences in alloys.              |
| `HOMO`                      | Highest Occupied Molecular Orbital energy level of the metal atom or system.                   |
| `LUMO`                      | Lowest Unoccupied Molecular Orbital energy level.                                             |
| `mag_moment_bulk_d`         | Magnetic moment of the metal in bulk form, from d-electrons.                                 |
| `E_Fermi`                   | Fermi energy level of the metal or system (in eV).                                           |
| `E_Fermi2`                  | Secondary Fermi energy measurement or corrected value.                                       |
| `Ecoh_m1`                   | Cohesive energy specifically assigned to M1 in the merged dataset.                           |
| `system`                    | Generic system identifier used in merged datasets for M2 or other references.                 |
| `tot_energy_M2`             | Total DFT energy of the system with only M2 adsorbed.                                        |
| `Edft_fe`                   | Calculated stepwise adsorption energy measuring the energy change when adding M1 to M2/cavity.|


In [81]:
Edft_fe_df.shape

(1218, 46)

In [82]:
print(", ".join(Edft_fe_df.columns))

('system_dacs, tot_energy_dacs, M1, M2, heteroatom_dacs, basic_cavity_dacs, '
 'cavity, cavity_v2, cavity_3, M1_cavity, cavity_4, M2_cavity, '
 'system_sacs_pristine, tot_energy_sacs_pristine, heteroatom_sacs_pristine, '
 'basic_cavity_sacs_pristine, metal, Ecoh, atomic_mass, vdw_radius, r_cov_sb, '
 'r_cov_db, dipole_polarizability, ionic_radii_crystals, d_center_sp, '
 'Paul_electroneg, MB_electroneg, electron_affinity, covalent_radius, '
 'atomic_number, Ion_energ_I, Ion_energ_II, Zung_radius, Coh_radius, '
 'Waber_radius, mied_param_h, mied_param_phi, HOMO , LUMO, mag_moment_bulk_d,  '
 'E_Fermi, E_Fermi2, Ecoh_m1, system, tot_energy_M2, Edft_fe')


In [83]:
Edft_fe_df.sample()

Unnamed: 0,system_dacs,tot_energy_dacs,M1,M2,heteroatom_dacs,basic_cavity_dacs,cavity,cavity_v2,cavity_3,M1_cavity,...,mied_param_phi,HOMO,LUMO,mag_moment_bulk_d,E_Fermi,E_Fermi2,Ecoh_m1,system,tot_energy_M2,Edft_fe
1104,Ru_Fe_N_din6_s_c2_02,-616.76,Ru,Fe,N,din6,N_din6_s_c2_02,N_din6_s_c2_02,2,Ru_N_din6_s_c2_02,...,5.4,-3.9951,-3.6556,13.072705,12,12.1,-9.812466,Fe_N_din6_s_c2_02,-608.204,1.256466


In [84]:
Edft_fe_df.describe().T.round(2)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
tot_energy_dacs,1218.0,-630.55,8.54,-640.75,-636.55,-633.98,-629.15,-606.83
tot_energy_sacs_pristine,1218.0,-613.32,6.97,-618.51,-616.87,-616.12,-614.85,-593.77
Ecoh,1218.0,-6.27,3.06,-11.84,-8.62,-6.37,-3.88,-1.23
atomic_mass,1218.0,114.83,53.57,55.84,63.55,104.66,190.23,196.97
vdw_radius,1218.0,208.29,7.0,196.0,201.0,210.5,213.0,218.0
r_cov_sb,1218.0,121.36,7.17,110.0,116.0,122.5,125.0,136.0
r_cov_db,1218.0,116.86,11.49,101.0,110.0,115.0,120.0,144.0
dipole_polarizability,1218.0,50.81,11.71,26.14,46.0,51.5,57.0,72.0
ionic_radii_crystals,1218.0,0.62,0.09,0.39,0.58,0.64,0.68,0.78
d_center_sp,1218.0,-3.87,2.0,-9.0,-4.1,-3.27,-2.55,-1.91


## Boxplot of the Edft_fe by metal 

In [85]:
fig = plot_categorical_energy_boxplot(
    df=Edft_fe_df,
    x_col='M1',
    y_col='Edft_fe',
    color_map=metal_colors,
    hover_col='system_dacs',
    x_title='Metal',
    y_title='Edft_fe Energy',

    save_dir='../../../data/figures/dacs_eda',
    file_name='Edft_fe_M1_metal_boxplot'
)

fig.show()


## Boxplot of the Edft_fe by metal - no outliers

In [86]:
# Map colors to the metal types in the dataframe
Edft_fe_df['color'] = Edft_fe_df['M1'].map(metal_colors)

# Detect outliers
outliers_dacs_metal_Edft_fe = detect_outliers(Edft_fe_df, 'M1', 'Edft_fe')

# Save the outliers to a CSV file
outliers_path = os.path.join(dacs_energies_out_dir, 'outliers_dacs_metal_dft_Edft_fe.csv')
outliers_dacs_metal_Edft_fe.to_csv(outliers_path, index=False, header=True)

# Remove outliers from the dataset
Edft_fe_df_no_outliers_metal = Edft_fe_df[~Edft_fe_df.index.isin(outliers_dacs_metal_Edft_fe.index)]

fig = plot_categorical_energy_boxplot(
    df=Edft_fe_df_no_outliers_metal,
    x_col='M1',
    y_col='Edft_fe',
    color_map=metal_colors,
    hover_col='system_dacs',
    x_title='Metal',
    y_title='Edft_fe Energy',
    save_dir='../../../data/figures/dacs_eda',
    file_name='Edft_fe_M1_metal_boxplot_no_outliers'
)

fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



## Boxplot of the Edft_fe by cavity

In [87]:
plot_categorical_energy_boxplot(
    df=Edft_fe_df,
    x_col='cavity_v2',
    y_col='Edft_fe',
    color_map=cavity_colors,
    hover_col='system_dacs',
    save_dir='../../../data/figures/dacs_eda',
    x_title='Cavity',
    y_title='E_dft_Fe Energy',
    file_name='Edft_fe_M1_cavity_boxplot'
)



## Boxplot of the Edft_fe by cavity - no outliers

In [88]:
# Assuming Edft_coh_df and metal_colors are already defined

# Map colors to the metal types in the dataframe
Edft_fe_df['color'] = Edft_fe_df['cavity_v2'].map(cavity_colors)

# Detect outliers
outliers_cavity_dacs_Edft_fe = detect_outliers(Edft_fe_df, 'cavity_v2', 'Edft_fe')



# Save the outliers to a CSV file
outliers_path = os.path.join(dacs_energies_out_dir, 'outliers_dacs_cavity_Edft_fe.csv')
outliers_cavity_dacs_Edft_fe.to_csv(outliers_path, index=False, header=True)

# Remove outliers from the dataset
Edft_fe_df_no_outliers_cav = Edft_fe_df[~Edft_fe_df.index.isin(outliers_cavity_dacs_Edft_fe.index)]

plot_categorical_energy_boxplot(
    df=Edft_fe_df_no_outliers_cav,
    x_col='cavity_v2',
    y_col='Edft_fe',
    color_map=cavity_colors,
    hover_col='system_dacs',
    save_dir='../../../data/figures/dacs_eda',
    x_title='Cavity',
    y_title='E_dft_Fe Energy',
    file_name='Edft_fe_M1_cavity_boxplot_no_outliers'
)





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



## Data Cleaning: Removing Metal and Cavity Outliers

In [89]:
# Combine indices from outliers_dacs_metal and outliers_dacs_cavity
combined_outliers_indices = outliers_dacs_metal_Edft_fe.index.union(outliers_cavity_dacs_Edft_fe.index)

Edft_fe_df_iqr = Edft_fe_df[~Edft_fe_df.index.isin(combined_outliers_indices)]

In [90]:
# Define the path where you want to save the CSV file
save_path_csv = os.path.join(dacs_energies_out_dir, 'Edft_fe_df_iqr.csv')

# Save the dataframe to a CSV file
Edft_fe_df_iqr.to_csv(save_path_csv, index=False, header=True)

# Define the path where you want to save the pickle file
save_path_pkl = os.path.join(dacs_energies_out_dir, 'Edft_fe_df_iqr.pkl')

# Save the dataframe to a pickle file
Edft_fe_df_iqr.to_pickle(save_path_pkl)


| **Feature Name**             | **Description**                                                                                   |
|------------------------------|-------------------------------------------------------------------------------------------------|
| `system_dacs`                | Identifier for the dual-atom catalyst (DAC) system, combining M1 and M2 adsorbed on a cavity.   |
| `tot_energy_dacs`            | Total DFT energy of the DAC system with both M1 and M2 metals adsorbed.                         |
| `M1`                        | Symbol or type of the first metal atom in the DAC.                                              |
| `M2`                        | Symbol or type of the second metal atom in the DAC.                                             |
| `heteroatom_dacs`            | Type of heteroatom(s) present in the DAC support structure (e.g., N, B, O).                     |
| `basic_cavity_dacs`          | Simplified or generalized descriptor of the DAC binding site or cavity.                         |
| `cavity`                    | Original cavity identifier associated with the DAC structure.                                  |
| `cavity_v2`                 | Cleaned or standardized version of `cavity` for consistent analysis and plotting.              |
| `cavity_3`                  | Additional refined or grouped cavity classification.                                            |
| `M1_cavity`                 | Identifier for the site or cavity associated with the first metal (M1) adsorbed singly.         |
| `cavity_4`                  | Another variant of cavity categorization used for filtering or grouping.                        |
| `M2_cavity`                 | Identifier for the site or cavity associated with the second metal (M2) adsorbed singly.        |
| `system_sacs_pristine`      | Identifier for pristine single-atom catalyst (SAC) systems without metals adsorbed.             |
| `tot_energy_sacs_pristine`  | Total DFT energy of the pristine SAC system (without metal adsorption).                         |
| `heteroatom_sacs_pristine`  | Heteroatom type(s) present in the pristine SAC support.                                        |
| `basic_cavity_sacs_pristine`| Simplified descriptor of the pristine SAC cavity site.                                         |
| `metal`                     | Metal type or symbol associated with cohesive and atomic properties.                           |
| `Ecoh`                      | Cohesive energy of the metal atom, representing the energy needed to break it into atoms.      |
| `atomic_mass`               | Atomic mass of the metal (in atomic mass units).                                               |
| `vdw_radius`                | Van der Waals radius of the metal atom (in angstroms).                                        |
| `r_cov_sb`                  | Covalent radius for single bonds (in angstroms).                                              |
| `r_cov_db`                  | Covalent radius for double bonds (in angstroms).                                              |
| `dipole_polarizability`    | Dipole polarizability of the metal atom, indicating how easily its electron cloud distorts.    |
| `ionic_radii_crystals`     | Ionic radius of the metal in crystal structures (in angstroms).                               |
| `d_center_sp`               | d-band center position relative to the Fermi level, an electronic descriptor relevant to catalysis. |
| `Paul_electroneg`           | Pauling electronegativity of the metal atom.                                                  |
| `MB_electroneg`             | Mulliken–Brønsted electronegativity, another measure of electronegativity.                    |
| `electron_affinity`         | Electron affinity of the metal atom (energy released when adding an electron).                 |
| `covalent_radius`           | Covalent radius of the metal atom (in angstroms).                                             |
| `atomic_number`             | Atomic number (number of protons) of the metal element.                                       |
| `Ion_energ_I`               | First ionization energy of the metal atom (energy to remove first electron).                   |
| `Ion_energ_II`              | Second ionization energy of the metal atom.                                                   |
| `Zung_radius`               | Zunger effective ionic radius (a specific empirical radius).                                 |
| `Coh_radius`                | Cohn effective atomic radius.                                                                 |
| `Waber_radius`              | Waber atomic radius, another empirical measure of atomic size.                                |
| `mied_param_h`              | Miedema model parameter h, related to enthalpy calculations in alloys.                        |
| `mied_param_phi`            | Miedema model parameter phi, related to electronegativity differences in alloys.              |
| `HOMO`                      | Highest Occupied Molecular Orbital energy level of the metal atom or system.                   |
| `LUMO`                      | Lowest Unoccupied Molecular Orbital energy level.                                             |
| `mag_moment_bulk_d`         | Magnetic moment of the metal in bulk form, from d-electrons.                                 |
| `E_Fermi`                   | Fermi energy level of the metal or system (in eV).                                           |
| `E_Fermi2`                  | Secondary Fermi energy measurement or corrected value.                                       |
| `Ecoh_m1`                   | Cohesive energy specifically assigned to M1 in the merged dataset.                           |
| `system`                    | Generic system identifier used in merged datasets for M2 or other references.                 |
| `tot_energy_M2`             | Total DFT energy of the system with only M2 adsorbed.                                        |
| `Edft_fe`                   | Calculated stepwise adsorption energy measuring the energy change when adding M1 to M2/cavity.|
| `color`                    | Color code assigned based on categorical mapping (e.g., metal or cavity) for plotting purposes.|


In [91]:
Edft_fe_df_iqr.shape

(1162, 47)

In [92]:
print(", ".join(Edft_fe_df_iqr.columns))

('system_dacs, tot_energy_dacs, M1, M2, heteroatom_dacs, basic_cavity_dacs, '
 'cavity, cavity_v2, cavity_3, M1_cavity, cavity_4, M2_cavity, '
 'system_sacs_pristine, tot_energy_sacs_pristine, heteroatom_sacs_pristine, '
 'basic_cavity_sacs_pristine, metal, Ecoh, atomic_mass, vdw_radius, r_cov_sb, '
 'r_cov_db, dipole_polarizability, ionic_radii_crystals, d_center_sp, '
 'Paul_electroneg, MB_electroneg, electron_affinity, covalent_radius, '
 'atomic_number, Ion_energ_I, Ion_energ_II, Zung_radius, Coh_radius, '
 'Waber_radius, mied_param_h, mied_param_phi, HOMO , LUMO, mag_moment_bulk_d,  '
 'E_Fermi, E_Fermi2, Ecoh_m1, system, tot_energy_M2, Edft_fe, color')


In [93]:
Edft_fe_df_iqr.sample()

Unnamed: 0,system_dacs,tot_energy_dacs,M1,M2,heteroatom_dacs,basic_cavity_dacs,cavity,cavity_v2,cavity_3,M1_cavity,...,HOMO,LUMO,mag_moment_bulk_d,E_Fermi,E_Fermi2,Ecoh_m1,system,tot_energy_M2,Edft_fe,color
227,Co_Fe_N_din4_x2_c2_g_v2,-637.596,Co,Fe,N,din4,N_din4_x2_c2_g,N_din4_x2_c2_g_v2,2,Co_N_din4_x2_c2_g_v2,...,-3.9514,-3.8452,12.365391,3,3.1,-7.38444,Fe_N_din4_x2_c2_g_v2,-627.953,-2.258561,red


In [94]:
Edft_fe_df_iqr.describe().T.round(2)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
tot_energy_dacs,1162.0,-630.88,8.28,-640.75,-636.55,-634.07,-629.44,-606.83
tot_energy_sacs_pristine,1162.0,-613.58,6.74,-618.51,-616.87,-616.12,-614.98,-593.77
Ecoh,1162.0,-6.24,3.04,-11.84,-8.62,-5.87,-3.88,-1.23
atomic_mass,1162.0,114.62,53.43,55.84,63.55,102.91,190.23,196.97
vdw_radius,1162.0,208.26,6.98,196.0,201.0,210.0,213.0,218.0
r_cov_sb,1162.0,121.33,7.12,110.0,116.0,122.0,125.0,136.0
r_cov_db,1162.0,116.91,11.43,101.0,110.0,115.0,120.0,144.0
dipole_polarizability,1162.0,50.74,11.78,26.14,46.0,49.0,57.0,72.0
ionic_radii_crystals,1162.0,0.62,0.09,0.39,0.58,0.64,0.68,0.78
d_center_sp,1162.0,-3.87,2.0,-9.0,-4.1,-3.19,-2.55,-1.91
