In [ ]:
import pandas as pd
import numpy as np
from pymatgen.core.structure import Structure

In [4]:
# Read the DFT results and tag every original row with an empty-string
# 'flipped' marker (flipped copies get a non-empty marker later on)
df = pd.read_csv('../data/DFT_data.csv').assign(flipped='')

# Preview the first rows
df.head()

Unnamed: 0.1,Unnamed: 0,mpid,miller,term,energy,bulk_energy,convergence,Fermi,slab,nsites,...,sym,sym_vac,broken_bonds,area,opt_tol,max_normal_search,cleavage_energy,WF_bottom,WF_top,flipped
0,1,mp-1058581,m111,0,-15294.75699,-15298.81608,550-4-4-1,1.7996,"{'@module': 'pymatgen.core.structure', '@class...",16,...,True,True,14,74.984486,1.0,2,0.027066,2.301425,2.287049,
1,3,mp-1058581,m110,0,-15296.12141,-15298.84052,550-4-4-1,2.2986,"{'@module': 'pymatgen.core.structure', '@class...",16,...,True,True,8,66.538369,0.1,3,0.020433,2.338018,2.360218,
2,5,mp-1058581,m101,0,-15295.70542,-7649.382492,550-4-4-1,2.4416,"{'@module': 'pymatgen.core.structure', '@class...",16,...,True,True,10,67.756083,0.1,1,0.022578,2.312259,2.310812,
3,7,mp-1058581,m100,0,-11471.47077,-3824.709179,550-4-4-1,1.9164,"{'@module': 'pymatgen.core.structure', '@class...",12,...,True,True,8,58.271504,0.1,0,0.022796,2.324314,2.317646,
4,9,mp-1058581,m011,0,-15296.59636,-7649.397502,550-6-2-1,2.7173,"{'@module': 'pymatgen.core.structure', '@class...",16,...,True,True,5,47.192212,0.1,1,0.023295,2.347947,2.380204,


In [ ]:
# Define function to flip the slab structure using pure pymatgen
def flip_slab(struct_dict):
    """
    Flip a slab upside down by mirroring its Cartesian z-coordinates.

    Every site at Cartesian (x, y, z) is moved to (x, y, max_z - z), where
    max_z is the largest Cartesian z among the sites. If the c lattice
    vector has a negative z-component, it is negated first so that it points
    upward. Species and site properties are carried over, and the sites are
    re-ordered with ``Structure.sort()`` before serialising.

    The mirrored positions are handed to the ``Structure`` constructor with
    ``coords_are_cartesian=True``. Assigning ``structure[i] = species, coords``
    must not be used for this: that form interprets ``coords`` as *fractional*
    coordinates, which would silently corrupt the geometry.

    Args:
        struct_dict: Dictionary representation of a pymatgen Structure

    Returns:
        Dictionary representation of the flipped structure

    Raises:
        ValueError: if the structure contains no sites (``max`` of an empty
            sequence).
    """
    # Convert dict to pymatgen Structure
    structure = Structure.from_dict(struct_dict)

    # Work on a copy of the lattice matrix; make the c-vector point upward
    matrix = structure.lattice.matrix.copy()
    if matrix[2][2] < 0:
        matrix[2] = -matrix[2]

    # Mirror the Cartesian positions about z BEFORE building the new structure,
    # so the coordinates are never mistaken for fractional ones.
    cart_coords = [site.coords for site in structure]
    max_z = max(coords[2] for coords in cart_coords)
    mirrored_coords = [[x, y, max_z - z] for x, y, z in cart_coords]

    # NOTE(review): only the atoms are mirrored, the lattice is reused as-is.
    # For cells whose c-vector has in-plane components the periodic images
    # along c are not those of a true mirror image - presumably harmless
    # because of the vacuum layer, but confirm for your use case.
    flipped_structure = Structure(
        lattice=matrix,
        species=[site.species for site in structure],
        coords=mirrored_coords,
        coords_are_cartesian=True,
        site_properties=structure.site_properties,
    )

    # Keep the original post-processing step: put the sites in sorted order
    flipped_structure.sort()

    return flipped_structure.as_dict()

In [6]:
# Build the flipped twin of every row in one pass: same columns, a non-empty
# 'flipped' marker, the flipped slab geometry, and the two work functions
# exchanged (what used to be the top surface is now the bottom one).
#
# NOTE(review): eval() executes whatever text is stored in the 'slab' column.
# Only run this on a CSV you trust; ast.literal_eval may be a safer parser,
# but confirm the stored dicts are plain Python literals before switching.
df_flipped = df.assign(
    flipped='flipped',
    slab=df['slab'].apply(eval).apply(flip_slab).apply(str),
    WF_top=df['WF_bottom'],
    WF_bottom=df['WF_top'],
)

# Note: cleavage_energy stays the same

In [7]:
# Stack the original rows on top of their flipped counterparts and renumber
# the index from scratch
df_augmented = pd.concat(objs=[df, df_flipped], axis=0, ignore_index=True)

# Report the row counts before/after augmentation, then preview the result
print(f"Original data size: {len(df)}")
print(f"Augmented data size: {len(df_augmented)}")
df_augmented.head()

Original data size: 36852
Augmented data size: 73704


Unnamed: 0.1,Unnamed: 0,mpid,miller,term,energy,bulk_energy,convergence,Fermi,slab,nsites,...,sym,sym_vac,broken_bonds,area,opt_tol,max_normal_search,cleavage_energy,WF_bottom,WF_top,flipped
0,1,mp-1058581,m111,0,-15294.75699,-15298.81608,550-4-4-1,1.7996,"{'@module': 'pymatgen.core.structure', '@class...",16,...,True,True,14,74.984486,1.0,2,0.027066,2.301425,2.287049,
1,3,mp-1058581,m110,0,-15296.12141,-15298.84052,550-4-4-1,2.2986,"{'@module': 'pymatgen.core.structure', '@class...",16,...,True,True,8,66.538369,0.1,3,0.020433,2.338018,2.360218,
2,5,mp-1058581,m101,0,-15295.70542,-7649.382492,550-4-4-1,2.4416,"{'@module': 'pymatgen.core.structure', '@class...",16,...,True,True,10,67.756083,0.1,1,0.022578,2.312259,2.310812,
3,7,mp-1058581,m100,0,-11471.47077,-3824.709179,550-4-4-1,1.9164,"{'@module': 'pymatgen.core.structure', '@class...",12,...,True,True,8,58.271504,0.1,0,0.022796,2.324314,2.317646,
4,9,mp-1058581,m011,0,-15296.59636,-7649.397502,550-6-2-1,2.7173,"{'@module': 'pymatgen.core.structure', '@class...",16,...,True,True,5,47.192212,0.1,1,0.023295,2.347947,2.380204,


In [8]:
# Persist the combined table next to the source data; the DataFrame index is
# not written to the file
df_augmented.to_csv(path_or_buf='../data/DFT_data_augmented.csv', index=False)
print("Saved augmented dataset to ../data/DFT_data_augmented.csv")

Saved augmented dataset to ../data/DFT_data_augmented.csv


In [ ]:
# Sanity-check the flip on one row by printing the same report for the
# original and the flipped structure
sample_idx = 0  # First structure

# NOTE(review): eval() runs the stored text as Python - trusted CSV only
original_struct = Structure.from_dict(eval(df.iloc[sample_idx]['slab']))
flipped_struct = Structure.from_dict(eval(df_flipped.iloc[sample_idx]['slab']))

# Identical four-line geometry summary for each structure
for heading, struct in (
    ("ORIGINAL STRUCTURE:", original_struct),
    ("\nFLIPPED STRUCTURE:", flipped_struct),
):
    cell = struct.lattice
    print(heading)
    print(f"Lattice parameters: a={cell.a:.4f}, b={cell.b:.4f}, c={cell.c:.4f}")
    print(f"Angles: alpha={cell.alpha:.2f}°, beta={cell.beta:.2f}°, gamma={cell.gamma:.2f}°")
    print(f"Volume: {struct.volume:.2f} Å³")
    print(f"First 3 atomic z-coordinates: {[site.coords[2] for site in struct][:3]}")

# Work functions of the same row in both frames
print("\nWORK FUNCTIONS:")
for tag, frame in (("Original", df), ("Flipped ", df_flipped)):
    row = frame.iloc[sample_idx]
    print(f"{tag} - Top: {row['WF_top']:.6f}, Bottom: {row['WF_bottom']:.6f}")