In [ ]:
import pandas as pd
import numpy as np
import math
from collections import defaultdict
import random
from pymatgen.core.structure import Structure
from pymatgen.io.ase import AseAtomsAdaptor
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
import ase

In [4]:
# Read the original DFT results and tag every row as "not flipped": the
# 'flipped' column holds an empty string for original data
df = pd.read_csv('../data/DFT_data.csv').assign(flipped='')

# Preview the first few rows
df.head()

Unnamed: 0.1,Unnamed: 0,mpid,miller,term,energy,bulk_energy,convergence,Fermi,slab,nsites,...,sym,sym_vac,broken_bonds,area,opt_tol,max_normal_search,cleavage_energy,WF_bottom,WF_top,flipped
0,1,mp-1058581,m111,0,-15294.75699,-15298.81608,550-4-4-1,1.7996,"{'@module': 'pymatgen.core.structure', '@class...",16,...,True,True,14,74.984486,1.0,2,0.027066,2.301425,2.287049,
1,3,mp-1058581,m110,0,-15296.12141,-15298.84052,550-4-4-1,2.2986,"{'@module': 'pymatgen.core.structure', '@class...",16,...,True,True,8,66.538369,0.1,3,0.020433,2.338018,2.360218,
2,5,mp-1058581,m101,0,-15295.70542,-7649.382492,550-4-4-1,2.4416,"{'@module': 'pymatgen.core.structure', '@class...",16,...,True,True,10,67.756083,0.1,1,0.022578,2.312259,2.310812,
3,7,mp-1058581,m100,0,-11471.47077,-3824.709179,550-4-4-1,1.9164,"{'@module': 'pymatgen.core.structure', '@class...",12,...,True,True,8,58.271504,0.1,0,0.022796,2.324314,2.317646,
4,9,mp-1058581,m011,0,-15296.59636,-7649.397502,550-6-2-1,2.7173,"{'@module': 'pymatgen.core.structure', '@class...",16,...,True,True,5,47.192212,0.1,1,0.023295,2.347947,2.380204,


In [ ]:
# Define helper functions for structure analysis and flipping

def is_structure_invertible(structure):
    '''
    Report whether a structure has a symmetry operation that inverts its
    z-axis, i.e. one that turns the top of the structure into its bottom.

    The symmetry operations are obtained from a `SpacegroupAnalyzer` built
    with `symprec=0.1`. For each operation, the z,z component of its affine
    matrix is inspected. If that component is -1, the operation maps z to -z
    (it could be a mirror operation, or one that involves multiple
    rotations/etc.), so flipping the structure upside down gives back a
    symmetry-equivalent structure.

    In short:  If this function returns `False`, then the input structure can
    be flipped in the z-direction to create a new structure.

    Arg:
        structure   A `pymatgen.Structure` object.
    Returns
        `True` if at least one symmetry operation inverts the z-direction
        (i.e. the structure is symmetric with respect to the x-y plane),
        `False` otherwise. Any exception raised while analysing the
        structure is printed and also reported as `False`.
    '''
    try:
        sga = SpacegroupAnalyzer(structure, symprec=0.1)
        # Compare with a tolerance rather than `==` so that a matrix entry
        # carrying floating-point round-off is still recognised as -1.
        return any(
            np.isclose(operation.affine_matrix[2, 2], -1.0)
            for operation in sga.get_symmetry_operations()
        )
    except Exception as e:
        # Best effort: a structure that cannot be analysed is treated as not
        # invertible instead of aborting the caller.
        print(f"Error checking invertibility: {e}")
        return False

def flip_struct(struct):
    '''
    Flips a structure upside down. Normally used to flip surfaces.

    The structure is converted to an `ase.Atoms` object, rotated by 180
    degrees about the x-axis together with its cell, the cell vectors are
    re-oriented, and the result is converted back to a pymatgen structure.

    Arg:
        struct  `pymatgen.Structure` object
    Returns:
        flipped_struct  A `pymatgen.Structure` object built from the rotated
                        atoms, i.e. the input structure flipped upside down.
    '''
    atoms = AseAtomsAdaptor.get_atoms(struct)

    # Wrap the atoms before rotating (presumably so every position lies
    # inside the cell -- see ASE's `Atoms.wrap`).
    atoms.wrap()
    # Rotate by 180 degrees about x, taking the cell along and using the
    # center of mass ('COM') as the rotation center.
    atoms.rotate(180, 'x', rotate_cell=True, center='COM')
    # After the rotation the third cell vector may point downwards; negate
    # it so that its z-component is positive again.
    if atoms.cell[2][2] < 0.:
        atoms.cell[2] = -atoms.cell[2]
    # If the cross product of the first two cell vectors points downwards,
    # negate the second one (presumably to keep the cell right-handed).
    if np.cross(atoms.cell[0], atoms.cell[1])[2] < 0.0:
        atoms.cell[1] = -atoms.cell[1]
    # Wrap again now that the cell vectors have been changed.
    atoms.wrap()

    flipped_struct = AseAtomsAdaptor.get_structure(atoms)
    return flipped_struct

# Define function to flip the slab structure using both functions above
def flip_slab(struct_dict):
    """
    Turn a serialized slab upside down.

    The dictionary is deserialized into a pymatgen Structure, tested with
    `is_structure_invertible` (a notice is printed when the slab is already
    symmetric in z, but it is flipped regardless), flipped with
    `flip_struct`, and serialized back to a dictionary.

    Args:
        struct_dict: Dictionary representation of a pymatgen Structure

    Returns:
        Dictionary representation of the flipped structure
    """
    slab = Structure.from_dict(struct_dict)

    # Informational only: symmetric slabs are still flipped below
    if is_structure_invertible(slab):
        print("Structure is symmetric in z-direction, flipping may not create a new structure")

    return flip_struct(slab).as_dict()

In [ ]:
# Create a duplicate dataframe for the flipped structures
df_flipped = df.copy()

# Set the flipped flag to 'flipped' string
df_flipped['flipped'] = 'flipped'

# Counters reported in the summary at the end of this cell
total_structures = len(df_flipped)
symmetric_count = 0
error_count = 0


def safely_flip_structure(slab_str, index=None):
    """
    Flip one serialized slab, falling back to the input if anything fails.

    Args:
        slab_str: String holding the dict representation of a pymatgen
            Structure, as stored in the 'slab' column.
        index: Optional position of the row being processed. Only used for
            the progress and error messages.

    Returns:
        The string form of the flipped structure's dict, or `slab_str`
        unchanged when parsing or flipping raised an exception.

    Side effects:
        Increments the global `symmetric_count` when the structure is
        already symmetric in z (it is flipped anyway), and the global
        `error_count` when the flip fails.
    """
    global symmetric_count, error_count

    # Show progress every 100 structures
    if index is not None and index % 100 == 0:
        print(f"Processing structure {index}/{total_structures}...")

    try:
        # NOTE(security): eval() executes whatever expression the CSV cell
        # contains. Only run this on data files from a trusted source.
        struct_dict = eval(slab_str)
        structure = Structure.from_dict(struct_dict)

        # Symmetric structures are still flipped; we only keep count
        if is_structure_invertible(structure):
            symmetric_count += 1

        # Flip the already-built structure directly so that the dict is not
        # deserialized and symmetry-analysed a second time.
        return str(flip_struct(structure).as_dict())
    except Exception as e:
        error_count += 1
        # Report every failure, not just those on a progress boundary
        print(f"Error flipping structure {index}: {e}")
        return slab_str  # Return original if flipping fails


# Apply the flip operation to each slab structure with error handling,
# passing the real row position so progress is reported every 100 rows
print("Flipping structures - this may take some time...")
flipped_slabs = []
flip_succeeded = []
for position, slab_str in enumerate(df_flipped['slab']):
    errors_before = error_count
    flipped_slabs.append(safely_flip_structure(slab_str, position))
    # error_count only changes when this particular flip failed
    flip_succeeded.append(error_count == errors_before)
df_flipped['slab'] = flipped_slabs

# Swap top and bottom work functions
wf_top_before = df_flipped['WF_top'].copy()
df_flipped['WF_top'] = df_flipped['WF_bottom']
df_flipped['WF_bottom'] = wf_top_before

# A row whose flip failed still holds the ORIGINAL slab; keeping it with
# swapped work functions and a 'flipped' tag would mislabel top and bottom,
# so such rows are left out of the augmentation.
df_flipped = df_flipped[
    pd.Series(flip_succeeded, index=df_flipped.index, dtype=bool)
]

# Print summary
print(f"Flipping complete! Processed {total_structures} structures")
print(f"Symmetric structures: {symmetric_count}")
print(f"Errors encountered: {error_count}")
if error_count:
    print(f"Dropped {total_structures - len(df_flipped)} rows whose structure could not be flipped")

In [7]:
# Stack the original rows on top of the flipped rows, renumbering the index
frames = [df, df_flipped]
df_augmented = pd.concat(frames, ignore_index=True)

# Report the row counts before and after augmentation, then preview
n_original, n_augmented = len(df), len(df_augmented)
print(f"Original data size: {n_original}")
print(f"Augmented data size: {n_augmented}")
df_augmented.head()

Original data size: 36852
Augmented data size: 73704


Unnamed: 0.1,Unnamed: 0,mpid,miller,term,energy,bulk_energy,convergence,Fermi,slab,nsites,...,sym,sym_vac,broken_bonds,area,opt_tol,max_normal_search,cleavage_energy,WF_bottom,WF_top,flipped
0,1,mp-1058581,m111,0,-15294.75699,-15298.81608,550-4-4-1,1.7996,"{'@module': 'pymatgen.core.structure', '@class...",16,...,True,True,14,74.984486,1.0,2,0.027066,2.301425,2.287049,
1,3,mp-1058581,m110,0,-15296.12141,-15298.84052,550-4-4-1,2.2986,"{'@module': 'pymatgen.core.structure', '@class...",16,...,True,True,8,66.538369,0.1,3,0.020433,2.338018,2.360218,
2,5,mp-1058581,m101,0,-15295.70542,-7649.382492,550-4-4-1,2.4416,"{'@module': 'pymatgen.core.structure', '@class...",16,...,True,True,10,67.756083,0.1,1,0.022578,2.312259,2.310812,
3,7,mp-1058581,m100,0,-11471.47077,-3824.709179,550-4-4-1,1.9164,"{'@module': 'pymatgen.core.structure', '@class...",12,...,True,True,8,58.271504,0.1,0,0.022796,2.324314,2.317646,
4,9,mp-1058581,m011,0,-15296.59636,-7649.397502,550-6-2-1,2.7173,"{'@module': 'pymatgen.core.structure', '@class...",16,...,True,True,5,47.192212,0.1,1,0.023295,2.347947,2.380204,


In [8]:
# Write the combined original + flipped table to disk without the row index
df_augmented.to_csv(path_or_buf='../data/DFT_data_augmented.csv', index=False)
print("Saved augmented dataset to ../data/DFT_data_augmented.csv")

Saved augmented dataset to ../data/DFT_data_augmented.csv


In [ ]:
# Sanity-check the flip on a single example taken from the original dataset
sample_idx = 0  # First structure
sample_row = df.iloc[sample_idx]

# Rebuild the original structure from its serialized form
original_str = sample_row['slab']
original_struct = Structure.from_dict(eval(original_str))


def _print_structure_summary(heading, struct):
    """Print lattice parameters, angles, volume and z-extent of `struct`."""
    lattice = struct.lattice
    z_coords = [site.coords[2] for site in struct]
    print(heading)
    print(f"Lattice parameters: a={lattice.a:.4f}, b={lattice.b:.4f}, c={lattice.c:.4f}")
    print(f"Angles: alpha={lattice.alpha:.2f}°, beta={lattice.beta:.2f}°, gamma={lattice.gamma:.2f}°")
    print(f"Volume: {struct.volume:.2f} Å³")
    print(f"Z-range: {min(z_coords):.4f} - {max(z_coords):.4f}")
    print(f"First 3 atomic z-coordinates: {z_coords[:3]}")


# Run the flip function directly on the sample to check it
try:
    flipped_dict = flip_slab(eval(original_str))
    flipped_struct = Structure.from_dict(flipped_dict)
except Exception as e:
    print(f"Error during verification: {e}")
    flipping_successful = False
else:
    flipping_successful = True

if flipping_successful:
    # Show the same set of properties for both structures
    _print_structure_summary("ORIGINAL STRUCTURE:", original_struct)
    _print_structure_summary("\nFLIPPED STRUCTURE (using ASE):", flipped_struct)

    # After a flip, top and bottom work functions are expected to trade places
    wf_top = sample_row['WF_top']
    wf_bottom = sample_row['WF_bottom']
    print("\nWORK FUNCTIONS:")
    print(f"Original - Top: {wf_top:.6f}, Bottom: {wf_bottom:.6f}")
    print(f"Should be - Top: {wf_bottom:.6f}, Bottom: {wf_top:.6f}")