In [3]:
import pandas as pd
import numpy as np
from pymatgen.core.structure import Structure

In [None]:
# Read the original DFT dataset and tag every row with an empty-string
# 'flipped' marker, which identifies rows that hold un-mirrored structures.
df = pd.read_csv('../data/DFT_data.csv').assign(flipped='')

# Show the first few rows as the cell output.
df.head()

In [None]:
# Define function to flip the slab structure
def flip_slab(struct_dict):
    """Mirror a slab structure along the Cartesian z-axis.

    Every site keeps its Cartesian x and y and has its z replaced by
    ``max_z - z``, where ``max_z`` is the largest Cartesian z among all
    sites (so the formerly highest site ends up at z = 0).

    Args:
        struct_dict: Dictionary accepted by ``Structure.from_dict``.

    Returns:
        The mirrored structure, serialised with ``Structure.as_dict()``.

    Raises:
        ValueError: If the structure has no sites (``max`` of an empty
            sequence).
    """
    # Convert dict to pymatgen Structure
    structure = Structure.from_dict(struct_dict)

    # Highest Cartesian z coordinate; it defines the mirror offset.
    max_z = max(site.coords[2] for site in structure)

    # Work on a copy so the structure built from the input stays untouched.
    flipped_structure = structure.copy()
    lattice = flipped_structure.lattice

    # Iterate the pristine structure rather than the copy being modified.
    for i, site in enumerate(structure):
        x, y, z = site.coords
        # Assigning a (species, coords) tuple to a Structure index interprets
        # the coordinates as *fractional*. The mirrored point is Cartesian, so
        # it must be converted first; passing it directly would misplace every
        # site for any lattice other than the identity matrix.
        mirrored_frac = lattice.get_fractional_coords([x, y, max_z - z])
        # Tuple assignment keeps the site's existing properties.
        flipped_structure[i] = site.species, mirrored_frac

    return flipped_structure.as_dict()

In [None]:
# Start from an independent copy of the original rows and label each of them
# with the 'flipped' marker string.
df_flipped = df.copy().assign(flipped='flipped')

# Mirror every slab: parse the stored string into a dict, flip it, and
# serialise it back to a string.
# NOTE(review): eval() executes whatever expression the 'slab' column holds;
# only run this on a trusted data file.
df_flipped['slab'] = (
    df_flipped['slab']
    .apply(eval)
    .apply(flip_slab)
    .apply(str)
)

# Exchange the top and bottom work functions. Both right-hand sides are read
# from the frame before assign() writes anything, so this is a true swap.
df_flipped = df_flipped.assign(
    WF_top=df_flipped['WF_bottom'],
    WF_bottom=df_flipped['WF_top'],
)

# Note: cleavage_energy stays the same

In [None]:
# Stack the flipped rows beneath the original rows and renumber the index
# from zero.
df_augmented = pd.concat((df, df_flipped), axis=0, ignore_index=True)

# Report the row counts before and after augmentation, then preview the
# combined table.
print(f"Original data size: {len(df)}")
print(f"Augmented data size: {len(df_augmented)}")
df_augmented.head()

In [None]:
# Write the combined table to disk without the DataFrame index column.
df_augmented.to_csv(path_or_buf='../data/DFT_data_augmented.csv', index=False)
print("Saved augmented dataset to ../data/DFT_data_augmented.csv")