In [1]:
import os

In [2]:
def _rename_first_existing(directory, candidate_names, new_name):
    """Rename the first existing candidate in ``directory`` to ``new_name``.

    Candidates are tried in the given order. As soon as the destination exists
    (it was already there, or an earlier candidate was just renamed to it) the
    remaining candidates are left untouched and reported, instead of silently
    overwriting the destination.
    """
    new_path = os.path.join(directory, new_name)
    for old_name in candidate_names:
        old_path = os.path.join(directory, old_name)
        if not os.path.exists(old_path):
            continue
        if os.path.exists(new_path):
            # os.rename would replace new_path on POSIX (and raise on Windows).
            print(f"Skipped {old_path}: {new_path} already exists")
            continue
        os.rename(old_path, new_path)
        print(f"Renamed {old_path} to {new_path}")


def rename_files(base_dir):
    """Give the trajectory files in every compound folder a compound-specific name.

    For each sub-directory ``<name>`` of ``base_dir``:

    * ``vasprun_ads60.traj`` or ``ads60.traj`` becomes ``ads60_<name>.traj``
    * ``vasprun_relax_restart.traj`` or ``relax_restart.traj`` becomes
      ``relax_restart_<name>.traj``

    Entries of ``base_dir`` that are not directories are ignored. An existing
    destination file is never overwritten; the conflicting source file is kept
    and a "Skipped" message is printed.

    Parameters
    ----------
    base_dir : str
        Directory whose immediate sub-directories are the compound folders.
    """
    for compound_name in os.listdir(base_dir):
        compound_dir = os.path.join(base_dir, compound_name)
        if not os.path.isdir(compound_dir):
            continue

        _rename_first_existing(
            compound_dir,
            ['vasprun_ads60.traj', 'ads60.traj'],
            f"ads60_{compound_name}.traj",
        )
        _rename_first_existing(
            compound_dir,
            ['vasprun_relax_restart.traj', 'relax_restart.traj'],
            f"relax_restart_{compound_name}.traj",
        )

In [3]:
# Absolute path of the DFT_Data directory, resolved two levels above the current working directory.
base_dir = os.path.abspath(os.path.join(os.getcwd(), '..', '..', 'DFT_Data'))

In [4]:
# Rename the trajectory files inside every compound sub-directory of base_dir.
rename_files(base_dir)

In [5]:
import pandas as pd

In [6]:
def _real_plus_placeholder_freqs(vib_freqs, imaginary_placeholder):
    """Return one row's real frequencies followed by a placeholder per imaginary mode.

    The first NaN cell acts as the separator between the real and the imaginary
    frequencies. Every non-NaN cell after it counts as one imaginary mode and is
    replaced by ``imaginary_placeholder``; further NaN cells are ignored.
    """
    real_freqs = []
    imaginary_count = 0
    separator_seen = False
    for freq in vib_freqs:
        if pd.isna(freq):
            separator_seen = True
        elif separator_seen:
            imaginary_count += 1
        else:
            real_freqs.append(freq)
    return real_freqs + [imaginary_placeholder] * imaginary_count


def process_vibrational_frequencies(file_path, imaginary_placeholder=12):
    """Read the 'Vibrational_freq' sheet and return one row of frequencies per molecule.

    The first row of the sheet is skipped and fully empty rows are dropped.
    Column 0 holds the molecule name, column 1 the site, and the remaining
    columns the frequencies. Reading stops at the row whose first three cells
    are ``'molecule'``, ``'site'``, ``'vib freq (meV)'``, i.e. the start of the
    meV section, which is ignored.

    Within a row, the cells before the first empty cell are kept as real
    frequencies; each value after it is treated as an imaginary frequency and
    replaced by ``imaginary_placeholder``. Rows that share a molecule name are
    concatenated into a single entry, in order of appearance.

    Parameters
    ----------
    file_path : str
        Path of the Excel workbook.
    imaginary_placeholder : number, optional
        Value written in place of every imaginary frequency (default 12).

    Returns
    -------
    pandas.DataFrame
        Columns ``molecule_name, vib_freq_1, ..., vib_freq_N`` where N is the
        longest frequency list; shorter rows are padded with missing values.
        If the sheet contains no data rows, an empty frame with only the
        ``molecule_name`` column is returned.
    """
    # Passing the path (not an ExcelFile object) lets pandas open and close the workbook.
    df = pd.read_excel(file_path, sheet_name='Vibrational_freq', skiprows=1)  # Skip the first header row
    df = df.dropna(how='all')

    processed_data = {}
    current_molecule = None
    current_freqs = []

    for _, row in df.iterrows():
        molecule_name = row.iloc[0]

        # The repeated header marks the meV section, which is ignored for now.
        if molecule_name == 'molecule' and row.iloc[1] == 'site' and row.iloc[2] == 'vib freq (meV)':
            break

        if molecule_name != current_molecule:
            current_molecule = molecule_name
            # setdefault keeps frequencies already collected for this molecule
            # if it shows up again after a different one.
            current_freqs = processed_data.setdefault(molecule_name, [])

        current_freqs.extend(
            _real_plus_placeholder_freqs(row.iloc[2:].values, imaginary_placeholder)
        )

    if not processed_data:
        return pd.DataFrame(columns=['molecule_name'])

    max_len = max(len(freqs) for freqs in processed_data.values())
    columns = ['molecule_name'] + [f'vib_freq_{i+1}' for i in range(max_len)]

    # Build all rows first and create the frame once; DataFrame.append no
    # longer exists in pandas >= 2.0 and was quadratic when called in a loop.
    records = [
        [name] + list(freqs) + [None] * (max_len - len(freqs))
        for name, freqs in processed_data.items()
    ]
    return pd.DataFrame(records, columns=columns)

In [7]:
# Absolute path of the Excel workbook (vibrational frequencies and ZPE sheets) expected inside DFT_Data.
fpath = os.path.abspath(os.path.join(os.getcwd(), '..', '..', 'DFT_Data', 'vibrational_freq_zpe_Ag111.xlsx'))

In [8]:
# Parse the 'Vibrational_freq' sheet into one row of frequencies per molecule.
processed_df = process_vibrational_frequencies(fpath)

In [9]:
# Display the processed frequency table.
processed_df

Unnamed: 0,molecule_name,vib_freq_1,vib_freq_2,vib_freq_3,vib_freq_4,vib_freq_5,vib_freq_6,vib_freq_7,vib_freq_8,vib_freq_9,vib_freq_10,vib_freq_11,vib_freq_12,vib_freq_13,vib_freq_14,vib_freq_15
0,COX,2035.567792,249.189298,170.553307,168.94621,80.934444,78.636353,,,,,,,,,
1,COOHX,3452.599312,1692.801487,1210.909046,938.568596,640.313178,600.340532,319.38796,201.167506,153.420515,12.0,12.0,12.0,,,
2,HCOOH,3604.858579,2963.997931,1758.134213,1347.423962,1249.40384,1073.369888,1003.757145,675.032496,595.242217,79.549522,58.806437,26.276846,12.0,12.0,12.0
3,COHX,3489.548316,1214.01348,1069.202684,392.259243,257.855693,200.278954,163.80564,116.553874,12.0,,,,,,
4,CHOX,2735.486288,1713.106601,1183.474121,618.309015,361.051577,182.286635,68.696622,57.378762,12.0,,,,,,
5,CX,402.952671,397.197084,396.227223,,,,,,,,,,,,
6,CHOHX,3617.014649,2933.999972,1349.190537,1177.73787,1057.210312,715.077305,335.496838,316.59208,206.373604,180.895726,75.753768,45.954889,,,
7,CHX,3030.332147,571.678229,561.611368,438.048035,367.529036,355.222848,,,,,,,,,
8,CH2OHX,3634.078043,3009.369656,2949.596732,1442.264912,1223.884677,1170.495362,1084.158158,925.748223,578.452683,387.828924,165.304416,133.206708,128.403159,50.319674,12.0
9,CH2X,3011.744558,2956.951952,1293.166643,582.626387,435.381484,393.850357,314.975484,267.66082,12.0,,,,,,


In [10]:
def save_frequencies_to_txt(df, base_dir):
    """Write a ``zpe_log_<molecule>.txt`` frequency table for every row of ``df``.

    Each row names a molecule in its ``molecule_name`` column; all remaining
    columns are frequency values. The file is created (or overwritten) inside
    ``<base_dir>/<molecule_name>``. When that folder does not exist an error
    message is printed and the row is skipped.

    The table lists the non-missing values in ascending order, numbered from 0.
    Each value is written as-is in the ``cm^-1`` column and divided by
    8.0655429 for the ``meV`` column, both with two decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``molecule_name`` column followed by frequency columns.
    base_dir : str
        Directory that contains one folder per molecule.
    """
    rule = '---------------------\n'

    for _, record in df.iterrows():
        molecule_name = record['molecule_name']
        target_dir = os.path.join(base_dir, molecule_name)

        if os.path.exists(target_dir):
            log_path = os.path.join(target_dir, f'zpe_log_{molecule_name}.txt')

            with open(log_path, 'w') as handle:
                # Table header
                handle.writelines([rule, '  #    meV     cm^-1\n', rule])

                # Keep only the values that are present, lowest first
                wavenumbers = sorted(
                    value for value in record.iloc[1:] if not pd.isna(value)
                )

                for mode, wavenumber in enumerate(wavenumbers):
                    energy_mev = wavenumber / 8.0655429
                    handle.write(f'{mode:3}    {energy_mev:6.2f}    {wavenumber:6.2f}\n')
        else:
            print(f"Error: Folder for molecule '{molecule_name}' not found.")

In [11]:
# Write a zpe_log_<molecule>.txt file into each molecule folder found under base_dir.
save_frequencies_to_txt(processed_df, base_dir)

Error: Folder for molecule 'CO2' not found.


In [12]:
def read_zpe_data(file_path):
    """Load the 'ZPE' sheet of the workbook as a two-column table.

    The sheet's first row is used as the header. Its ``site`` column is
    dropped and the two remaining columns are renamed, in order, to
    ``molecule_name`` and ``zpe_eV``.

    Parameters
    ----------
    file_path : str
        Path of the Excel workbook.

    Returns
    -------
    pandas.DataFrame
        Frame with the columns ``molecule_name`` and ``zpe_eV``.

    Raises
    ------
    KeyError
        If the sheet has no ``site`` column.
    ValueError
        If anything other than two columns remains after dropping ``site``.
    """
    # Passing the path (not an ExcelFile object) lets pandas open and close the
    # workbook itself, so no file handle is left open.
    df_zpe = pd.read_excel(file_path, sheet_name='ZPE', header=0)
    df_zpe = df_zpe.drop(columns=['site'])

    # Rename columns for clarity
    df_zpe.columns = ['molecule_name', 'zpe_eV']

    return df_zpe

In [13]:
# Load the 'ZPE' sheet as a two-column table (molecule_name, zpe_eV).
zpe_df = read_zpe_data(fpath)

In [14]:
def append_zpe_to_txt(df, base_dir):
    """Append each molecule's zero-point energy to its ``zpe_log_<molecule>.txt`` file.

    For every row of ``df`` the file ``<base_dir>/<molecule_name>/zpe_log_<molecule_name>.txt``
    receives a separator line followed by ``Zero-point energy: <zpe_eV> eV``.

    Rows are skipped when:

    * the molecule name is missing (e.g. a blank spreadsheet row), silently;
    * the ZPE value is missing, with a warning, so no "nan eV" line is written;
    * the molecule folder or the log file does not exist, with an error message;
    * the file already contains the exact ZPE line, so re-running the cell
      does not duplicate the footer.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns ``molecule_name`` and ``zpe_eV``.
    base_dir : str
        Directory that contains one folder per molecule.
    """
    for _, row in df.iterrows():
        molecule_name = row['molecule_name']
        zpe = row['zpe_eV']

        # A missing name cannot be turned into a path (os.path.join would raise).
        if pd.isna(molecule_name):
            continue
        if pd.isna(zpe):
            print(f"Warning: No ZPE value for molecule '{molecule_name}'; skipped.")
            continue

        # Find the folder corresponding to the molecule
        molecule_folder = os.path.join(base_dir, molecule_name)
        if not os.path.exists(molecule_folder):
            print(f"Error: Folder for molecule '{molecule_name}' not found.")
            continue

        txt_file_path = os.path.join(molecule_folder, f'zpe_log_{molecule_name}.txt')

        # Check if the file exists before appending
        if not os.path.isfile(txt_file_path):
            print(f"Error: File '{txt_file_path}' not found.")
            continue

        zpe_line = f"Zero-point energy: {zpe} eV\n"

        # Keep the operation idempotent: do not append the same line twice.
        with open(txt_file_path, 'r') as file:
            already_recorded = zpe_line in file.read()
        if already_recorded:
            continue

        with open(txt_file_path, 'a') as file:
            file.write('---------------------\n')
            file.write(zpe_line)

In [15]:
# Append each molecule's zero-point energy to its existing zpe_log file under base_dir.
append_zpe_to_txt(zpe_df, base_dir)

Error: Folder for molecule 'CO2' not found.
