In [1]:
import os
import csv

In [2]:
working_dir = os.getcwd()

output_file_xyz = os.path.join(working_dir, "output", "xyz_coordinates.txt")
output_file_thermo = os.path.join(working_dir, "output", "thermo_library.csv")

In [3]:
def process_arkane_output(fpath):
    # Read all the lines
    with open(fpath, 'r') as file:
        lines = file.readlines()
    
    # Get the name of the molecule from the first line
    first_line = lines[0].strip()
    if first_line.startswith("# Coordinates for ") and " in Input Orientation (angstroms):" in first_line:
            molecule_name = first_line.split("# Coordinates for ")[1].split(" in Input Orientation (angstroms):")[0]
    else:
        return  # Skip this file if the first line does not match the expected format
    
    xyz = ""
    dHf_298 = None
    S_298 = None
    Tmin_low = None
    Tmax_low = None
    Tmin_high = None
    Tmax_high = None
    nasa_low = []
    nasa_high = []
    
    # Procee the lines until it reaches the line "conformer("
    for line in lines[1:]:
        string = line.strip()
        if string.startswith("conformer("):
            break
            
        # Remove the '#' symbol and leading spaces from the coordinate lines
        if string.startswith("#"):
            xyz += string[1:].strip() + "\n"
            
    num_atoms = len(xyz.strip().split("\n"))
    
    output_string = f"{num_atoms}\n{counter} {molecule_name}\n{xyz.strip()}\n\n"
    
    output_xyz.append(output_string)
            
    for line in lines[num_atoms+1:]:
        string = line.strip()
        
        # Extract enthalpy of formation
        if string.startswith("#   Enthalpy of formation (298 K)"):
            dHf_298 = float(string.split("=")[1].split("kcal/mol")[0].strip())       
            
        # Extract entropy of formation
        if string.startswith("#   Entropy of formation (298 K)"):
            S_298 = float(string.split("=")[1].split("cal/(mol*K)")[0].strip())

        # Extract NASA polynomial parameters for low temperature
        if string.startswith("coeffs = ") and nasa_low == []:
            nasa_low = [float(x) for x in string.split("[")[1].split("]")[0].split(", ")]
        
        # Extract T_min and T_max for high temperature
        if Tmin_low and string.startswith("Tmin = ") and Tmin_high is None:
            Tmin_high = float(string.split("(")[1].split(",")[0].strip())
            
        if Tmax_low and string.startswith("Tmax = ") and Tmax_high is None:
            Tmax_high = float(string.split("(")[1].split(",")[0].strip())
        
        # Extract T_min and T_max for low temperature
        if string.startswith("Tmin = ") and Tmin_low is None:
            Tmin_low = float(string.split("(")[1].split(",")[0].strip())
            
        if string.startswith("Tmax = ") and Tmax_low is None:
            Tmax_low = float(string.split("(")[1].split(",")[0].strip())
            
        # Extract NASA polynomial parameters for high temperature
        if nasa_low and string.startswith("coeffs = "):
            nasa_high = [float(x) for x in string.split("[")[1].split("]")[0].split(", ")]
            
    # Append thermodynamic data to the list
    thermo_data.append({
        'molecule_number': counter,
        'molecule_name': molecule_name,
        'dHf_298': dHf_298,
        'S_298': S_298,
        'Tmin_low': Tmin_low,
        'Tmax_low': Tmax_low,
        'a1l': nasa_low[0] if len(nasa_low) > 0 else None,
        'a2l': nasa_low[1] if len(nasa_low) > 1 else None,
        'a3l': nasa_low[2] if len(nasa_low) > 2 else None,
        'a4l': nasa_low[3] if len(nasa_low) > 3 else None,
        'a5l': nasa_low[4] if len(nasa_low) > 4 else None,
        'a6l': nasa_low[5] if len(nasa_low) > 5 else None,
        'a7l': nasa_low[6] if len(nasa_low) > 6 else None,
        'Tmin_high': Tmin_high,
        'Tmax_high': Tmax_high,
        'a1h': nasa_high[0] if len(nasa_high) > 0 else None,
        'a2h': nasa_high[1] if len(nasa_high) > 1 else None,
        'a3h': nasa_high[2] if len(nasa_high) > 2 else None,
        'a4h': nasa_high[3] if len(nasa_high) > 3 else None,
        'a5h': nasa_high[4] if len(nasa_high) > 4 else None,
        'a6h': nasa_high[5] if len(nasa_high) > 5 else None,
        'a7h': nasa_high[6] if len(nasa_high) > 6 else None
    })   


In [4]:
output_xyz = []
thermo_data = []
counter = 1

for root, dirs, files in os.walk(working_dir):
    for file in files:
        if file == "output.py":
            fpath = os.path.join(root, file)
            process_arkane_output(fpath)
            counter += 1
            
with open(output_file_xyz, 'w') as fout:
    fout.writelines(output_xyz)
    
with open(output_file_thermo, 'w') as csvout:
    fieldnames = [
        'molecule_number', 'molecule_name', 'dHf_298', 'S_298',
        'Tmin_low', 'Tmax_low', 'a1l', 'a2l', 'a3l', 'a4l', 'a5l', 'a6l', 'a7l',
        'Tmin_high', 'Tmax_high', 'a1h', 'a2h', 'a3h', 'a4h', 'a5h', 'a6h', 'a7h'
    ]
    
    writer = csv.DictWriter(csvout, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(thermo_data)

FileNotFoundError: [Errno 2] No such file or directory: '/home/ssun30/Work/Boron_Nitride/Arkane/output/xyz_coordinates.txt'