In [1]:
import pandas as pd
import os

# --- Helper function to count electrons ---
def get_electron_count_and_multiplicity(xyz_file_path, charge):
    """Count an ion's electrons from its XYZ file and pick a spin multiplicity.

    The first two lines of the file (atom count and comment line in the XYZ
    layout) are skipped; every following non-blank line is expected to start
    with an element symbol.

    Args:
        xyz_file_path: Path to the XYZ structure file.
        charge: Net charge of the ion; it is subtracted from the summed
            atomic numbers (positive charge -> fewer electrons).

    Returns:
        A ``(electron_count, multiplicity)`` tuple. The multiplicity is 1 for
        an even electron count and 2 for an odd one, i.e. the lowest-spin
        state is assumed.

    Raises:
        ValueError: If a line starts with a symbol that is not in the
            element table below.
    """
    atomic_numbers = {'H': 1, 'C': 6, 'N': 7, 'O': 8, 'F': 9, 'P': 15, 'S': 16, 'Cl': 17, 'Br': 35, 'B': 5}
    total_electrons = 0
    with open(xyz_file_path, 'r') as f:
        # Skip the two header lines without failing on a shorter file.
        next(f, None)
        next(f, None)
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines (commonly a trailing newline at the end of the
                # file) describe no atom; indexing them would raise IndexError.
                continue
            atom_symbol = fields[0]
            # Normalise case so 'CL' / 'cl' resolve to the 'Cl' table entry.
            atomic_number = atomic_numbers.get(atom_symbol.capitalize())
            if atomic_number is None:
                raise ValueError(f"Unknown atom '{atom_symbol}' in {xyz_file_path}")
            total_electrons += atomic_number

    # Adjust for the ion's charge
    final_electron_count = total_electrons - charge

    # Even electrons -> singlet (1); odd electrons -> doublet (2)
    multiplicity = 1 if final_electron_count % 2 == 0 else 2

    return final_electron_count, multiplicity

# --- Main Script ---
# Generate one ORCA input file per ion listed in the CSV, using the matching
# XYZ structure (if present) to choose the spin multiplicity.
ions_list_file = '../data/ions_with_smiles.csv'
structures_dir = '../structures/'
output_dir = '../calculations_individual/'
os.makedirs(output_dir, exist_ok=True)

orca_template = """! r2SCAN-3c Opt Freq MiniPrint
* xyzfile {charge} {multiplicity} {xyz_filename}
"""

try:
    df_ions = pd.read_csv(ions_list_file)
    print(f"Found {len(df_ions)} unique ions to process.")

    generated_count = 0
    missing_structures = []  # ions whose .xyz file does not exist
    failed_ions = []         # ions whose .xyz file could not be parsed/read

    for index, row in df_ions.iterrows():
        ion_abbr = row['ion_abbreviation']
        ion_type = row['ion_type']
        # Anything that is not labelled 'cation' is treated as a -1 anion.
        # NOTE(review): multiply charged ions cannot be expressed here --
        # confirm the CSV only contains singly charged species.
        charge = 1 if ion_type == 'cation' else -1

        # Turn the abbreviation into a file-system friendly name.
        safe_name = ion_abbr.strip('[]').replace('+', '_p').replace('-', '_m').replace('(', '_').replace(')', '_')
        xyz_filename_base = safe_name + '.xyz'
        xyz_filepath_relative = os.path.join(structures_dir, xyz_filename_base)

        if not os.path.exists(xyz_filepath_relative):
            # Remember the gap so it is reported instead of vanishing silently.
            missing_structures.append(safe_name)
            continue

        # Get the correct multiplicity for this ion. A problem with one
        # structure file must not stop the remaining ions from being written.
        try:
            e_count, mult = get_electron_count_and_multiplicity(xyz_filepath_relative, charge)
        except (ValueError, IndexError, OSError) as ion_error:
            failed_ions.append(safe_name)
            print(f"Skipped: {safe_name} ({ion_error})")
            continue

        # Path to the structure as seen from the directory holding the .inp
        # file, derived from the configured directories rather than repeated
        # by hand. Forward slashes are kept on every platform.
        # NOTE(review): presumes ORCA is launched from output_dir -- confirm.
        xyz_path_for_orca = os.path.relpath(xyz_filepath_relative, start=output_dir).replace(os.sep, '/')
        inp_content = orca_template.format(charge=charge, multiplicity=mult, xyz_filename=xyz_path_for_orca)

        output_path = os.path.join(output_dir, safe_name + '.inp')
        with open(output_path, 'w') as f_out:
            f_out.write(inp_content)
        generated_count += 1

        print(f"Generated: {safe_name}.inp (Charge: {charge}, Electrons: {e_count}, Multiplicity: {mult})")

    if missing_structures:
        print(f"\nNo structure file for {len(missing_structures)} ion(s): {', '.join(missing_structures)}")
    if failed_ions:
        print(f"\nCould not process {len(failed_ions)} ion(s): {', '.join(failed_ions)}")

    print(f"\nGenerated {generated_count} of {len(df_ions)} input files.")
    print("\nProcess complete.")

except Exception as e:
    # Top-level boundary for the notebook cell: report anything unexpected
    # (e.g. missing CSV or column) without a traceback, as before.
    print(f"An error occurred: {e}")

Found 84 unique ions to process.
Generated: _ETO_2IM.inp (Charge: 1, Electrons: 84, Multiplicity: 1)
Generated: BBIM.inp (Charge: 1, Electrons: 100, Multiplicity: 1)
Generated: BMIM.inp (Charge: 1, Electrons: 76, Multiplicity: 1)
Generated: BMMIM.inp (Charge: 1, Electrons: 84, Multiplicity: 1)
Generated: BMPYR.inp (Charge: 1, Electrons: 80, Multiplicity: 1)
Generated: C12MIM.inp (Charge: 1, Electrons: 140, Multiplicity: 1)
Generated: C3MPYR.inp (Charge: 1, Electrons: 72, Multiplicity: 1)
Generated: C3OMIM.inp (Charge: 1, Electrons: 76, Multiplicity: 1)
Generated: C4MPY.inp (Charge: 1, Electrons: 82, Multiplicity: 1)
Generated: C4PY.inp (Charge: 1, Electrons: 74, Multiplicity: 1)
Generated: C5MIM.inp (Charge: 1, Electrons: 84, Multiplicity: 1)
Generated: C5MPYR.inp (Charge: 1, Electrons: 88, Multiplicity: 1)
Generated: C5O2MIM.inp (Charge: 1, Electrons: 100, Multiplicity: 1)
Generated: C6F9MIM.inp (Charge: 1, Electrons: 164, Multiplicity: 1)
Generated: C7MIM.inp (Charge: 1, Electrons: 1