In [7]:
import os
import re
import csv
import glob
import numpy as np

In [13]:
# Read one output file fully into memory as a single string; the extractor
# functions defined in the following cells all take this text as their input.
# The path is relative, so it resolves against the kernel's working directory.
with open('test/batch_0000/molecule_1/triplet_tddft.output', 'r') as f:
    content = f.read()

In [14]:
def extract_excited_states(output_content):
    """Return the S1 and T1 excitation energies parsed from the output text.

    Each energy is the number printed directly before ``eV`` on a
    ``STATE n: E= ... au ... eV ... <S**2> = ... Mult m`` line.

    * S1 is taken from the first line that is ``STATE 1`` with ``Mult 1``.
    * T1 is taken from the first ``STATE`` line (any index) with ``Mult 3``.

    Parameters
    ----------
    output_content : str
        Full text of the output file.

    Returns
    -------
    tuple
        ``(s1_energy, t1_energy)``; each entry is a ``float`` or ``None``
        when no matching line exists.
    """
    def first_energy_ev(state_line_regex):
        # No DOTALL flag: '.' cannot cross a newline, so the whole record
        # (energy and multiplicity) has to sit on one line to match.
        hit = re.search(state_line_regex, output_content)
        if hit is None:
            return None
        return float(hit.group(1))

    singlet_regex = r'STATE\s+1:\s+E=\s+[\d\.-]+\s+au\s+([\d\.]+)\s+eV.*?<S\*\*2>\s+=\s+[\d\.]+\s+Mult\s+1'
    triplet_regex = r'STATE\s+\d+:\s+E=\s+[\d\.-]+\s+au\s+([\d\.]+)\s+eV.*?<S\*\*2>\s+=\s+[\d\.]+\s+Mult\s+3'

    singlet_energy = first_energy_ev(singlet_regex)
    triplet_energy = first_energy_ev(triplet_regex)
    return singlet_energy, triplet_energy

In [15]:
def extract_soc_matrix_elements(output_content):
    """Collect the magnitude of every triplet-singlet SOC matrix element.

    Only the text between the ``CALCULATED SOCME BETWEEN TRIPLETS AND
    SINGLETS`` header and the next ``SOC stabilization`` marker is scanned.
    Rows are expected in the form
    ``T  S  (Re_Z, Im_Z)  (Re_X, Im_X)  (Re_Y, Im_Y)``; lines that do not
    fit that shape are skipped.

    Parameters
    ----------
    output_content : str
        Full text of the output file.

    Returns
    -------
    dict
        Maps ``'S<s>-T<t>'`` to ``sqrt(|Z|^2 + |X|^2 + |Y|^2)``. The dict is
        empty when the section (header plus terminating marker) is absent.
        A later row with the same state pair overwrites an earlier one.
    """
    section = re.search(
        r'CALCULATED SOCME BETWEEN TRIPLETS AND SINGLETS(.*?)SOC stabilization',
        output_content,
        re.DOTALL,
    )
    couplings = {}
    if section is None:
        return couplings

    # Row layout: T S (Re_Z, Im_Z) (Re_X, Im_X) (Re_Y, Im_Y)
    row_regex = re.compile(
        r'^\s*(\d+)\s+(\d+)\s+\(\s*([-\d\.]+)\s*,\s*([-\d\.]+)\s*\)\s+\(\s*([-\d\.]+)\s*,\s*([-\d\.]+)\s*\)\s+\(\s*([-\d\.]+)\s*,\s*([-\d\.]+)\s*\)'
    )

    for raw_row in section.group(1).split('\n'):
        row = row_regex.match(raw_row.strip())
        if row is None:
            continue

        triplet_idx = int(row.group(1))
        singlet_idx = int(row.group(2))

        # Six numbers in file order: real/imaginary parts of Z, then X, then Y.
        z_real, z_imag, x_real, x_imag, y_real, y_imag = (
            float(token) for token in row.groups()[2:]
        )

        # Modulus of each complex component, then the norm over the three.
        norm_z = np.sqrt(z_real**2 + z_imag**2)
        norm_x = np.sqrt(x_real**2 + x_imag**2)
        norm_y = np.sqrt(y_real**2 + y_imag**2)

        couplings[f'S{singlet_idx}-T{triplet_idx}'] = np.sqrt(
            norm_z**2 + norm_x**2 + norm_y**2
        )

    return couplings

In [16]:
def extract_absorption_spectrum(output_content):
    """Parse the regular and the SOC-corrected electric-dipole absorption tables.

    For each table, the section starts at its title line and is cut at the
    first blank line (or at the alternative stop markers in the section
    regex, or at end of text). Within a section, every line shaped like a
    transition row contributes the first number after the transition label
    (stored as the energy) and the fourth number (stored as the oscillator
    strength).

    Parameters
    ----------
    output_content : str
        Full text of the output file.

    Returns
    -------
    tuple of four lists
        ``(e_abs, f_abs, e_abs_soc, f_abs_soc)``; any list is empty when its
        table is missing or holds no matching rows.
    """
    def collect_rows(section_regex, row_regex):
        # Shared parser for both tables: find the section, then keep the
        # two captured numbers of every line that matches the row shape.
        energies, strengths = [], []
        section = re.search(section_regex, output_content, re.DOTALL)
        if section is None:
            return energies, strengths

        row_matcher = re.compile(row_regex)
        for raw_row in section.group(1).split('\n'):
            row = row_matcher.match(raw_row.strip())
            if row is None:
                continue
            energy_value = float(row.group(1))
            strength_value = float(row.group(2))
            energies.append(energy_value)
            strengths.append(strength_value)
        return energies, strengths

    # Regular spectrum: transition labels look like "0-1A -> 1-1A".
    plain_energies, plain_strengths = collect_rows(
        r'ABSORPTION SPECTRUM VIA TRANSITION ELECTRIC DIPOLE MOMENTS\s+(.*?)(?=\n\s*\n|\nSOC|$)',
        r'^\s*\d+-\d+[A-Z]\s+->\s+\d+-\d+[A-Z]\s+([\d\.]+)\s+[\d\.]+\s+[\d\.]+\s+([\d\.]+)',
    )

    # SOC-corrected spectrum: labels may carry a decimal part, e.g. "0-1.0A".
    soc_energies, soc_strengths = collect_rows(
        r'SOC CORRECTED ABSORPTION SPECTRUM VIA TRANSITION ELECTRIC DIPOLE MOMENTS\s+(.*?)(?=\n\s*\n|SOC CORRECTED ABSORPTION SPECTRUM VIA TRANSITION VELOCITY|$)',
        r'^\s*\d+-[\d\.]+[A-Z]\s+->\s+\d+-[\d\.]+[A-Z]\s+([\d\.]+)\s+[\d\.]+\s+[\d\.]+\s+([\d\.]+)',
    )

    return plain_energies, plain_strengths, soc_energies, soc_strengths

In [17]:
# Pull the S1 and T1 energies out of the loaded output text and print them
# (either value is None when its state line was not found).
s1, t1 = extract_excited_states(content)
print(s1, t1)

3.718 2.369


In [16]:
# Parse the SOC matrix-element magnitudes, keyed 'S<s>-T<t>'; the bare name
# on the last line makes the notebook display the resulting dict.
soc = extract_soc_matrix_elements(content)
soc

{'S0-T1': 15.864504404487395,
 'S1-T1': 6.451604451607367,
 'S2-T1': 1.0609901036296239,
 'S3-T1': 8.898252637456412,
 'S4-T1': 2.92624332549431,
 'S5-T1': 1.2758526560696575,
 'S0-T2': 19.955959009779512,
 'S1-T2': 9.917731595480893,
 'S2-T2': 1.1497825881443848,
 'S3-T2': 4.270339564952651,
 'S4-T2': 4.281471709587721,
 'S5-T2': 0.5051732376126036,
 'S0-T3': 1.2068139873236472,
 'S1-T3': 0.8830062287436029,
 'S2-T3': 2.572022550445466,
 'S3-T3': 0.452658811910251,
 'S4-T3': 8.188583516091168,
 'S5-T3': 0.8012490249604053,
 'S0-T4': 5.910406077419724,
 'S1-T4': 3.7927035212365334,
 'S2-T4': 0.1606237840420901,
 'S3-T4': 3.104239037187697,
 'S4-T4': 0.8218880702382777,
 'S5-T4': 0.1711724276862369,
 'S0-T5': 1.4086163423728975,
 'S1-T5': 1.4996332885075605,
 'S2-T5': 2.4282915805149923,
 'S3-T5': 0.08774964387392122,
 'S4-T5': 3.0918926242675377,
 'S5-T5': 0.30331501776206204}

In [30]:
# Unpack energies and oscillator strengths for the regular spectrum
# (e_abs, f_abs) and the SOC-corrected spectrum (e_abs_soc, f_abs_soc).
e_abs, f_abs, e_abs_soc, f_abs_soc = extract_absorption_spectrum(content)

In [35]:
# Display the SOC-corrected oscillator strengths returned by
# extract_absorption_spectrum.
f_abs_soc

[1.9e-08,
 1.3e-08,
 5.8838e-05,
 0.003706511,
 1e-09,
 0.0,
 6.1328e-05,
 9.19e-07,
 1.62e-07,
 3.6716e-05,
 0.000899632,
 2.1046e-05,
 2e-08,
 2.95e-07,
 0.000997464,
 9.6e-08,
 2e-09,
 7.3578e-05,
 0.004203657,
 0.08066273]