# file-io

> Read values from file-based TeraChem outputs

In [None]:
#| default_exp file-io

In [None]:
#| hide
from nbdev.showdoc import *
from fastcore.test import *

In [None]:
#| export
from pathlib import Path
import re

In [None]:
#| export
def get_casscf_energy_of_state(
    file: Path,  # Path to TC output file
    state: int,  # State ID (1-indexed)
) -> float: # Energy in Hartree; `None` when no line for `state` is found
    """Return the CASSCF energy of singlet state `state` from the first matching line of `file`."""
    # Match the whitespace around the state index loosely: a fixed two-space
    # gap only fits one padding width, so an index printed with different
    # padding (e.g. a two-digit state) would otherwise never be found.
    energy_label = re.compile(
        rf"Singlet state\s+{re.escape(str(state))}\s+energy:"
    )
    with open(file, "r") as f:
        for line in f:
            if energy_label.search(line):
                # The energy is the last whitespace-separated token on the line.
                return float(line.split()[-1])
    # Reached the end of the file without finding the requested state.
    return None

## Get the energy of a state from a CASSCF job

In [None]:
# State IDs are 1-indexed: state=2 is reported below as "s1", and state=1 is
# compared against a reference energy to within eps.
s1_energy = get_casscf_energy_of_state(
    file=Path('../examples/tc-outfile/tc.out'),
    state=2,
)
print(f"s1 energy: {s1_energy}")
test_close(
    get_casscf_energy_of_state(file=Path('../examples/tc-outfile/tc.out'), state=1),
    -750.98527871713941,
    eps=1e-6,
)

s1 energy: -750.8831979447649


In [None]:
# | export
def get_cas_tdip_of_states(
    file: Path,  # Path to TC output file
) -> list:  # One dict per transition: state1, state2, t_x, t_y, t_z, t_mag, osc
    """Return every transition dipole entry listed in the singlet electronic-transition section of `file`."""
    start_marker = "Singlet state electronic transitions:"
    end_marker = "Singlet state velocity transition dipole moments:"

    # A plain signed decimal. Unlike a loose character class such as
    # `[-?\d.]+`, this can never capture tokens like "-" or "1-2" that would
    # make `float()` raise.
    number = r"(-?(?:\d+\.?\d*|\.\d+))"
    # "<state1> -> <state2>" followed by five numeric columns. Whitespace is
    # matched flexibly so rows whose indices are padded differently still parse.
    row = re.compile(r"(\d+)\s*->\s*(\d+)" + rf"\s+{number}" * 5)
    value_keys = ("t_x", "t_y", "t_z", "t_mag", "osc")

    tdip_info = []
    in_section = False
    with open(file, "r") as f:
        for line in f:
            # Rows are only collected between the start marker and the
            # velocity-dipole marker; the section may be entered more than once.
            if start_marker in line:
                in_section = True
            if end_marker in line:
                in_section = False
            if not in_section:
                continue

            match = row.search(line)
            if match is None:
                continue

            state_1, state_2, *values = match.groups()
            entry = {"state1": int(state_1), "state2": int(state_2)}
            entry.update(zip(value_keys, map(float, values)))
            tdip_info.append(entry)
    return tdip_info

## Transition dipole moment information can also be read from a TeraChem output file

In [None]:
# Expected transition records, one per state pair, used as the reference value
# when checking get_cas_tdip_of_states against the example output file.
tdip_test = [
    dict(state1=1, state2=2, t_x=3.8931, t_y=-0.7637, t_z=0.0162, t_mag=3.9673, osc=1.0711),
    dict(state1=1, state2=3, t_x=-0.1401, t_y=0.1908, t_z=0.5539, t_mag=0.6023, osc=0.0366),
    dict(state1=2, state2=3, t_x=-0.7214, t_y=0.1486, t_z=-0.0227, t_mag=0.7369, osc=0.0178),
]

In [None]:
# The transitions parsed from the example output must equal `tdip_test`.
test_eq(
    get_cas_tdip_of_states(file=Path('../examples/tc-outfile/tc.out')),
    tdip_test,
)

In [None]:
#| hide
# Call nbdev's export entry point; this cell is marked hidden above.
import nbdev
nbdev.nbdev_export()