In [4]:
from datetime import date
from typing import NamedTuple, List

class Action(NamedTuple):
    date: date
    label: str

class Run(NamedTuple):
    date: date
    number: int
    precursor: str
    co_reactant: str
    process: str
    sequence: str
    T: int
    P: int

ACTIONS: List[Action] = []
RUNS: List[Run] = []

In [None]:
from datetime import datetime
import io
from parse import parse, Result
import string
from typing import cast

def parse_experiment_file(buffer: io.TextIOBase, filename: str):
    global_date: date = datetime.strptime(filename, "%Y%m%d.txt")
    
    global_T: int
    global_P: int

    buffer.seek(0)
    
    # Register Globals
    _result = find_read_line(buffer, "T{:s}={:s}{T:g}{T_unit:l}")
    global_T = _result["T"]
    
    _result = find_read_line(buffer, "P{:s}={:s}{P:g}{:s}{P_unit:l}")
    global_P = _result["P"]

    # Action: Crystal
    if not find_read_separator(buffer, "_", 28):
        raise ValueError("Could not find crystal section")
    action_label = read_nonws_line(buffer)
    
    _result = find_read_line(buffer, "{action_type} {month:d}/{day:d}/{year:d}")
    action_type = _result["action_type"]
    action_date: date = datetime(
        year=_result["year"],
        month=_result["month"],
        day=_result["day"]
    )

    # Action: Wafer
    if not find_read_separator(buffer, "_", 28):
        raise ValueError("Could not find wafer section")
    wafer_label = read_nonws_line(buffer)
    # discard wafer label for now
    
    # Register Action
    this_action = Action(
        date=action_date,
        label=f"{action_label} {action_type}"
    )
    ACTIONS.append(this_action)

    # Runs
    while True:
        print("Run time")
        _separator = find_read_separator(buffer, "_", 28)
        if not _separator:
            break 

        _result = find_read_line(buffer, "Run{:s}{number:d}")
        run_number = _result["number"]

        _result = find_read_line(buffer, "{cycles_str}{:s}{precursor}|{co_reactant}")
        run_cycles_str = _result["cycles_str"]
        run_precursor = _result["precursor"]
        run_co_reactant = _result["co_reactant"]

        run_process = read_nonws_line(buffer)
        if not run_process:
            raise ValueError(f"Missing run process")
        _run_proc_partitions = run_process.count("|") 

        run_sequence = read_nonws_line(buffer)
        if not run_sequence:
            raise ValueError(f"Missing run sequence")
        _run_seq_partitions = run_sequence.count("|")
        
        if (_run_proc_partitions != _run_seq_partitions):
            raise ValueError(f"Mismatch in partitions between process and run sequence in run #{run_number}")
        
        # Register Run
        this_run = Run(
            date=global_date,
            number=run_number,
            precursor=run_precursor,
            co_reactant=run_co_reactant,
            process=f"{run_cycles_str} cycles {run_process}",
            sequence=run_sequence,
            T=global_T,
            P=global_P
        )
        RUNS.append(this_run)

def read_nonws_line(buffer: io.TextIOBase) -> str | None: 
    while True:
        line = buffer.readline()
        if not line:
            return None

        if not line.strip():
            continue

        return line

def find_read_line(buffer: io.TextIOBase, pattern: str) -> dict:
    formatter = string.Formatter()
    field_names = [fname for _, fname, _, _ in formatter.parse(pattern) if fname]

    while True:
        line = read_nonws_line(buffer)
        if not line:
            raise ValueError(f"Could not find line: {pattern}")

        result = parse(pattern, line.strip())
        if not result:
            continue
        else:
            result = cast(Result, result) 

        # Check that all named fields are non-empty
        if all(result.named.get(name) not in [None, ""] for name in field_names):
            return result.named
        
def find_read_separator(buffer: io.TextIOBase, tile: str, threshold: int):
    while True:
        line = read_nonws_line(buffer)
        if not line:
            return None
        
        if not line.startswith(tile * threshold):
            continue
        
        return line


In [6]:
f = open('data/20240123.txt', 'r')
parse_experiment_file(f, "20240123.txt")
print(RUNS)
print(ACTIONS)

Run time
Run time
Run time
[Run(date=datetime.datetime(2024, 1, 23, 0, 0), number=1, precursor='cycles TMA', co_reactant='H2O', process='10 Cycles TMA|Purge|H2O|purge\n', sequence='2|28|0.5|59.5\n', T=285.0, P=1.0), Run(date=datetime.datetime(2024, 1, 23, 0, 0), number=2, precursor='cycles TMA', co_reactant='tbuoh', process='10 Cycles TMA|Purge|tbuoh|purge\n', sequence='2|28|0.5|59.5\n', T=285.0, P=1.0)]
[Action(date=datetime.datetime(2024, 1, 22, 0, 0), label='NEW Au 285C crystal \n loaded')]
