In [1]:
from datetime import date
from typing import NamedTuple, List

class Action(NamedTuple):
    date: date
    action: str

class Run(NamedTuple):
    date: date
    run: int
    precursor: str
    co_reactant: str
    cycles: int
    process: str
    sequence: str
    T: int
    P: int

ACTIONS: List[Action] = []
RUNS: List[Run] = []

In [None]:
from datetime import datetime
import io
from parse import parse, Result
import string
from typing import cast

def parse_experiment_contents(buffer: io.TextIOBase, global_date: date):
    global_T: int
    global_P: int

    buffer.seek(0)
    
    # Register Globals
    _result = find_read_line(buffer, "T{:s}={:s}{T:g}{T_unit:l}")
    if not "T" in _result:
        raise ValueError("Could not find global temperature!")
    global_T = _result["T"]
    
    _result = find_read_line_aliases(buffer, [
        "P{:s}={:s}{P:g}{:s}{P_unit:l}",
        "Pressure{:s}={:s}{P:g}{:s}{P_unit:l}"
    ])
    if not "P" in _result:
        raise ValueError("Could not find global pressure!")
    global_P = _result["P"]

    # Action
    _checkpoint_pre_action = buffer.tell()
    if not find_read_separator(buffer, "_", 28):
        raise ValueError("Could not find action section")

    if occurs_before_separator(buffer, "loaded", "_", 28):
        action_label = read_nonws_line(buffer)
        
        _result = find_read_line(buffer, "{action_type} {month:d}/{day:d}/{year:d}")
        action_type = _result["action_type"]
        action_date: date = datetime(
            year=_result["year"],
            month=_result["month"],
            day=_result["day"]
        )

        # Action: Wafer
        if find_read_separator(buffer, "_", 28) == -1:
            raise ValueError("Could not find action->wafer section")
        
        wafer_label = read_nonws_line(buffer)
        # discard wafer label for now
        
        # Register Action
        this_action = Action(
            date=action_date,
            action=f"{action_label} {action_type}"
        )
        ACTIONS.append(this_action)
    else:
        # Not an action, likely a run instead
        buffer.seek(_checkpoint_pre_action)

    # Runs
    while True:
        if find_read_separator(buffer, "_", 28) == -1:
            break 

        _result = find_read_line(buffer, "Run{:s}{number:d}")
        if not "number" in _result:
            break
        
        run_number = _result["number"]

        _result = find_read_line(buffer, "{cycles:d}{:s}cycles{:s}{precursor}|{co_reactant}")
        run_cycles = _result["cycles"]
        run_precursor = _result["precursor"]
        run_co_reactant = _result["co_reactant"]

        run_process = read_nonws_line(buffer)
        if not run_process:
            raise ValueError(f"Missing run process")
        _run_proc_partitions = run_process.count("|") 

        run_sequence = read_nonws_line(buffer)
        if not run_sequence:
            raise ValueError(f"Missing run sequence")
        _run_seq_partitions = run_sequence.count("|")
        
        if (_run_proc_partitions != _run_seq_partitions):
            raise ValueError(f"Mismatch in partitions between process and run sequence in run #{run_number}")
        
        # Register Run
        this_run = Run(
            date=global_date,
            run=run_number,
            precursor=run_precursor,
            co_reactant=run_co_reactant,
            cycles=run_cycles,
            process=run_process,
            sequence=run_sequence,
            T=global_T,
            P=global_P
        )
        RUNS.append(this_run)
       
def occurs_before_separator(buffer: io.TextIOBase, key: str, tile: str, threshold: int) -> int:
    checkpoint = buffer.tell()

    while True:
        line = read_nonws_line(buffer)
        if not line:
            return -1
        
        if not key in line:
            continue
            
        result = buffer.tell()
        buffer.seek(checkpoint)
        return result

def find_read_separator(buffer: io.TextIOBase, tile: str, threshold: int) -> int:
    threshold_str = tile * (threshold-1)

    result = find_read_line(buffer, threshold_str + "{remainder:w}")
    if "__file_position__" not in result:
        return -1
    
    return result["__file_position__"]

def find_read_line(buffer: io.TextIOBase, pattern: str) -> dict:
    return find_read_line_aliases(buffer, [pattern])

def find_read_line_aliases(buffer: io.TextIOBase, patterns: List[str]) -> dict:
    checkpoint = buffer.tell()

    while True:
        line = read_nonws_line(buffer)
        if not line:
            print(f"Could not find line: {patterns}")
            buffer.seek(checkpoint)
            return {}

        result_dict = parse_aliases_to_dict(patterns, line.strip())
        if len(result_dict) == 0:
            continue
        
        result_dict["__file_position__"] = buffer.tell()
        return result_dict

def parse_aliases_to_dict(patterns, line) -> dict:
    for pattern in patterns:
        result = parse_to_dict(pattern, line)
        if len(result) != 0:
            return result
    return {}


def parse_to_dict(pattern, line) -> dict:
    result = parse(pattern, line)
    if not result:
        return {}
    
    result = cast(Result, result) 

    formatter = string.Formatter()
    field_names = [fname for _, fname, _, _ in formatter.parse(pattern) if fname]

    # Check that all named fields are non-empty
    if not all(result.named.get(name) not in [None, ""] for name in field_names):
        print(f"A parameter was missing that fit {pattern}")
        return {}

    result_dict = result.named
    result_dict["__match_pattern__"] = pattern
    return result_dict


def read_nonws_line(buffer: io.TextIOBase) -> str | None: 
    checkpoint = buffer.tell()
    
    while True:
        line = buffer.readline()
        if not line:
            buffer.seek(checkpoint)
            return None

        if not line.strip():
            continue

        return line


In [3]:
from datetime import datetime
import os

def parse_experiment_file(path):
    f = open(path, 'r')
    exp_date: date = datetime.strptime(os.path.basename(path), "%Y%m%d.txt")
    parse_experiment_contents(f, exp_date)

In [4]:
import bisect
import pandas as pd

def update_experiment_df(df: pd.DataFrame):
    for action in ACTIONS:
        action_row = pd.DataFrame([{
            'Date': int(action.date.strftime("%Y%m%d")),
            'Run/Action': action.action,
            'Precursor': None,
            'Co-reactant': None,
            'Co-absorbate': None,
            '# Cycles': None,
            'Process': None,
            'Sequence': None,
            'Furnace T (°C)': None,
            'P (Torr)': None,
        }])

        df = pd.concat([df, action_row], ignore_index=True)

    for run in RUNS:
        run_row = pd.DataFrame([{
            'Date': int(run.date.strftime("%Y%m%d")),
            'Run/Action': run.run,
            'Precursor': run.precursor,
            'Co-reactant': run.co_reactant,
            'Co-absorbate': None,
            '# Cycles': None,
            'Process': f"{run.cycles} cycles {run.process}",
            'Sequence': run.sequence,
            'Furnace T (°C)': run.T,
            'P (Torr)': run.P,
        }])

        df = pd.concat([df, run_row], ignore_index=True)

    df = df.sort_values(by=['Date', 'Run/Action'], ascending=[True, True])
    return df

In [5]:
parse_experiment_file('data/20240123.txt')
parse_experiment_file('data/20240124.txt')
parse_experiment_file('data/20240125.txt')

df = pd.DataFrame()
df = update_experiment_df(df)
df.to_excel('output.xlsx', index=False)

Could not find line: ['___________________________{remainder:w}']
Could not find line: ['Run{:s}{number:d}']
Could not find line: ['___________________________{remainder:w}']


  df = pd.concat([df, run_row], ignore_index=True)
