In [None]:
import ast
import json
import re
from pathlib import Path

In [None]:
# Base directory of the plasma-converter tree; other cells build result paths
# beneath it (e.g. `src + "/results/" + run_name`).
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
src = "/home/ubuntu/ohm-tree-filesys-az/plasma-converter"
# Name of the run to analyse; used as the sub-directory name under `results/`.
run_name = "valid_proper_comments_2024-10-31_18-57-11"

In [None]:
# Results directory for this run (src + "/results/" + run_name):
# /home/ubuntu/ohm-tree-filesys-az/plasma-converter/results/valid_proper_comments_2024-10-31_18-57-11

In [None]:
# Collect the names of problems whose result file reports success.
# A result file counts as solved when its final line is exactly "Result: 1.0".
results_dir = Path(src + "/results/" + run_name)

solved_problems = []

# iterdir() yields entries in arbitrary order; sort so the list is reproducible.
for file_path in sorted(results_dir.iterdir()):
    if not (file_path.is_file() and file_path.name.endswith(".txt")):
        continue

    with open(file_path, 'r') as file:
        lines = file.readlines()

    # An empty file has no result line: skip it instead of raising IndexError.
    # The final line may or may not carry a trailing newline; accept both.
    if lines and lines[-1].rstrip("\n") == "Result: 1.0":
        # Record the file name without its extension.
        solved_problems.append(file_path.stem)

In [None]:
# Default divider between sections: two consecutive dashed lines.
_SECTION_SEPARATOR = '--------------------------------------------------------------------------------\n' * 2

# A `key | value` line; the key must start with a word character.
_KV_PATTERN = re.compile(r"^\s*(\w.*?)\s*\|\s*(.*)$")

# The four fields found between the parentheses of a `LeanState(...)` line.
_LEAN_STATE_PATTERN = re.compile(
    r"code=['\"](.*?)['\"],\s*"
    r'depth=(\d+),\s*'
    r"tactic_state=['\"](.*?)['\"],\s*"
    r'dead=(True|False)\s*$',
    re.DOTALL
)

# One number inside a bracketed array: optional sign, integer or decimal
# mantissa, optional exponent (so `-1`, `2.`, `.5` and `1.5e-05` each match whole).
_NUMBER_PATTERN = re.compile(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?")


def _coerce_value(value):
    """Convert the text right of the `|` into a list, tuple, int, float or str."""
    if value.startswith("[") and value.endswith("]"):
        # Bracketed array: pull out every number, keeping signs and exponents.
        return [
            float(token) if any(ch in token for ch in ".eE") else int(token)
            for token in _NUMBER_PATTERN.findall(value)
        ]

    if value.startswith("(") and value.endswith(")"):
        try:
            return ast.literal_eval(value)
        except (ValueError, SyntaxError):
            # Parenthesised text that is not a Python literal stays a string
            # rather than aborting the whole parse.
            return value

    # Scalars: prefer int (including signed ones), then float, else keep text.
    for convert in (int, float):
        try:
            return convert(value)
        except ValueError:
            continue
    return value


def _parse_lean_state(line):
    """Parse one `LeanState(...)` line into a dict, or return None if it does not match."""
    # Drop the leading "LeanState(" and the final character (the closing paren).
    match = _LEAN_STATE_PATTERN.search(line[len("LeanState("):-1])
    if match is None:
        return None

    code, depth, tactic_state, dead = match.groups()
    # Only the two-character sequence backslash + "n" is expanded to a real
    # newline; any other escape sequence is left exactly as written.
    return {
        'code': code.replace('\\n', '\n'),
        'depth': int(depth),
        'tactic_state': tactic_state.replace('\\n', '\n'),
        'dead': dead == 'True'
    }


def parse_tree_txt_to_dict(file_path, section_separator=_SECTION_SEPARATOR):
    """Parse a tree dump text file into a list of per-section dictionaries.

    The file content is split on ``section_separator``; blank sections are
    dropped. Within a section each line is handled as follows:

    * A line starting with ``LeanState`` is parsed into
      ``result['LeanState'] = {'code': str, 'depth': int,
      'tactic_state': str, 'dead': bool}``. If the line does not match the
      expected field layout it is ignored.
    * A ``key | value`` line is stored under ``key`` with the value coerced:
      ``[...]`` becomes a list of numbers (ints, or floats when the number
      has a decimal point or exponent), ``(...)`` is evaluated as a Python
      literal (kept as a string if it is not one), integers become ``int``,
      other numeric text becomes ``float``, and anything else stays a string.
    * Any other line is skipped.

    Args:
        file_path: Path of the text file to read.
        section_separator: Text that divides sections. Defaults to two
            consecutive dashed lines.

    Returns:
        A list with one dict per non-empty section, in file order.
    """
    with open(file_path, 'r') as file:
        content = file.read()

    sections = [sec.strip() for sec in content.split(section_separator) if sec.strip()]

    parsed_data = []
    for section in sections:
        result = {}

        for line in section.splitlines():
            if line.startswith("LeanState"):
                lean_state = _parse_lean_state(line)
                if lean_state is not None:
                    result['LeanState'] = lean_state
                continue

            kv_match = _KV_PATTERN.match(line)
            if kv_match:
                key, value = kv_match.groups()
                result[key.strip()] = _coerce_value(value.strip())

        parsed_data.append(result)

    return parsed_data

In [None]:
# Smoke test for parse_tree_txt_to_dict on a single tree file.
# The path is derived from `src` and `run_name` so it follows the config cell
# instead of repeating the absolute path by hand.
file_path = f"{src}/outputs/{run_name}/aime_1987_p8_tree.txt"
data = parse_tree_txt_to_dict(file_path)

# Print every parsed section as an indented key/value listing.
for section_number, section in enumerate(data, start=1):
    print(f"Section {section_number}:")
    for key, value in section.items():
        print(f"  {key}: {value}")

    print()