In [13]:
import tomllib
import msgspec
from pathlib import Path
from IPython.display import display
from decimal import Decimal

In [14]:
content_file = Path(r"../tests/assets/test.toml").open('rb').read()
content_file_str = Path(r"../tests/assets/test.toml").open('rb').read()

## timing tests between msgspec and tomllib
Result is : no difference, so let's use tomllib which is embedded in python

In [None]:
%%timeit
test_content = msgspec.toml.decode(content_file)

111 µs ± 325 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [5]:
%%timeit
test_content = tomllib.loads(content_file.decode())

113 µs ± 808 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


## get test toml data from file

In [15]:
test_content = tomllib.load(Path(r"../tests/assets/test.toml").open('rb'))  #, parse_float=Decimal)
test_content

{'title': 'TOML Example',
 'owner': {'info': {'name': 'Tom Preston-Werner',
   'dob': datetime.datetime(1979, 5, 27, 7, 32, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=57600))),
   'nb_of_days_per_month': 30,
   'nb_of_months': 3,
   'nb_of_days': '=nb_of_months * nb_of_days_per_month',
   'calculated_value': '= nb_of_days^3 * database.nb_of_ports + 2 / database.data[1][0]^2 * 2.32e-3 * _input_parts - cos(2*nb_of_days)'},
  'database': {'enabled': True,
   'nb_of_ports': 3,
   'ports': [8000, 8001, 8002],
   'data': [['delta', 'phi'], [3.14]],
   'temp_targets': {'cpu': 79.5, 'case': 72.0}}},
 'servers': {'alpha': {'ip': '10.0.0.1',
   'role': 'frontend',
   'test_list_of_dict': [{'a': 'a_val', 'b': 'b_val'}, {'c': 'c_val'}]},
  'beta': {'role': 'backend',
   'test': {'ip': '10.0.0.2', 'role': 'backend_test'},
   'prod': {'ip': '10.0.0.3', 'role': 'backend_prod'}}}}

## find calculated variables in toml dict

In [16]:
from typing import Any


def find_formulas(dictionary: dict[str, Any]) -> dict[tuple[str, ...], str]:
    formulas = {}
    for key, value in dictionary.items():
        if isinstance(value, str) and value.startswith("="):
            formulas[(key,)] = value
        elif isinstance(value, dict):
            nested_dict = find_formulas(value)
            for nested_key, nested_value in nested_dict.items():
                formulas[(key, *nested_key)] = nested_value
        # elif type(value) in (list, tuple):
        #     for list_key, list_value in enumerate(value):

        #         nested_dict = find_formulas(list_value)
        #         for nested_key, nested_value in nested_dict.items():
        #             formulas[(list_key, *nested_key)] = nested_value
    return formulas


In [17]:
find_formulas(test_content)

{('owner', 'info', 'nb_of_days'): '=nb_of_months * nb_of_days_per_month',
 ('owner',
  'info',
  'calculated_value'): '= nb_of_days^3 * database.nb_of_ports + 2 / database.data[1][0]^2 * 2.32e-3 * _input_parts - cos(2*nb_of_days)'}

## parse the calculated variable formula into a nested list

In [18]:
import pyparsing as pp

In [19]:
from typing import Callable


def decimal_parser() -> pp.ParserElement:
    def convert_to_decimal(token: pp.ParseResults) -> Decimal:
        return Decimal(str(token[0]))
    int_part = pp.Opt(pp.one_of("- +"))+pp.Word(pp.nums)
    mantissa = int_part + pp.Opt("." + pp.Opt(pp.Word(pp.nums)))
    exponent_part = pp.Opt(pp.one_of("e E") + pp.Opt(pp.one_of("- +"))+ pp.Word(pp.nums))
    number = pp.Combine(mantissa + exponent_part)
    number.set_parse_action(convert_to_decimal)
    return number

def regular_number_parser() -> pp.ParserElement:
    return pp.common.number

def var_name_parser() -> pp.ParserElement:
    dict_like_key = pp.Suppress(".") + pp.common.identifier
    list_like_index = pp.Suppress("[")+pp.common.integer+pp.Suppress("]")
    subvar_element = dict_like_key ^ list_like_index
    return pp.Group(pp.common.identifier + subvar_element[...], aslist=True)

def operator_operand_expr(number_parser: Callable[[], pp.ParserElement]) -> pp.ParserElement:
    operator_operand = pp.Forward()
    number = number_parser()
    var_name = var_name_parser()
    function_struct = pp.common.identifier + pp.Suppress("(") + operator_operand + pp.Suppress(")")

    operand = function_struct | number | var_name
    operator_operand <<= pp.infix_notation(operand,
    [
        ("-", 1, pp.OpAssoc.RIGHT),
        ("^", 2, pp.OpAssoc.LEFT),
        (pp.one_of("* /"), 2, pp.OpAssoc.LEFT),
        (pp.one_of("+ -"), 2, pp.OpAssoc.LEFT),
    ])
    return operator_operand

def build_operand_parser(number_parser: Callable[[], pp.ParserElement]) -> pp.ParserElement:
    operator_operand = operator_operand_expr(number_parser)
    return pp.Suppress("=") + operator_operand

operand_parser = build_operand_parser(number_parser=regular_number_parser)

In [24]:
isinstance(operand_parser, pp.ParserElement)

True

In [8]:
operand_parser.parse_string("=-2.233e3--2").asList()

[[['-', 2233.0], '-', ['-', 2]]]

In [9]:
test_str = "=nb_of_days^(cos(pi/2)+2) * database.nb_of_ports + 2 / database.data[1][0]^2 * 2.32e-3 * _input_parts - cos(2* pi + 3*sin(2-8*3.2e-5^6)/nb_of_days)"
test_parsed = operand_parser.parse_string(test_str).asList()
test_parsed

[[[[['nb_of_days'], '^', ['cos', [['pi'], '/', 2], '+', 2]],
   '*',
   ['database', 'nb_of_ports']],
  '+',
  [2,
   '/',
   [['database', 'data', 1, 0], '^', 2],
   '*',
   0.00232,
   '*',
   ['_input_parts']],
  '-',
  'cos',
  [[2, '*', ['pi']],
   '+',
   [3,
    '*',
    'sin',
    [2, '-', [8, '*', [3.2e-05, '^', 6]]],
    '/',
    ['nb_of_days']]]]]

In [10]:
import math
math_functions = []
math_constants = []
for math_object_name in dir(math):
    math_object = getattr(math, math_object_name)
    if callable(math_object) and not math_object_name.startswith("__"):
        math_functions.append(math_object_name)
    elif type(math_object) == float:
        math_constants.append(math_object_name)

## get variable references in nested list

In [11]:
def find_var_ref_indices(parsed_lists: list) -> list:
    base_elements_indices = []
    for i, element in enumerate(parsed_lists):
        # we have a var name
        if isinstance(element, list) and isinstance(element[0], str) and element[0] not in math_functions:
            # base_element = element[0]
            # print(element)
            base_elements_indices.append([i])
        elif isinstance(element, list):
            base_elements_indices.extend([[i, *inner_indices] for inner_indices in find_var_ref_indices(element)])
    return base_elements_indices

find_var_ref_indices([[[[['nb_of_days'], '^', 3], '*', ['database', 'nb_of_ports']],
  '+',
  [2,
   '/',
   [['database', 'data', 1, 0], '^', 2],
   '*',
   0.00232,
   '*',
   ['_input_parts']],
  '-',
  'cos',
  [2, '*', ['nb_of_days']]]])

[[0, 0, 0, 0], [0, 0, 2], [0, 2, 2, 0], [0, 2, 6], [0, 5, 2]]

## get/set elements in a nested list

In [12]:
# see https://stackoverflow.com/a/44579249/10926757
from typing import Any


def get_deep(nested_list: list, indices: list[int]) -> Any:
    if (len(indices) > 1) and isinstance(nested_list[indices[0]], list):
        return get_deep(nested_list[indices[0]], indices[1:])
    else:
        return nested_list[indices[0]]

def set_deep(nested_list: list, indices: list[int], value: Any) -> list:
    if (len(indices) > 1) and isinstance(nested_list[indices[0]], list):
        set_deep(nested_list[indices[0]], indices[1:], value)
    else:
        nested_list[indices[0]] = value
    return nested_list

get_deep(test_parsed, [0, 2, 2, 0])
set_deep(test_parsed, [0, 2, 2, 0], test_content["database"]["data"][1][0])

KeyError: 'database'

## get the value of a variable from the reference list
there are some scope considerations here: the first element in the reference can be any element of the tree leading to  the calculated variable

In [26]:
def build_ref_levels(ref_position: list[str|int]) -> list[list[str|int]]:
    ref_levels = []
    for i in reversed(range(len(ref_position))):
        ref_levels.append(ref_position[0:i])
    return ref_levels

def get_value_in_data(data: dict, list_identifier: list[str | int]) -> Any:
    item = data
    for i_or_key in list_identifier:
        item = item[i_or_key]
    return item

def get_value_from_ref_list(data: dict, list_identifier: list[str | int], ref_levels: list[list[str | int]]) -> Any:
    return_value = None
    for ref_level in ref_levels:
        try:
            return_value = get_value_in_data(data=data, list_identifier=ref_level + list_identifier)
        except (KeyError, TypeError):
            continue
    return return_value


list_identifier = ["database", "data", 1, 0]
ref_levels = build_ref_levels(["owner", "info", "calculated_value"])
get_value_from_ref_list(test_content, list_identifier=list_identifier, ref_levels=ref_levels)

3.14

## calculate the formula with only values  

In [14]:
def find_inner_lists(parsed_lists: list) -> list[list[int]]:
    inner_lists_positions = []
    def inner_list_seeker(search_list: list, position: list[int]) -> list[int] | None:
        found_list = False
        for i, element in enumerate(search_list):
            if isinstance(element, list):
                inner_lists_positions.append(inner_list_seeker(element, position + [i]))
                found_list = True
        if not found_list:
            return position
    inner_list_seeker(parsed_lists, position=[])
    return [position for position in inner_lists_positions if position is not None]

Number = int | float | Decimal

OPERATIONS = {
    r"+": lambda a, b: a + b,
    r"-": lambda a, b: a - b,
    r"*": lambda a, b: a * b,
    r"/": lambda a, b: a / b,
    r"^": lambda a, b: a ** b,
}

def calc_result_update(result: Number, value: Number, operation: str | None = None):
    if result is None or operation is None:
        return value
    else:
        return OPERATIONS[operation](result, value)

def calc_math_func_result(function_name: "str", function_arg: Number):
    return getattr(math, function_name)(function_arg)

def calculate_local(calc_list: list[Number | str]) -> Number:
    result = 0
    operator = "+"
    i = 0
    while i < len(calc_list):
        operand_or_operator = calc_list[i]
        if operand_or_operator in list(OPERATIONS.keys()):
            operator = operand_or_operator
        else:
            if operand_or_operator in math_functions:
                operand = calc_math_func_result(function_name=operand_or_operator, function_arg=calc_list[i+1])
                i += 1
            else:
                operand = operand_or_operator
            result = calc_result_update(result, operand, operator)
        i += 1

    return result


def calculate_formula_w_value(parsed_formula_w_value: list) -> Number:
    inner_lists = find_inner_lists(parsed_formula_w_value)

    while len(inner_lists) > 0:
        for inner_list in inner_lists:
            calc_list = get_deep(parsed_formula_w_value, inner_list)
            local_result = calculate_local(calc_list=calc_list)
            set_deep(parsed_formula_w_value, inner_list, local_result)
        inner_lists = find_inner_lists(parsed_formula_w_value)
        
    return parsed_formula_w_value[0]

# calculate_local(["cos", 3.14, "+", 1, "-", 2, "+", "sin", 3.14/2])


## assemble all bricks to calculate a calculated variable

In [15]:
import functools


def _parse_any_value(value: str | Any, operand_parser: pp.ParserElement) -> list | Any:
    if isinstance(value, str) and value.startswith("="):
        parsed_new_val = operand_parser.parse_string(value).asList()
    else:
        parsed_new_val = value
    return parsed_new_val

parse_any_value = functools.partial(_parse_any_value, operand_parser=operand_parser)

def replace_vars_by_values(parsed_formula: list, data: dict, formula_position: list, context_variables: dict[str, float | int | Decimal]) -> list:
    while True:
        var_ref_indices = find_var_ref_indices(parsed_formula)
        if var_ref_indices == []:
            break
        for var_ref_index in var_ref_indices:
            var_ref = get_deep(parsed_formula, var_ref_index)
            try:
                parsed_new_val = context_variables[var_ref[0]]
            except (KeyError, TypeError):
                ref_levels = build_ref_levels(formula_position)
                var_new_val = get_value_from_ref_list(data, list_identifier=var_ref, ref_levels=ref_levels)
                parsed_new_val = parse_any_value(var_new_val)
            set_deep(parsed_formula, var_ref_index, parsed_new_val)

    return parsed_formula


formulas = find_formulas(test_content)
for formula_position, formula in list(formulas.items()):
    parsed_formula = operand_parser.parse_string(formula).asList()
    parsed_formula_w_value = replace_vars_by_values(parsed_formula=parsed_formula, data=test_content, formula_position=list(formula_position), context_variables={"_input_parts": 25})
    calculated_value = calculate_formula_w_value(parsed_formula_w_value)
    # set the value in toml dict
    display(calculated_value)

90

2187000.610225252

## algorithm considerations
last level of list can be:
- a calculation with only numbers
- a variable name or succession of names if tree (dict or list or combination of both)
  
this last level can be calculated

however a better approach would be to list all "in config" variables and find their values when possible.  Looping through all calculated variables allows to calculate at least some variables

Problem:
I want to calculate DPT time. This is based on the quantity of pieces which is a quantity known in the calculation context. So the calculations cannot be performed upfront.

The solution is to perform calculated variables calculations on-the-fly when calling the config, with a "context" parameter somehow (possibly a config object attribute or a function parameter, TBD) which will hold the context values needed for calculation (e.g. the number of parts)

This solves the issue of the context values but not necessarily of the nested calculated variables

For nested calculated variables, a solution could be to replace literally in the structure the reference of a calculated variable by its expression. This would be the first step in the evaluation of the calculated variables, with recursive behaviour until no reference to calculated variable is present in the expression. All values of all references present can then be calculated.

So the evaluation goes through three steps:

1. Find all variable references and the corresponding values. Context references have priority over config references.
2. Replace all references by their expression. If a reference referred to another calculated variable, this will reintroduce new references to look for.
3. Loop 1 and 2 until no reference to other variables is left.
4. Perform the calculation

In [112]:
test_str = "=nb_of_days^3 * database.nb_of_ports"
operand_parser.parse_string(test_str).asList()

[[[['nb_of_days'], '^', 3], '*', ['database', 'nb_of_ports']]]

In [113]:
operand_parser.parse_string(find_formulas(test_content)[('owner', 'calculated_value')])

ParseResults([ParseResults([ParseResults([ParseResults([['nb_of_days'], '^', 3], {}), '*', ['database', 'nb_of_ports']], {}), '+', ParseResults([2, '/', ParseResults([['data', 0, 1, 0], '^', 2], {}), '*', 0.00232, '*', ['_input_parts']], {}), '-', 'cos', ParseResults([2, '*', ['nb_of_days']], {})], {})], {})

In [45]:
type(operand_parser)

pyparsing.core.And

nested () expression