# math_modular_NLP_train_test_benchmark_generator

- rough pre-alpha code but some basic functions working

- RPN shunting-yard checked against an alternate calculation and error-case ~unit-tests

- alpaca format ish demo working





# RPN Calculator

In [None]:
from math import sin, cos, tan, radians, sqrt, pi ,e


# RPN-Calculator Helper
def to_rpn(expression):
    """
    Convert an infix expression string into an RPN "recipe".

    The expression is tokenized with tokenize() and reordered with
    shunting_yard(). Every item of the resulting RPN sequence becomes a
    2-tuple: ("PUSH", item) when the item is an int or float, and
    ("OPERATOR", item) for anything else.

    Returns:
        The list of (kind, token) tuples. If any step raises, the error is
        printed and the function returns None.
    """
    try:
        rpn_items = shunting_yard(tokenize(expression))
        return [
            ("PUSH", item) if isinstance(item, (int, float)) else ("OPERATOR", item)
            for item in rpn_items
        ]
    except Exception as err:
        print(f"ERROR in function: to_rpn(expression) = {str(err)}" )

def get_precedence(op):
    """
    Return the binding strength of an operator token (higher binds tighter).

    Levels, from loosest to tightest:
        1  '+', '-'                 (binary addition / subtraction)
        2  '*', '/'
        3  'u-'                     (unary minus)
        4  '**'
        5  'sin', 'cos', 'tan', 'sqrt', 'abs'   (prefix functions)

    Any token that is not in the table (for example '(') gets 0.
    """
    precedence = {
        '+': 1, '-': 1,
        '*': 2, '/': 2,
        'u-': 3,
        '**': 4,
        # 'abs' is handled as an operator by the parser, so it needs the same
        # level as the other prefix functions; without it `abs(-3) + 2`
        # would be grouped as abs(-3 + 2).
        'sin': 5, 'cos': 5, 'tan': 5, 'sqrt': 5, 'abs': 5,
    }
    return precedence.get(op, 0)

# new
def shunting_yard(expression_tokens):
    """
    Reorder infix tokens into Reverse Polish Notation (shunting-yard).

    Args:
        expression_tokens: sequence of tokens as produced by tokenize():
            number strings ('3', '0.5'), the constants 'pi' and 'e',
            binary operators ('+', '-', '*', '/', '**'), unary minus 'u-',
            prefix functions ('sin', 'cos', 'tan', 'sqrt', 'abs') and
            parentheses. Tokens that are already int/float are accepted too.

    Returns:
        A list in RPN order. Number strings are converted to float; the
        constants and all operators stay as strings.

    Raises:
        ValueError: if parentheses are unbalanced (in either direction), or
            if a number-like token cannot be converted by float().

    Notes:
        * '**' is right-associative (2**3**2 == 2**(3**2)); the other binary
          operators are left-associative.
        * Prefix operators (functions and 'u-') are pushed without popping
          anything: they apply to what follows, never to what came before.
        * Tokens that match none of the known categories are skipped.
    """
    binary_operators = ('+', '-', '*', '/', '**')
    functions = ('sin', 'cos', 'tan', 'sqrt', 'abs')
    unbalanced_message = "Unbalanced parentheses in expression"

    output = []
    operators = []

    for token in expression_tokens:
        if isinstance(token, (int, float)):
            # Already numeric: nothing to parse
            output.append(token)
        elif token in ('pi', 'e'):
            # Constants travel through as names; the evaluator resolves them
            output.append(token)
        elif '.' in token or token.replace('-', '', 1).isdigit():
            output.append(float(token))
        elif token in binary_operators:
            while operators and operators[-1] != '(':
                top = operators[-1]
                if top in functions:
                    # A pending function always applies to its operand first
                    should_pop = True
                elif token == '**':
                    # Right-associative: only strictly tighter operators go first
                    should_pop = get_precedence(top) > get_precedence(token)
                else:
                    should_pop = get_precedence(top) >= get_precedence(token)
                if not should_pop:
                    break
                output.append(operators.pop())
            operators.append(token)
        elif token == 'u-' or token in functions:
            operators.append(token)
        elif token == '(':
            operators.append(token)
        elif token == ')':
            while operators and operators[-1] != '(':
                output.append(operators.pop())
            if not operators:
                # A ')' with no matching '(' used to surface as IndexError
                raise ValueError(unbalanced_message)
            operators.pop()  # Discard the matching '('
            if operators and operators[-1] in functions:
                # The group just closed was this function's argument
                output.append(operators.pop())

    while operators:
        pending = operators.pop()
        if pending == '(':
            # A '(' that was never closed
            raise ValueError(unbalanced_message)
        output.append(pending)

    return output


def tokenize(expression):
    """
    Split an infix expression string into a list of string tokens.

    All whitespace is removed first, so positions quoted in error messages
    are indexes into the whitespace-free expression.

    Token kinds produced:
        * numbers: runs of digits and dots, kept as strings ('3', '0.5')
        * '**', and the single-character tokens '+', '*', '/', '(', ')'
        * '-'  for binary subtraction, 'u-' for a sign (unary minus)
        * the constants 'e' and 'pi'
        * the function names 'sin', 'cos', 'tan', 'abs', 'sqrt'
        * any other character is reported with a printed warning and still
          appended as a one-character token

    A minus is unary when it starts the expression or follows an operator,
    an opening parenthesis, or a function name (so "sin -30" is sin(-30)).

    Raises:
        ValueError: when two of '+', '*', '/' are adjacent, or when a binary
            minus is directly followed by one of them.
    """
    # Drop every whitespace character (spaces, tabs, newlines, ...) up front
    expression = ''.join(expression.split())

    function_names = ('sin', 'cos', 'tan', 'abs', 'sqrt')
    # A '-' that follows one of these tokens can only be a sign
    sign_context = ('+', '-', '*', '/', '**', 'u-', '(') + function_names

    tokens = []
    length = len(expression)
    i = 0
    while i < length:
        char = expression[i]
        next_char = expression[i + 1] if i + 1 < length else ''

        # Exponentiation has to be recognised before a lone '*'
        if char == '*' and next_char == '*':
            tokens.append('**')
            i += 2
            continue

        # Two adjacent binary operators are a syntax error ('-' is excluded
        # here because it may legitimately be a sign)
        if char in '+*/' and next_char != '' and next_char in '+*/':
            raise ValueError(f"Invalid consecutive operators: {char}{next_char} at position {i}")

        if char == '-':
            if not tokens or tokens[-1] in sign_context:
                tokens.append('u-')  # Unary minus
            else:
                if next_char != '' and next_char in '+*/':
                    raise ValueError(f"Invalid consecutive operators: {char}{next_char} at position {i}")
                tokens.append('-')  # Binary minus (subtraction)
            i += 1
            continue

        if char == 'e':
            tokens.append('e')
            i += 1
            continue

        if expression.startswith('pi', i):
            tokens.append('pi')
            i += 2
            continue

        matched_function = next(
            (name for name in function_names if expression.startswith(name, i)),
            None,
        )
        if matched_function is not None:
            tokens.append(matched_function)
            i += len(matched_function)
            continue

        if char.isdigit() or char == '.':
            # Consume the whole run of digits and dots as one number token
            j = i + 1
            while j < length and (expression[j].isdigit() or expression[j] == '.'):
                j += 1
            tokens.append(expression[i:j])
            i = j
            continue

        # Remaining single characters: operators, parentheses, or unknown
        if char not in '+-*/()':
            print(f"Invalid character detected: {repr(char)}")
        tokens.append(char)
        i += 1

    return tokens


def sanitizer_validator(expression):
    """
    Normalise typographic math symbols to the plain notation the tokenizer reads.

    Replacements are applied one after another, in this order:
    multiplication sign -> '*', Unicode minus -> '-', caret -> '**',
    '=' removed, degree symbol removed.

    Returns:
        The cleaned string. If the replacement fails (for example the input
        is not a string), the error is printed and None is returned.
    """
    substitutions = (
        ('×', '*'),
        ('−', '-'),
        ('^', '**'),
        ('=', ''),
        ('°', ''),  # Handling degree symbol
    )
    try:
        cleaned = expression
        for symbol, replacement in substitutions:
            cleaned = cleaned.replace(symbol, replacement)
        return cleaned
    except Exception as err:
        print(f"ERROR in function: sanitizer_validator(expression) = {str(err)}")


def evaluate_recipe(recipe):
    """
    Execute an RPN recipe on a stack and return the single resulting value.

    Args:
        recipe: iterable of (operation, value) pairs where operation is
            "PUSH" or "OPERATOR". The names 'pi' and 'e' are replaced by
            their numeric values no matter which operation they are tagged
            with.

    Supported operators:
        unary  : 'u-' (negation), 'abs', 'sqrt', 'sin', 'cos', 'tan'
                 (the trigonometric functions take their argument in degrees)
        binary : '+', '-', '*', '/', '**'

    Returns:
        The one value left on the stack after the whole recipe has run.

    Raises:
        ValueError: for too few operands, an unknown operator or operation
            kind, the square root of a negative number, division by zero,
            or a recipe that does not reduce to exactly one value.
    """
    constants = {'pi': pi, 'e': e}
    unary_operators = ('u-', 'abs', 'sqrt', 'sin', 'cos', 'tan')
    binary_operators = ('+', '-', '*', '/', '**')
    # Wording kept from the earlier, operator-specific error messages
    operand_labels = {'u-': 'unary minus', '-': 'subtraction'}

    stack = []
    for operation, value in recipe:
        if isinstance(value, str) and value in constants:
            # Constants are resolved here regardless of how they were tagged
            stack.append(constants[value])

        elif operation == "PUSH":
            stack.append(value)

        elif operation == "OPERATOR":
            label = operand_labels.get(value, value)

            if value in unary_operators:
                if len(stack) < 1:
                    raise ValueError(f"Not enough operands for {label}")
                operand = stack.pop()
                if value == 'u-':
                    result = -operand
                elif value == 'abs':
                    result = abs(operand)
                elif value == 'sqrt':
                    if operand < 0:
                        raise ValueError("Cannot calculate the square root of a negative number")
                    result = sqrt(operand)
                else:
                    angle = radians(operand)  # Convert to radians
                    if value == 'sin':
                        result = sin(angle)
                    elif value == 'cos':
                        result = cos(angle)
                    else:
                        result = tan(angle)

            elif value in binary_operators:
                if len(stack) < 2:
                    raise ValueError(f"Not enough operands for {label}")
                operand2 = stack.pop()
                operand1 = stack.pop()
                if value == '+':
                    result = operand1 + operand2
                elif value == '-':
                    result = operand1 - operand2
                elif value == '*':
                    result = operand1 * operand2
                elif value == '**':
                    result = operand1 ** operand2
                else:
                    if operand2 == 0:
                        raise ValueError("Division by zero error")
                    result = operand1 / operand2

            else:
                # Never push a stale result for something we cannot compute
                raise ValueError(f"Unknown operator: {value!r}")

            stack.append(result)  # Push the result back onto the stack

        else:
            raise ValueError(f"Unknown recipe operation: {operation!r}")

    if len(stack) != 1:
        # Empty recipe, or operands left over that no operator consumed
        raise ValueError(
            f"Malformed expression: expected exactly one result, found {len(stack)}"
        )
    return stack[0]


# ALU helper
def rpn_calculator(raw_expression):
    """
    Sanitize, parse and evaluate one infix expression.

    Pipeline: sanitizer_validator() -> tokenize() -> shunting_yard() ->
    evaluate_recipe(). RPN items that are int/float become ("PUSH", item),
    everything else becomes ("OPERATOR", item).

    Returns:
        (raw_expression, recipe, result) on success.
        (raw_expression, [], "Error: <message>") when any step fails; the
        failure is also printed. A ValueError raised while tokenizing or
        building the RPN is reported with its own message prefix.
    """
    try:
        expression = sanitizer_validator(raw_expression)

        try:
            rpn_items = shunting_yard(tokenize(expression))
            recipe = [
                ("PUSH" if isinstance(item, (int, float)) else "OPERATOR", item)
                for item in rpn_items
            ]
        except ValueError as parse_error:
            print(f"Error in tokenization or shunting yard: {parse_error}")
            return (raw_expression, [], "Error: " + str(parse_error))

        return (raw_expression, recipe, evaluate_recipe(recipe))

    except Exception as error:
        print(f"ERROR in function: rpn_calculator(raw_expression), for {raw_expression}  = {str(error)}")
        return (raw_expression, [], "Error: " + str(error))


# ALU helper
def alu_list_rpn_calculator(raw_expression_list):
    """
    Run rpn_calculator() on every expression of an iterable.

    example use:
    alu_list_rpn_calculator(["1+1"])

    Args:
        raw_expression_list: iterable of expression strings.

    Returns:
        A list with one rpn_calculator() result tuple per expression, in
        input order. If iterating the argument fails (for example it is
        None), the error is printed and the results collected so far are
        returned, which is an empty list when nothing could be processed.
    """
    report_list = []
    # Tracked outside the loop so the error message can always be built,
    # even when the failure happens before the first item is reached.
    current_expression = None
    try:
        for current_expression in raw_expression_list:
            report_list.append(rpn_calculator(current_expression))
    except Exception as error:
        print(f"ERROR in function: alu_list_rpn_calculator(raw_expression_list), for expression {current_expression}  = {str(error)}" )

    return report_list


# ALU helper function
def alu_find_equivalent_expressions(expressions):
    """
    Group expressions that evaluate to the same value.

    Input items have the shape
        ("original expression string", [list, of, steps], solution_value)

    Step 1: build a dictionary from solution value to the expression
            strings that produced it (first-seen order is kept).
    Step 2: report every value reached by more than one expression as a
            tuple (list_of_expressions, value).

    e.g. [([exp1, exp3], 5), ([exp2, exp4], .5)]

    If anything goes wrong the error is printed and None is returned.
    """
    try:
        expressions_by_value = {}
        for source_text, _steps, solution in expressions:
            expressions_by_value.setdefault(solution, []).append(source_text)

        equivalents = []
        for solution, members in expressions_by_value.items():
            # A value reached by a single expression has no equivalent
            if len(members) > 1:
                equivalents.append((members, solution))

        return equivalents

    except Exception as err:
        print(f"ERROR in function: alu_find_equivalent_expressions(expressions)  = {str(err)}" )




###########
# Tests 1
###########


def mathlist_run(math_list_string):
    """
    Evaluate every expression of a comma-separated string, one at a time.

    Each piece is handed to alu_list_rpn_calculator() as a one-item list.
    The returned reports are discarded, so the only visible effect is
    whatever the calculator prints.
    """
    for single_expression in math_list_string.split(','):
        alu_list_rpn_calculator([single_expression])


# Smoke tests, set 1: each call below runs the full calculator pipeline.
# The return values are not stored, so the only visible effect is whatever
# the calculator prints.

# Nested subtraction with parentheses
i = "(7 - 3) - (5 - 10)"
# # i = "(7) - (10)"
alu_list_rpn_calculator([i])

# Simplest possible case, no spaces
alu_list_rpn_calculator(["1+1"])

# Degree symbol (removed by sanitizer_validator before tokenizing)
alu_list_rpn_calculator(["sin(30°)"])

# Function applied without parentheses
alu_list_rpn_calculator(["sin 30"])

# Function applied to a negative argument
alu_list_rpn_calculator(["abs(-3)"])

# Comma-separated batch; mathlist_run() splits on ',' so the line breaks
# stay inside individual items until tokenize() strips them.
math_list_string = """3 + 4 * 2 / ( 1 - 5 ),-5 - (-6),
−4×(−9),−4*(−9),
5 + (-3), 10 + (-7), -4 + 11, -2 + 9,
(-5) + (-3), (-10) - (-7), (-15) + (-8), (-4) - 11,
5 - (-3), 10 - (-7), 15 - (-8), 4 - (-11),
5 * (-3), 10 * (-7), -4 * 11, -2 * 9,
5 / (-3), 10 / (-7), -4 / 11, -2 / 9,
abs(5), abs(-3), abs(10), abs(-7),
5**2, (-3)**2, 10**2, (-7)**2,
5**3, 3**4, 2**7, 6**2,
0.5**3, 0.3**4, 0.2**7, 0.6**2,
2**3, 3**4, 4**2, sin(30°) ,cos(30°) , tan(30°)"""

mathlist_run(math_list_string)

##############
# Test Set 2
##############
# Each group rebinds the module-level name `math_list` and runs it through
# alu_list_rpn_calculator(). The returned reports are not stored.

# Basic Arithmetic Operations: Test all basic arithmetic operations (addition, subtraction, multiplication, division) with positive, negative, and zero values.
math_list = ["1 + 2", "-3 - 4", "5 * -6", "7 / -8", "0 * 3", "-5 + 0"]
alu_list_rpn_calculator(math_list)

# alu_list_rpn_calculator(["5 * -6"])

# Unary Operations: Check the handling of unary minus in various contexts.
math_list = ["-(3 + 4)", "-3 + 4", "3 + -4", "--5", "-(-3 - -4)"]
alu_list_rpn_calculator(math_list)

# Same as the last item above, run on its own
alu_list_rpn_calculator(["-(-3 - -4)"])


# Floating Point Numbers: Test with floating-point numbers to ensure accurate handling of decimal values.
math_list = ["3.5 + 2.1", "-4.7 * 0.5"]
alu_list_rpn_calculator(math_list)

# Parentheses and Order of Operations: Test expressions with nested parentheses and mixed operations to check if the order of operations is respected.
math_list = ["(2 + 3) * 4", "2 * (3 + 4)", "(5 - (6 / 2)) * 3"]
alu_list_rpn_calculator(math_list)

# Functions and Advanced Operations: Include trigonometric functions, exponentiation, square roots, and absolute values.
# (evaluate_recipe converts trigonometric arguments from degrees to radians.)
math_list = ["sin(30)", "cos(-45)", "tan(60)", "2 ** 3", "sqrt(16)", "abs(-9)"]
alu_list_rpn_calculator(math_list)

# Edge Cases: Test cases that often result in errors like division by zero, very large and small numbers, and invalid inputs.
math_list = ["1 / 0", "99999999 * 9999999", "1 / 0.0000001", "sin(90) / cos(90)", "sqrt(-1)"]
alu_list_rpn_calculator(math_list)

# Complex Expressions: Combine multiple operations in a single expression to test complex parsing.
math_list = ["3 + 4 * 2 / (1 - 5)", "5 - -3 + 2 * 4"]
alu_list_rpn_calculator(math_list)

# Whitespace and Formatting Variations: Ensure the parser can handle expressions with varying whitespace and formatting.
math_list = [" 3+5 ", "\n4*5", " 7\t- 2 "]
alu_list_rpn_calculator(math_list)

# Special Mathematical Constants and Variables: If your calculator supports it, include tests with special constants like π, e, etc.
math_list = ["2 * pi", "e ** 2"]
alu_list_rpn_calculator(math_list)

# Invalid Expressions: Test with invalid or malformed expressions to ensure appropriate error handling.
# These are expected to come back as ("<expression>", [], "Error: ...") tuples.
math_list = ["2 ++ 2", "sin(30 + 60", "2 */ 3"]
alu_list_rpn_calculator(math_list)



ERROR in function: rpn_calculator(raw_expression), for 1 / 0  = Division by zero error
ERROR in function: rpn_calculator(raw_expression), for sqrt(-1)  = Cannot calculate the square root of a negative number
Error in tokenization or shunting yard: Invalid consecutive operators: ++ at position 1
Error in tokenization or shunting yard: Unbalanced parentheses in expression
Error in tokenization or shunting yard: Invalid consecutive operators: */ at position 1


[('2 ++ 2', [], 'Error: Invalid consecutive operators: ++ at position 1'),
 ('sin(30 + 60', [], 'Error: Unbalanced parentheses in expression'),
 ('2 */ 3', [], 'Error: Invalid consecutive operators: */ at position 1')]

# words

In [None]:
# import random
# import csv
# from datetime import datetime

# # Define basic operations
# def add(a, b):
#     return a + b

# def subtract(a, b):
#     return a - b

# # Function to select random operands
# def random_operand():
#     return random.randint(1, 10)  # You can adjust the range as needed

# # Function to construct a sentence using different templates
# def construct_question(operator, operands, template_id):
#     if operator == "sum":
#         phrase = "What is the sum of"
#     else:
#         phrase = "What is the difference between"

#     if template_id == 1:
#         return f"{phrase} {operands[0]} and {operands[1]}?"
#     elif template_id == 2:
#         return f"Calculate the {operator} of {operands[0]} and {operands[1]}."
#     # More templates can be added here
#     else:
#         return f"{operands[0]} {operator} {operands[1]} equals what?"

# # Function to generate a math word problem
# def generate_problem():
#     # Select operator
#     op_function = random.choice([add, subtract])
#     op_name = "sum" if op_function is add else "difference"

#     # Select operands
#     operands = [random_operand(), random_operand()]

#     # Select a template and construct the question
#     template_id = random.randint(1, 3)  # Assuming we have 3 templates for now
#     question = construct_question(op_name, operands, template_id)

#     # Calculate the answer
#     answer = op_function(*operands)

#     return question, answer

# # Function to generate a set of problems and save to a CSV file
# def generate_problems_set(n):
#     problems = [generate_problem() for _ in range(n)]
#     timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
#     filename = f"problem_answer_set_{timestamp}.csv"

#     with open(filename, 'w', newline='') as file:
#         writer = csv.writer(file)
#         writer.writerow(["Problem", "Answer"])
#         writer.writerows(problems)

#     return filename

# # Example: Generate 5 problems
# generate_problems_set(5)


TODO:

1. create a cleaner way to select operations
2. add more operations in
3. make sure the english phrases work per operation (there may need to be a separate phrase list per operation)

4. fix the math equation generation code



# Math Problem Generator (basic very minimal)

In [None]:
"""

This code mostly works but needs to be modified in a few ways:

math word problem phrases:

The goal is to create a system that can handle multi-step, multi-operation, mathematical problems and scale effectively.

The key point is having a system that is modular and can be expanded, not to reach too far too fast with adding modules. Starting with just addition now is fine but in the proper scalable way which is the goal.


final_output = {
    "text_word_problem": text_word_problem,
    "math_notation": math_notation,
    "final_answer_solution": final_answer_solution,
    "solution_process": solution_process,}


Debugging and refinement:
in this example output, there is no clear operation between part 1 and part 2

('What is the result of (the sum of 8 and 4) and (the difference between 7 and 1)?',
 4)

'What is the result of PART_1 and PART_2?'
e.g.
'What is the result of 3 and 4?'
This is just a list of operands with no operator(s).
given that the answer was supposed to be '4'
"12 ? 6 = 4", where "?" is what operator?

if the RPN system is used, that partially solves the problem,
but a valid equation must be generated.


For example, let's start with the math problem: 1 + 1 = 2.

How many ways are there to ask this same question?


{front_phrase()} {operator} of {list_the_operands()}?
becomes
What is the sum of 1 and 1?

operator_phrase:
"the sum of"
"the difference between"

operand_list:
[1,2,3]
or
["one", "two", "three"]

list_the_operands version:
"1, 2, 3, and 4"
"1,2,3,4"
"1 and 2 and 3 and 4"
"one, two, three, and four"

trunk_equation = f"the {operator_phrase} {list_the_operands()}"

Prefix-phrases include:
"What is the" can be:
"Calculate" trunk_equation.
"Find" trunk_equation.
"Can you find" trunk_equation?
"Please" find trunk_equation?
"Now, find" trunk_equation?
"Let's find" trunk_equation?

And, alternately, post-fix phrases include:
"[trunk_equation]" is what?

Other terms may only be used with some operators
"[trunk_equation] makes what?"
"[trunk_equation] adds up to what?"
"[trunk_equation] sums up to what?"

Note: any number values, and any sequence of values (1 + 1 + 1, or 2 + 2 + 2, etc) need to be able to be put into this framework.

and

these need to be modular so can say

What is the difference between the sum of 1 and 2, and the difference between 9 and 7?

What is 1 + 2 - 4?
What do you get when you subtract (the sum of 1+2+5) from the difference between 4 and 2?

Note: later this can include questions such as:

"Two is the sum of one and what other number?"

"""


import random
import csv
from datetime import datetime

# Define basic operations
def add(a, b):
    """Return the result of a + b."""
    total = a + b
    return total

def subtract(a, b):
    """Return the result of a - b."""
    difference = a - b
    return difference

# Function to select random operands
def random_operand(low=1, high=10):
    """
    Return a random integer operand.

    Args:
        low: smallest value that may be returned (default 1).
        high: largest value that may be returned (default 10).

    Returns:
        An int N with low <= N <= high (both ends included). Called without
        arguments it behaves as before and draws from 1..10.

    Raises:
        ValueError: raised by random.randint when low > high.
    """
    return random.randint(low, high)



# Reverting to the original construct_question function with some modifications for clarity
# Phrase table keyed by operation name. New operations are added here
# instead of growing an if/elif chain inside construct_question().
_OPERATOR_PHRASES = {
    "sum": "the sum of",
    "difference": "the difference between",
    "product": "the product of",
    "quotient": "the quotient of",
}


def construct_question(operator, operands, template_id, nested=False):
    """
    Constructs a question part based on the operator, operands, and template.

    Args:
        operator: operation name; must be a key of _OPERATOR_PHRASES
            ("sum", "difference", "product", "quotient").
        operands: sequence whose first two items are the operands; any
            further items are ignored.
        template_id: 1 selects the prefix form ("What is ...?"); any other
            value selects the suffix form ("... equals what?").
        nested: when True only the bare phrase is returned, without
            question wording, so it can be embedded in a larger problem.
            template_id has no effect in that case.

    Returns:
        The question (or question fragment) as a string.

    Raises:
        ValueError: if the operator is unknown or fewer than two operands
            are supplied.
    """
    # TODO: this is not sufficiently modular yet. There are prefix-phrase
    # and suffix-phrase versions, and each type of operation needs its own
    # modular set of phrases (a mini-database of phrases by operation type
    # and phrase type). The implementation may start simple, but the system
    # must scale.
    if operator not in _OPERATOR_PHRASES:
        known = ", ".join(sorted(_OPERATOR_PHRASES))
        raise ValueError(f"Unknown operator {operator!r}; expected one of: {known}")
    phrase = _OPERATOR_PHRASES[operator]

    try:
        first, second = operands[0], operands[1]
    except IndexError:
        raise ValueError("construct_question needs at least two operands") from None

    trunk = f"{phrase} {first} and {second}"
    if nested:
        return trunk
    if template_id == 1:
        return f"What is {trunk}?"
    return f"{trunk} equals what?"



# Updating the generate_modular_problem function to include clear operations between parts
def generate_modular_problem():
    """
    Build one random word problem together with its notation and solution.

    A first two-operand part (sum or difference) is always generated. With
    a 50% chance a second part is generated as well and joined to the first
    by a random '+' or '-'. The math-notation string is then solved by
    rpn_calculator(); nothing is solved directly in this function.

    Progress is printed along the way (notation, RPN question, steps,
    answer and the final dictionary).

    Returns:
        dict with the keys "text_word_problem", "math_notation",
        "final_answer_solution" and "solution_process".
    """

    def _random_part():
        # One random sub-problem: returns (math notation, nested phrase)
        op_function = random.choice([add, subtract])
        if op_function is add:
            op_name, op_symbol = "sum", "+"
        else:
            op_name, op_symbol = "difference", "-"

        operands = [random_operand(), random_operand()]
        template_id = random.randint(1, 2)
        phrase = construct_question(op_name, operands, template_id, nested=True)
        notation = f"{operands[0]} {op_symbol} {operands[1]}"
        return notation, phrase

    math_notation_1, question_part_1 = _random_part()
    print("math_notation_1", math_notation_1)

    # 50% chance to create a more complex, two-part problem
    make_compound = random.choice([True, False])
    if make_compound:
        math_notation_2, question_part_2 = _random_part()

        # Operation that connects the two parts
        connecting_op = "+" if random.choice([add, subtract]) is add else "-"

        full_math_expression = f"({math_notation_1}) {connecting_op} ({math_notation_2})"
        text_word_problem = f"What is the result of ({question_part_1}) {connecting_op} ({question_part_2})?"
    else:
        full_math_expression = math_notation_1
        text_word_problem = f"What is {question_part_1}?"

    print("full_math_expression", full_math_expression)

    # The expression is solved by the RPN calculator
    rpn_question, rpn_solution, rpn_answer = rpn_calculator(full_math_expression)

    for label, item in (
        ("rpn_question", rpn_question),
        ("rpn_solution", rpn_solution),
        ("rpn_answer", rpn_answer),
    ):
        print(label, item)

    final_output = {
        "text_word_problem": text_word_problem,
        "math_notation": full_math_expression,
        "final_answer_solution": rpn_answer,
        "solution_process": rpn_solution,
    }
    print("final_output", final_output)

    return final_output

# Generate a modular problem with clear connecting operations.
# The call runs at module level and prints its progress; `result` holds the
# returned dict and is reused below for the Alpaca-style conversion.
result = generate_modular_problem()

def transform_to_alpaca_format(data):
    """
    Repackage a generated problem as an Alpaca-style record.

    Args:
        data: mapping with the keys 'text_word_problem', 'math_notation',
            'final_answer_solution' and 'solution_process'.

    Returns:
        dict with the keys "instruction", "input", "output" and "text".
        "output" is a dict of the three solution fields, and "text" is the
        full prompt with that dict rendered through its string form.

    Raises:
        KeyError: if one of the required keys is missing from data.
    """
    instruction = "Ask and answer a math problem."
    question = data['text_word_problem']

    # Key order matters: it fixes how the dict is rendered inside "text"
    answer_payload = {
        key: data[key]
        for key in ('math_notation', 'final_answer_solution', 'solution_process')
    }

    prompt_text = (
        "Below is an instruction that describes a task, paired with an input that provides further context. "
        "Write a response that appropriately completes the request.\n\n"
        f"### Instruction:\n{instruction}\n\n"
        f"### Input:\n{question}\n\n"
        f"### Output:\n{answer_payload}"
    )

    return {
        "instruction": instruction,
        "input": question,
        "output": answer_payload,
        "text": prompt_text,
    }

# Show the problem generated above (module-level `result`) converted to the
# Alpaca-style record.
print("\n\n Alpaca-ish format -> \n")
print( transform_to_alpaca_format(result) )

math_notation_1 5 + 2
full_math_expression (5 + 2) + (2 - 9)
rpn_question (5 + 2) + (2 - 9)
rpn_solution [('PUSH', 5.0), ('PUSH', 2.0), ('OPERATOR', '+'), ('PUSH', 2.0), ('PUSH', 9.0), ('OPERATOR', '-'), ('OPERATOR', '+')]
rpn_answer 0.0
final_output {'text_word_problem': 'What is the result of (the sum of 5 and 2) + (the difference between 2 and 9)?', 'math_notation': '(5 + 2) + (2 - 9)', 'final_answer_solution': 0.0, 'solution_process': [('PUSH', 5.0), ('PUSH', 2.0), ('OPERATOR', '+'), ('PUSH', 2.0), ('PUSH', 9.0), ('OPERATOR', '-'), ('OPERATOR', '+')]}


 Alpaca-ish format -> 

{'instruction': 'Ask and answer a math problem.', 'input': 'What is the result of (the sum of 5 and 2) + (the difference between 2 and 9)?', 'output': {'math_notation': '(5 + 2) + (2 - 9)', 'final_answer_solution': 0.0, 'solution_process': [('PUSH', 5.0), ('PUSH', 2.0), ('OPERATOR', '+'), ('PUSH', 2.0), ('PUSH', 9.0), ('OPERATOR', '-'), ('OPERATOR', '+')]}, 'text': "Below is an instruction that describes a ta

# TODO
1. make tables of data in a given training data format
2. make RPN based errors
3.

# Select
- how many problems to make
- what final format to be in
- what kinds of math to use
- how many nested levels to have
- the scale of how many operations (operands & operators) to have in a problem (e.g. 1 + 1 + 1... 50x times is a scale challenge)
- the scale of how many problems to ask in a series of problems