# CONFIG

In [3]:
class CFG:
    """Global settings for the synthetic Taylor-expansion dataset."""
    # number of generation attempts made by `Data.generate` (one per loop iteration)
    data_size = 5000
    # inclusive upper bound on the number of operators sampled for one expression
    max_num_ops = 8
    # forwarded as `max_int` to the expression generator (bounds numeric leaves)
    max_integer = 1

# Imports

In [4]:
# `%pip` installs into the running kernel's environment; the version is pinned
# to the one this notebook was executed with.
%pip install interruptingcow==0.8

Collecting interruptingcow
  Downloading interruptingcow-0.8.tar.gz (5.0 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: interruptingcow
  Building wheel for interruptingcow (setup.py) ... [?25l[?25hdone
  Created wheel for interruptingcow: filename=interruptingcow-0.8-py3-none-any.whl size=5808 sha256=a3db43aed03c12bd05e46f0ad0e42f5f937591785f503bf05fb5946e8d1686fa
  Stored in directory: /root/.cache/pip/wheels/5d/ab/3d/9f6b20913b1784d251eb7f625b4050be3b7f16650eb64cbd55
Successfully built interruptingcow
Installing collected packages: interruptingcow
Successfully installed interruptingcow-0.8


In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


import sympy
from sympy import expand
from sympy import sympify
from sympy import series
from sympy import Symbol, symbols
from sympy import im, I
from sympy import diff, limit, nan


from tqdm import tqdm
from typing import List, Tuple
from dataclasses import dataclass
from interruptingcow import timeout

# SYMPY HANDLING

In [6]:
import sympy as sp

# Names of the symbols that are recognised as variables when a prefix token
# list is converted back into a sympy expression.
variables = [
        'x'
        ]

# Maps every supported sympy function class to the token name used for it in
# prefix notation. Classes that are commented out are not supported: looking
# them up in `sympy_to_prefix_rec` raises a KeyError.
operators = {
    # Elementary functions
    sp.Add: 'add',
    sp.Mul: 'mul',
    sp.Pow: 'pow',
    sp.exp: 'exp',
    sp.log: 'ln',
    sp.Abs: 'abs',
    sp.sign: 'sign',
#     sp.Sub: 'sub',
#     sp.Div: 'div',
    # Trigonometric Functions
    sp.sin: 'sin',
    sp.cos: 'cos',
    sp.tan: 'tan',
    # sp.cot: 'cot',
    sp.sec: 'sec',
    sp.csc: 'csc',
    # Trigonometric Inverses
    sp.asin: 'asin',
    sp.acos: 'acos',
    # sp.atan: 'atan',
    sp.acot: 'acot',
    sp.asec: 'asec',
    sp.acsc: 'acsc',
    # Hyperbolic Functions
    sp.sinh: 'sinh',
    sp.cosh: 'cosh',
    sp.tanh: 'tanh',
    sp.coth: 'coth',
    sp.sech: 'sech',
    sp.csch: 'csch',
    # Hyperbolic Inverses
    sp.asinh: 'asinh',
    sp.acosh: 'acosh',
    sp.atanh: 'atanh',
    sp.acoth: 'acoth',
    sp.asech: 'asech',
    sp.acsch: 'acsch',
    sp.Min: 'min',
    sp.Max: 'max',
    # Derivative
    sp.Derivative: 'derivative',
}

# Reverse lookup: token name -> callable that builds the sympy expression.
operators_inv = {operators[key]: key for key in operators}
# 'sub' and 'div' are not present in `operators`, so they are supplied as lambdas.
operators_inv.update({'sub': lambda x, y: x - y,'div': lambda x, y: x / y})
# Alternative spellings "mul(" / "add(" resolve to the same sympy classes;
# `operators_nargs` lists them with an argument count of -1.
operators_inv["mul("] = sp.Mul
operators_inv["add("] = sp.Add

# Number of arguments each prefix token takes. The table also lists tokens
# that have no entry in `operators` / `operators_inv` in this file
# (e.g. 'rac', 'inv', 'pow2'..'pow5', 'sqrt', 'cot', 'atan').
operators_nargs = {
    # Elementary functions
    'mul(': -1,
    'add(': -1,
    'add': 2,
    'sub': 2,
    'mul': 2,
    'div': 2,
    'pow': 2,
    'rac': 2,
    'inv': 1,
    'pow2': 1,
    'pow3': 1,
    'pow4': 1,
    'pow5': 1,
    'sqrt': 1,
    'exp': 1,
    'ln': 1,
    'abs': 1,
    'sign': 1,
    'max': 2,
    'min': 2,
    # Trigonometric Functions
    'sin': 1,
    'cos': 1,
    'tan': 1,
    'cot': 1,
    'sec': 1,
    'csc': 1,
    # Trigonometric Inverses
    'asin': 1,
    'acos': 1,
    'atan': 1,
    'acot': 1,
    'asec': 1,
    'acsc': 1,
    # Hyperbolic Functions
    'sinh': 1,
    'cosh': 1,
    'tanh': 1,
    'coth': 1,
    'sech': 1,
    'csch': 1,
    # Hyperbolic Inverses
    'asinh': 1,
    'acosh': 1,
    'atanh': 1,
    'acoth': 1,
    'asech': 1,
    'acsch': 1,
    # Derivative
    'derivative': 2,
}

# Integer-like sympy types; `format_number` sends these to `format_integer`.
integers_types = [
        sp.core.numbers.Integer,
        sp.core.numbers.One,
        sp.core.numbers.NegativeOne,
        sp.core.numbers.Zero,
        ]

# Every numeric type `format_number` is asked to handle (integers plus
# rationals, floats and the symbolic constants).
numbers_types = integers_types + [
    sp.core.numbers.Rational,
    sp.core.numbers.Half,
    sp.core.numbers.Exp1,
    sp.core.numbers.Pi,
    sp.core.numbers.ImaginaryUnit,
    sp.core.numbers.Float,
]

# Leaf types: `sympy_to_prefix_rec` stops recursing when it reaches one of these.
# Exp1, Pi and ImaginaryUnit appear twice because `numbers_types` already
# contains them; the duplicates do not affect membership tests.
atoms = [
    str,
    sp.core.symbol.Symbol,
    sp.core.numbers.Exp1,
    sp.core.numbers.Pi,
    sp.core.numbers.ImaginaryUnit,
] + numbers_types

    

def flatten(l, ltypes=(list, tuple)):
    """
    Flatten an arbitrarily nested sequence.

    Every element that is an instance of `ltypes` is replaced by its own
    (recursively flattened) elements; empty nested containers disappear.
    The result has the same container type as the input.
    """
    container = type(l)
    flat = []
    # explicit stack of iterators: one entry per nesting level being walked
    pending = [iter(l)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, ltypes):
                # descend; the parent iterator resumes once this one is exhausted
                pending.append(iter(item))
                break
            flat.append(item)
        else:
            pending.pop()
    return container(flat)


def format_number(number):
    """
    Turn a sympy number into its prefix token list.

    Dispatches on the exact type of `number`: integer types go to
    `format_integer`, Rational/Float go to `format_rational` (after conversion
    with `sp.Rational`), and Half, E, pi and I have dedicated formatters.

    Raises:
        NotImplementedError: for any other type.
    """
    kind = type(number)
    if kind in integers_types:
        return format_integer(number)
    if kind in (sp.core.numbers.Rational, sp.core.numbers.Float):
        return format_rational(sp.Rational(number))

    constant_formatters = {
        sp.core.numbers.Half: format_half,
        sp.core.numbers.Exp1: format_exp1,
        sp.core.numbers.Pi: format_pi,
        sp.core.numbers.ImaginaryUnit: format_imaginary_unit,
    }
    formatter = constant_formatters.get(kind)
    if formatter is None:
        raise NotImplementedError
    return formatter()
        
def format_float(number):
    """Return a one-element token list holding `str(number)`."""
    text = str(number)
    return [text]

def format_exp1():
    """Return the token list for Euler's number."""
    token = 'E'
    return [token]

def format_pi():
    """Return the token list for pi."""
    token = 'pi'
    return [token]

def format_imaginary_unit():
    """Return the token list for the imaginary unit."""
    token = 'I'
    return [token]

def format_half():
    """
    Token list for one half, laid out the same way `format_rational` lays out
    p/q: mul (+1) pow (+2) (-1), i.e. 1 * 2**-1.
    """
    return [
        'mul', 's+', '1',
        'pow', 's+', '2', 's-', '1',
    ]

def format_rational(number):
    """
    Token list for a rational p/q, written as p * q**-1:
    ['mul', <p tokens>, 'pow', <q tokens>, 's-', '1'].
    """
    # p and q are passed through sympify so `format_integer` receives sympy objects
    numerator = sp.sympify(number.p)
    denominator = sp.sympify(number.q)

    tokens = ['mul']
    tokens.extend(format_integer(numerator))
    tokens.append('pow')
    tokens.extend(format_integer(denominator))
    tokens.extend(['s-', '1'])
    return tokens

def format_integer(integer):
    """
    Encode a sympy integer as a sign token followed by its decimal digits,
    following https://arxiv.org/pdf/1912.01412.pdf.

    input:
        integer: a `sympy.Integer` object, e.g. `sympy.Integer(-1)`

    output:
        [sign_token, digit0, digit1, ...] with sign_token 's+' or 's-'

    Example:
        format_integer(sympy.Integer(-123))
        >> ['s-', '1', '2', '3']

    The magnitude is read from `integer.p`; the sign comes from
    `integer.could_extract_minus_sign()`.
    """
    magnitude = abs(integer.p)
    sign_token = "s-" if integer.could_extract_minus_sign() else "s+"
    return [sign_token, *str(magnitude)]

def parse_if_str(x):
    """Parse `x` with sympy when it is a string; return anything else unchanged."""
    return sp.parsing.parse_expr(x) if isinstance(x, str) else x

def rightmost_string_pos(expr_arr, pos=-1):
    """
    Return the non-negative index of the last string in `expr_arr`, scanning
    leftwards from the (negative) index `pos`.

    Raises IndexError when no string is found before the start of the list.
    """
    while not isinstance(expr_arr[pos], str):
        pos -= 1
    return len(expr_arr) + pos


def rightmost_operand_pos(expr, pos):
    """
    Starting at index `pos` and moving left, return the index of the first
    entry that is an operator token, a sign token ('s+'/'s-') or a variable name.
    """
    markers = list(operators_inv.keys()) + ["s+", "s-"] + variables
    while expr[pos] not in markers:
        pos -= 1
    return pos

def unformat_integer(arr):
    """
    Inverse of `format_integer`.

    input:
        arr: sign token followed by digit tokens, e.g. ["s+", "4", "2"]

    output:
        the corresponding sympy integer, e.g. sympy.Integer(42) for the example.

    "s-" marks a negative number; any other sign token yields no minus sign.
    """
    prefix = "-" if arr[0] == "s-" else ""
    digits = "".join(str(digit) for digit in arr[1:])
    return sp.parsing.parse_expr(prefix + digits)

def prefix_to_sympy(expr_arr):
    """
    Convert a flat prefix token list back into a sympy expression.

    The list is reduced from the right: each call finds the rightmost
    operator / sign / variable token, replaces it (together with the entries it
    consumes) by a single sympy object, and recurses on the shortened list
    until one element remains.

    input:
        expr_arr: list of tokens; entries may already be sympy objects.

    output:
        the sympy expression for the whole list.
    """
    # base case: a single entry is either a finished sympy object or a string to parse
    if len(expr_arr) == 1:
        return parse_if_str(expr_arr[0])
    op_pos = rightmost_operand_pos(expr_arr,len(expr_arr) - 1)
    # only reports the problem; execution continues afterwards
    if (op_pos == -1) | (op_pos == len(expr_arr)):
        print("something went wrong, operator should not be at end of array")
    op = expr_arr[op_pos]
    if op in operators_inv.keys():
        # operator: apply it to the `num_args` entries that follow it
        num_args = operators_nargs[op]
        op = operators_inv[op]
        args = expr_arr[op_pos+1:op_pos+num_args+1]
        args = [parse_if_str(a) for a in args]
        func = op(*args)
        expr = expr_arr[0:op_pos] + [func] + expr_arr[op_pos+num_args+1:]
        return prefix_to_sympy(expr)

    elif (op == 's+') | (op == "s-"):
        # sign token: the integer spans from the sign up to the rightmost string entry
        # int_end_pos = rightmost_int_pos(expr_arr)
        string_end_pos = rightmost_string_pos(expr_arr)
        integer = unformat_integer(expr_arr[op_pos:string_end_pos+1])
        expr_arr_new = expr_arr[0:op_pos] + [integer] + expr_arr[string_end_pos+1:]
        return prefix_to_sympy(expr_arr_new)
    elif op in variables:
        # variable name: replace the string by the sympy symbol
        op = sp.sympify(op)
        expr_arr_new = expr_arr[0:op_pos] + [op] + expr_arr[op_pos+1:]
        return prefix_to_sympy(expr_arr_new)

    return op


def repeat_operator_until_correct_binary(op, args, ret=None):
    """
    Expand an n-ary application of a binary operator into nested binary
    applications in prefix form.

    sympy lets operators such as Add/Mul take any number of arguments, but
    prefix notation needs exactly two per binary operator. The convention is
        1 + 2 + 3 --> + 1 + 2 3
    as in https://arxiv.org/pdf/1912.01412.pdf (page 15).

    input:
        op: operator token as used in `operators_nargs` (must have 2 arguments)
        args: list [arg1, arg2, ...] of operands; each may itself be a nested
              token list
        ret: already-built right-hand part of the result. Defaults to a fresh
             empty list on every call (previously a shared mutable default,
             which leaked state between calls when the result was mutated).

    output:
        list of tokens [op, arg1, op, arg2, ..., argN-1, argN] + tail from `ret`

    raises:
        AssertionError: if `op` is not registered as a binary operator.
    """
    is_binary = operators_nargs[op] == 2
    assert is_binary, "repeat_operator_until_correct_binary only takes binary operators"

    if ret is None:
        ret = []

    # consume operands from the right: the innermost application takes two
    # operands, every further one prepends a single operand
    while len(args) > 0:
        if len(ret) == 0:
            ret = [op] + args[-2:]
            args = args[:-2]
        else:
            ret = [op] + args[-1:] + ret
            args = args[:-1]

    return ret

def sympy_to_prefix_rec(expression, ret):
    """
    Recursively convert a sympy expression to (nested) prefix notation.

    Operators are replaced by their names from `operators`. The result is a
    nested list whose nesting plays the role of parentheses; because every
    token has a fixed number of arguments (`operators_nargs`) the list can be
    flattened afterwards (see `sympy_to_prefix`).

    input:
        expression: sympy expression to convert
        ret: token list built so far; the new tokens are appended to a copy

    output:
        `ret` extended by the tokens of `expression`

    raises:
        KeyError: if the expression contains a function that is not in `operators`.

    NOTE(review): when the argument count matches none of the branches below
    (e.g. a single argument for a binary operator) `ret` is returned without
    the operator — confirm whether that case can occur.
    """
    if expression in [sp.core.numbers.Pi, sp.core.numbers.ImaginaryUnit]:
        f = expression
    else:
        f = expression.func
    if f in atoms:
        # leaves: numbers get their token encoding, everything else its string form
        if type(expression) in numbers_types:
            return ret + format_number(expression)
        return ret + [str(expression)]
    f_str = operators[f]
    f_nargs = operators_nargs[f_str]
    args = expression.args
    # Was `len(args) == 1 & f_nargs == 1`: `&` binds tighter than `==`, so that
    # expression compared against `1 & f_nargs` instead of testing both conditions.
    if len(args) == 1 and f_nargs == 1:
        ret = ret + [f_str]
        return sympy_to_prefix_rec(args[0], ret)
    if len(args) == 2:
        ret = ret + [f_str, sympy_to_prefix_rec(args[0], []), sympy_to_prefix_rec(args[1], [])]
    if len(args) > 2:
        # n-ary Add/Mul/...: convert operands first, then nest the operator pairwise
        args = list(map(lambda x: sympy_to_prefix_rec(x, []), args))
        ret = ret + repeat_operator_until_correct_binary(f_str, args)
    return ret

def sympy_to_prefix(expression):
    """
    Convert a sympy expression to prefix notation.
    Builds the nested token list recursively and returns it flattened.
    """
    nested_tokens = sympy_to_prefix_rec(expression, [])
    return flatten(nested_tokens)


# Expression Generator

In [7]:
@dataclass
class Operator:
    """Sampling metadata for one operator known to `ExpressionGenerator`."""
    # number of operands; 1 means unary, any other value is treated as binary
    arity: int
    # relative sampling weight, normalised into a probability by `convert_to_dist`
    weight: int
    
class ExpressionGenerator:
    # Operator vocabulary of the generator: token -> Operator(arity, weight).
    # Entries that are commented out are never sampled.
    OPERATORS = {
        # Elementary functions
        "+": Operator(2, 8),
        "-": Operator(2, 5),
        "*": Operator(2, 8),
        "/": Operator(2, 5),
        "^": Operator(2, 2),
        
        "pow2": Operator(1, 6),
        "pow3": Operator(1, 4),
        "pow4": Operator(1, 3),
        "pow5": Operator(1, 4),
        "pow6": Operator(1, 3),
        
        # "inv": Operator(1, 8),
        "sqrt": Operator(1, 8),
        "exp": Operator(1, 2),
        "ln": Operator(1, 4),
        # 'abs': Operator(1, 2),
        
        # Trigonometric Functions
        "sin": Operator(1, 6),
        "cos": Operator(1, 6),
        "tan": Operator(1, 4),
        # "cot": Operator(1, 2),
        
        # Inverse functions
        "asin": Operator(1, 2),
        "acos": Operator(1, 2),
        # "atan": Operator(1, 1),
        "acot": Operator(1, 2),
    }
    def __init__(self, max_ops, rng: np.random.Generator, variables=None):
        """
        Set up the sampling tables of the generator.

        max_ops: maximum number of operators, used to size the tree-count table
        rng: numpy random generator used for every random draw
        variables: variable names available as leaves (defaults to ["x"])
        """
        self.rng = rng
        self.max_ops = max_ops
        self.variables = ["x"] if variables is None else variables

        # weights used by the tree-counting recursion in `generate_ubi_dist`
        # (leaves, unary operators, binary operators)
        self.nl = self.p1 = self.p2 = 1

        # probabilities of the four leaf kinds handled in `get_leaf`
        leaf_weights = np.array([1, 0, 0, 0])
        self.leaf_probs = leaf_weights / leaf_weights.sum()

        self.ubi_dist = self.generate_ubi_dist()

        # split the vocabulary by arity, keeping declaration order
        self.una_ops = [name for name, op in self.OPERATORS.items() if op.arity == 1]
        self.bin_ops = [name for name, op in self.OPERATORS.items() if op.arity != 1]
        self.una_ops_probs = self.convert_to_dist(self.una_ops)
        self.bin_ops_probs = self.convert_to_dist(self.bin_ops)

    def convert_to_dist(self, operations: List[str]):
        """Return the weights of `operations`, normalised so they sum to 1."""
        weights = np.array([self.OPERATORS[name].weight for name in operations])
        return weights / weights.sum()

    def generate_ubi_dist(self):
        """
        Copied from https://github.com/facebookresearch/SymbolicMathematics/blob/4596d070e1a9a1c2239c923d7d68fda577c8c007/src/envs/char_sp.py
        `max_ops`: maximum number of operators
        Enumerate the number of possible unary-binary trees that can be generated from empty nodes.
        D[e][n] represents the number of different binary trees with n nodes that
        can be generated from e empty nodes, using the following recursion:
            D(0, n) = 0
            D(e, 0) = L ** e
            D(e, n) = L * D(e - 1, n) + p_1 * D(e, n - 1) + p_2 * D(e + 1, n - 1)

        Here L, p_1 and p_2 are `self.nl`, `self.p1` and `self.p2`.
        Returns the table as a list of lists indexed as D[e][n].
        """
        # enumerate possible trees
        # first generate the transposed version of D (indexed [n][e]), then transpose it
        D = [[0] + ([self.nl ** i for i in range(1, 2 * self.max_ops + 1)])]
        for n in range(1, 2 * self.max_ops + 1):  # number of operators
            s = [0]
            for e in range(1, 2 * self.max_ops - n + 1):  # number of empty nodes
                s.append(
                    self.nl * s[e - 1]
                    + self.p1 * D[n - 1][e]
                    + self.p2 * D[n - 1][e + 1]
                )
            D.append(s)
        # rows get shorter as n grows, which the transposition below relies on
        assert all(len(D[i]) >= len(D[i + 1]) for i in range(len(D) - 1))
        # transpose the ragged table, skipping cells that do not exist
        D = [
            [D[j][i] for j in range(len(D)) if i < len(D[j])]
            for i in range(max(len(x) for x in D))
        ]
        return D

    def get_leaf(self, max_int: int):
        """
        Draw one leaf according to `self.leaf_probs`:
        kind 0 -> a variable name, kind 1 -> a non-zero integer in
        [-max_int, max_int], kind 2 -> a float in [-max_int, max_int) rounded
        to 4 decimals, any other kind -> 0.
        """
        leaf_type = self.rng.choice(len(self.leaf_probs), p=self.leaf_probs)
        if leaf_type == 0:
            return self.rng.choice(self.variables)
        if leaf_type == 1:
            magnitude = self.rng.integers(1, max_int + 1)
            negate = self.rng.uniform() <= 0.5
            return -magnitude if negate else magnitude
        if leaf_type == 2:
            return np.round(self.rng.uniform(-max_int, max_int), 4)
        return 0

    def sample_next_pos_ubi(self, nb_empty, nb_ops):
        """
        Sample where the next operator goes (unary-binary case).
        Returns a position in {0, ..., `nb_empty` - 1} together with the arity
        (1 or 2) of the operator to place there.
        """
        assert nb_empty > 0
        assert nb_ops > 0
        remaining = nb_ops - 1
        # un-normalised weights: first all unary placements, then all binary ones
        unary = [
            (self.nl ** i) * self.p1 * self.ubi_dist[nb_empty - i][remaining]
            for i in range(nb_empty)
        ]
        binary = [
            (self.nl ** i) * self.p2 * self.ubi_dist[nb_empty - i + 1][remaining]
            for i in range(nb_empty)
        ]
        total = self.ubi_dist[nb_empty][nb_ops]
        probs = np.array([w / total for w in unary + binary], dtype=np.float64)

        drawn = self.rng.choice(2 * nb_empty, p=probs)
        arity = 1 if drawn < nb_empty else 2
        return drawn % nb_empty, arity

    def generate_expr(self, nb_total_ops, max_int):
        """
        Copied from https://github.com/facebookresearch/SymbolicMathematics/blob/4596d070e1a9a1c2239c923d7d68fda577c8c007/src/envs/char_sp.py
        Create a tree with exactly `nb_total_ops` operators.

        `max_int` is forwarded to `get_leaf` for every leaf.
        Returns the tree as a list of string tokens in prefix order.
        """
        # `None` entries are placeholders that will hold an operator or a leaf
        stack = [None]
        nb_empty = 1  # number of empty nodes
        l_leaves = 0  # left leaves - None states reserved for leaves
        t_leaves = 1  # total number of leaves (just used for sanity check)

        # create tree
        last_op = None  # never updated; only read by the no-op assignment below
        for nb_ops in range(nb_total_ops, 0, -1):

            # next operator, arity and position
            skipped, arity = self.sample_next_pos_ubi(nb_empty, nb_ops)
            if arity == 1:
                op = self.rng.choice(self.una_ops, p=self.una_ops_probs)  # add probs
            else:
                op = self.rng.choice(self.bin_ops, p=self.bin_ops_probs)  # add probs

            nb_empty += (
                self.OPERATORS[op].arity - 1 - skipped
            )  # created empty nodes - skipped future leaves
            t_leaves += self.OPERATORS[op].arity - 1  # update number of total leaves
            l_leaves += skipped  # update number of left leaves

            # update tree
            # the operator replaces the `l_leaves`-th placeholder and brings one
            # new placeholder per operand
            pos = [i for i, v in enumerate(stack) if v is None][l_leaves]
            stack = (
                stack[:pos]
                + [op]
                + [None for _ in range(self.OPERATORS[op].arity)]
                + stack[pos + 1 :]
            )

        _ = last_op
        leaves = [self.get_leaf(max_int) for _ in range(t_leaves)]
        self.rng.shuffle(leaves)

        # insert leaves into tree
        for pos in range(len(stack) - 1, -1, -1):
            if stack[pos] is None:
                stack = stack[:pos] + [leaves.pop()] + stack[pos + 1 :]
        # every leaf must have been used, i.e. placeholders == t_leaves
        assert len(leaves) == 0
        return [str(s) for s in stack]

    def rewrite(self, arg, x):
        """
        Render the unary operator `x` applied to the infix string `arg`.
        powN becomes "((arg)^N)", inv becomes "(1/(arg))", everything else is
        written as a function call "x(arg)".
        """
        exponent_of = {"pow2": 2, "pow3": 3, "pow4": 4, "pow5": 5, "pow6": 6}
        if x in exponent_of:
            return f"(({arg})^{exponent_of[x]})"
        if x == "inv":
            return f"(1/({arg}))"
        return x + "(" + arg + ")"

    def infix(self, exp):
        """Return the infix string for a prefix token list."""
        operands = []
        # walk the prefix list backwards so operands are ready before their operator
        for token in reversed(exp):
            spec = self.OPERATORS.get(token)
            if spec is None:
                operands.append(token)
                continue
            if spec.arity == 1:
                operands.append(self.rewrite(operands.pop(), token))
            else:
                left = operands.pop()
                right = operands.pop()
                operands.append("(" + left + " " + token + " " + right + ")")
        assert len(operands) == 1
        return operands[0]

# Data

In [9]:

def convert(infix):
    """Parse and simplify the infix string with sympy, then expand the result."""
    simplified = sympy.simplify(str(infix))
    return expand(simplified)#.evalf(8)


class Data():
    """
    
    """
    def __init__(self, config, order, precision, point):
        self.CFG = config
        self.order = order
        self.point = point
        self.exp_gen = ExpressionGenerator(self.CFG.max_num_ops, np.random.default_rng(), ['x'])
        self.x = symbols('x')

    def get_limit(self, expr, point):
        sub = expr.subs(self.x, point)
        
        if sub.is_finite:
            return sub
            
        if limit(sub, self.x, point).is_finite:
            return limit(sub, point)
            
        return nan
        
    def taylor_expsn(self, expr_str:str, point):
        expr = sympify(expr_str)

        # zeroth term
        expr_sub = self.get_limit(expr, point)
        if not expr_sub.is_finite:
            return nan

        zeroth_term = expr_sub
        fact = 1
        taylor = zeroth_term
        for order in range(1, self.order+1):
            der = diff(expr, self.x, order)
            der_sub = self.get_limit(der, point)
            if not der_sub.is_finite:
                continue
            fact *= order
            o_term = ((self.x - point)**order * der_sub) / fact 
            taylor += o_term
        return taylor
        
    def gen_expr(self):
        """
        Generate one random expression together with its Taylor expansion.

        Returns a tuple (prefix token list, simplified sympy expression,
        Taylor expansion). Candidates are rejected when the expansion has no
        arguments or when any coefficient has a non-zero imaginary part.

        Raises:
            Exception: after 20 rejected candidates ('hit max tries').
        """
        retry = 0
        while retry < 20:
            try:
                # each attempt is limited to 5 seconds; on expiry RuntimeError is raised
                with timeout(5, exception= RuntimeError):
                    x = symbols('x')
                    num_operations = self.exp_gen.rng.integers(1, self.CFG.max_num_ops + 1)
                    pfx = self.exp_gen.generate_expr(num_operations, self.CFG.max_integer)
                    ifx = self.exp_gen.infix(pfx)
                    ifx = convert(ifx)
                    # ifx = prefix_to_sympy(pfx)
                    expsn = self.taylor_expsn(ifx, self.point)
                    coeff = [expsn.coeff(x,_) for _ in range(self.order+1)]
                    assert len(expsn.args)>0, 'No useful expansion!'
                    
                    assert sum([im(_)!=0 for _ in coeff]) == 0, 'Imaginary is bad!'
                    return pfx, ifx, expsn
            except RuntimeError:
                # NOTE(review): timeouts do not increase `retry`. RecursionError is a
                # subclass of RuntimeError, so it is also reported as 'Timed out' here.
                print('Timed out')
                continue
            except Exception as ex:
                # if retry == 1:
                #     print(ex)
                    
                # any other failure (including the asserts above) counts as one retry
                retry += 1
                continue
        print('Skipping Number')
        raise Exception('hit max tries')
    def generate(self):
        
        df = pd.DataFrame()
        function = []
        prefixes = []
        expansion = []
        x = symbols('x')
        for i in tqdm(range(self.CFG.data_size)):
            try:
                prefix, expression, expnsn = self.gen_expr()
            except:
                continue
            prefixes.append(prefix)
            function.append(str(expression))
            expansion.append(str(expnsn))
        df['prefix'] = prefixes    
        df['function'] = function
        df['expansion'] = expansion
        # Dataframe which contain functions and their corresponding expansion
        df = df.sample(frac=1, random_state=2023, ignore_index=True)
        return df


# Generate Data

In [10]:
# 4th-order Taylor expansions around x = 0
data = Data(config=CFG, order=4, precision=8, point=0)

In [11]:
# Long-running cell: the recorded run below needed about 27 minutes for 5000 attempts.
df = data.generate()

  2%|▏         | 85/5000 [00:35<1:54:53,  1.40s/it]

Timed out


  3%|▎         | 174/5000 [01:03<30:22,  2.65it/s]  

Timed out


  8%|▊         | 378/5000 [02:16<29:02,  2.65it/s]  

Timed out


  8%|▊         | 390/5000 [02:24<28:48,  2.67it/s]  

Timed out


 20%|██        | 1011/5000 [05:04<1:24:47,  1.28s/it]

Timed out


 20%|██        | 1018/5000 [05:06<30:39,  2.16it/s]  

Timed out


 22%|██▏       | 1095/5000 [05:42<1:39:26,  1.53s/it]

Timed out


 24%|██▍       | 1214/5000 [06:08<08:19,  7.59it/s]  

Timed out


 34%|███▎      | 1679/5000 [08:35<40:32,  1.37it/s]  

Timed out


 34%|███▍      | 1716/5000 [08:52<1:15:10,  1.37s/it]

Timed out


 35%|███▍      | 1740/5000 [09:02<35:17,  1.54it/s]  

Timed out


 36%|███▌      | 1783/5000 [09:16<48:45,  1.10it/s]

Timed out


 41%|████      | 2058/5000 [10:35<09:34,  5.12it/s]  

Timed out


 45%|████▍     | 2241/5000 [11:48<44:03,  1.04it/s]  

Timed out


 46%|████▋     | 2317/5000 [12:06<06:52,  6.50it/s]

Timed out


 53%|█████▎    | 2650/5000 [14:03<31:31,  1.24it/s]

Timed out


 56%|█████▌    | 2779/5000 [14:46<50:58,  1.38s/it]  

Timed out


 57%|█████▋    | 2847/5000 [15:07<40:58,  1.14s/it]

Timed out


 64%|██████▍   | 3223/5000 [16:53<27:01,  1.10it/s]

Timed out


 70%|██████▉   | 3497/5000 [18:23<40:57,  1.64s/it]

Timed out


 71%|███████▏  | 3572/5000 [18:39<04:38,  5.13it/s]

Timed out


 73%|███████▎  | 3650/5000 [19:07<02:07, 10.61it/s]

Timed out


 75%|███████▌  | 3755/5000 [19:46<05:30,  3.77it/s]

Timed out


 75%|███████▌  | 3768/5000 [20:00<25:02,  1.22s/it]

Timed out


 81%|████████▏ | 4065/5000 [21:43<08:51,  1.76it/s]

Timed out


 84%|████████▍ | 4195/5000 [22:32<13:13,  1.01it/s]

Timed out


 84%|████████▍ | 4225/5000 [22:44<17:19,  1.34s/it]

Timed out


 86%|████████▋ | 4323/5000 [23:08<02:25,  4.65it/s]

Timed out


 95%|█████████▌| 4768/5000 [25:21<02:08,  1.80it/s]

Timed out


100%|██████████| 5000/5000 [26:46<00:00,  3.11it/s]


## Simplify and Sympify Expressions

In [14]:
# Re-derive the prefix token list of every function from its sympy form.
# Rows are collected in plain lists and the frame is built once at the end
# instead of growing a DataFrame row by row.
kept_index = []
kept_rows = []
pbar = tqdm(df.iterrows(), total = len(df))

for idx, row in pbar:
    pbar.set_description(f'{len(kept_rows)}')
    try:
        pfx = sympy_to_prefix(sympify(row['function']))
    except Exception:
        # Expressions that cannot be tokenised are shown and dropped.
        # `except Exception` (not a bare `except`) keeps Ctrl-C working.
        print(row)
        continue
    kept_index.append(idx)
    kept_rows.append({'prefix':pfx,'function':row['function'], 'expansion':row['expansion']})

df_new = pd.DataFrame(kept_rows, index=kept_index, columns=['prefix','function','expansion'])

2099:  42%|████▏     | 2091/5000 [00:10<00:17, 170.60it/s]

prefix       [acos, -, x, sin, ^, -, /, x, -, x, x, x, x]
function                        acos(x - sin((zoo*x)**x))
expansion                                   acos(-sin(1))
Name: 2072, dtype: object


4998: 100%|██████████| 5000/5000 [00:25<00:00, 194.82it/s]


In [15]:
# The number of rows that survived the prefix conversion is embedded in the file name.
df_new.to_csv(f'final_data_{len(df_new)}.csv',index=False)

## What fraction of the functions are identical to their Taylor expansion?
i.e. expressions that contain only powers of x below 5

In [19]:
# Mean of the boolean mask = fraction of rows where function and expansion
# match; vectorised, and yields NaN instead of ZeroDivisionError for an empty frame.
(df_new['function'] == df_new['expansion']).mean()

0.3660732146429286