In [13]:
import json
import random
# JSON text is UTF-8 by specification; naming the encoding explicitly keeps the
# load independent of the platform's default locale encoding.
with open("datasets/taylor_dataset_10k.json", encoding="utf-8") as f:
    raw_data = json.load(f)


In [14]:
# One Taylor-series string per dataset record, in dataset order.
# Iterating the records directly replaces the range(len(...)) index loop.
comp_taylor_series = [record["taylor_series"] for record in raw_data]

In [15]:
import re
# Collect every numeric token from every series string, in order of appearance.
# The pattern tries "a/b" fractions first and falls back to plain integers;
# both alternatives accept an optional leading minus sign.
number_pattern = re.compile(r'-?\d+\/\d+|-?\d+')
numbers = [
    token
    for series_text in comp_taylor_series
    for token in number_pattern.findall(series_text)
]

In [18]:
from collections import Counter

# Keep only tokens made purely of digits: str.isdigit() is False for anything
# containing '-' or '/', so negative values and fractions are left out here.
dist_numbs = []
for token in numbers:
    if token.isdigit():
        dist_numbs.append(int(token))

# Mapping: number -> its frequency
dist_counts = Counter(dist_numbs)

# e.g. inspect
# print(dist_counts)

In [19]:
# Display how often each non-negative integer token occurs in the series strings.
dist_counts

Counter({2: 7730,
         3: 7637,
         4: 6387,
         1: 1389,
         5: 736,
         6: 422,
         8: 354,
         9: 206,
         12: 171,
         7: 144,
         10: 140,
         16: 85,
         20: 80,
         15: 76,
         18: 76,
         24: 53,
         11: 42,
         32: 38,
         25: 34,
         14: 31,
         36: 29,
         13: 20,
         72: 18,
         17: 18,
         40: 15,
         21: 15,
         50: 14,
         30: 13,
         54: 12,
         48: 11,
         27: 10,
         96: 10,
         60: 10,
         64: 10,
         33: 10,
         28: 9,
         80: 9,
         19: 9,
         128: 8,
         22: 8,
         108: 7,
         45: 7,
         35: 6,
         200: 5,
         42: 5,
         125: 5,
         26: 5,
         38: 5,
         34: 4,
         288: 4,
         23: 4,
         160: 4,
         81: 3,
         256: 3,
         63: 3,
         62: 3,
         75: 3,
         864: 3,
         144: 3,
      

In [41]:
import re
from fractions import Fraction

# Fixed denominators: k! for k = 0..4 (FACTORIALS[k] == k!)
# Multiplying raw coeff c_k by k! gives the k-th derivative at 0
# Only orders 0 through 4 are tabulated; indexing past the end raises IndexError.
FACTORIALS = [1, 1, 2, 6, 24]


def taylor_to_coeffs(taylor_str: str, max_order: int = 4) -> list[float]:
    """
    Convert a Taylor series string to derivative values [a0..a_max_order].

    Output vector:  [f(0), f'(0), f''(0), ...] with ``max_order + 1`` entries
    Relationship:   a_k = raw_coeff_k * k!

    Parameters
    ----------
    taylor_str : str
        Polynomial in ``x`` written as a sum of terms such as ``(1/2)*x**2``,
        ``-x**3``, ``(7)*x`` or a bare constant. Coefficients are integers or
        ``p/q`` fractions; parentheses and whitespace are ignored.
    max_order : int, default 4
        Highest power of ``x`` that is kept. Terms of higher power are ignored.

    Returns
    -------
    list[float]
        ``max_order + 1`` values; entry ``k`` is the coefficient of ``x**k``
        multiplied by ``k!``. Arithmetic is done with exact fractions and only
        converted to float at the end.

    Raises
    ------
    ValueError
        If the coefficient in front of ``x`` is malformed (e.g. ``1//2``).
    ZeroDivisionError
        If a coefficient has a zero denominator (e.g. ``1/0``).

    Notes
    -----
    Terms that cannot be parsed (decimal coefficients, negative exponents,
    malformed constants, ...) are skipped rather than reported.

    Examples
    --------
    'x + (1/2)*x**2 + (1/6)*x**3 + (1/24)*x**4'  ->  [0, 1, 1, 1, 1]
    'x**2 + (-1/2)*x**4'                           ->  [0, 0, 2, 0, -12]
    '(7)*x'                                         ->  [0, 7, 0, 0, 0]
    'x - (-2)*x**2'                                 ->  [0, 1, 4, 0, 0]
    """
    # Local import keeps the function self-contained; computing k! on demand
    # removes the upper limit that a fixed-size factorial table imposes on
    # max_order.
    from math import factorial

    coeffs = [Fraction(0)] * (max_order + 1)

    # Strip all whitespace and parens, and normalise the exponent symbol
    s = ''.join(taylor_str.split()).replace('**', '^').replace('(', '').replace(')', '')

    # Sign contributed by a stand-alone binary operator, e.g. the first '-' in
    # 'x--2*x^2' (which is 'x - (-2)*x**2' once the parens are stripped).
    pending_sign = 1

    # Split into signed terms (don't split on negatives inside exponents/fractions)
    for term in re.split(r'(?<![e^*/])(?=[+\-])', s):
        if not term:
            continue

        # A lone '+' / '-' is a binary operator whose operand carries its own
        # sign; remember it so it is applied to the following term.
        if term in ('+', '-'):
            if term == '-':
                pending_sign = -pending_sign
            continue

        # The operator applies to this term only, whether or not it parses.
        term_sign, pending_sign = pending_sign, 1

        # Term contains x -> extract coefficient and power
        m = re.match(r'^([+\-]?[\d/]*)\*?x(?:\^(\d+))?$', term)
        if m:
            raw_c = m.group(1)
            power = int(m.group(2) or 1)
            if power > max_order:
                continue
            if raw_c in ('', '+'):
                c = Fraction(1)
            elif raw_c == '-':
                c = Fraction(-1)
            else:
                c = Fraction(raw_c)
            coeffs[power] += term_sign * c

        # Constant term (no x)
        elif 'x' not in term:
            try:
                coeffs[0] += term_sign * Fraction(term)
            except ValueError:
                # Best-effort parsing: a constant that is not a valid
                # integer/fraction is ignored.
                pass

    # Multiply each raw coeff by k! to recover the k-th derivative value
    return [float(coeffs[k] * factorial(k)) for k in range(max_order + 1)]


# -- Usage --------------------------------------------------------------------
if __name__ == '__main__':
    import json
    import random

    # Context manager closes the dataset file as soon as it has been parsed,
    # instead of leaving the handle open until garbage collection.
    with open('datasets/taylor_dataset_10k.json') as dataset_file:
        raw_data = json.load(dataset_file)

    # Pick one random record and show its non-zero derivative values.
    idx = random.randint(0, len(raw_data) - 1)
    taylor_series = raw_data[idx]['taylor_series']
    print('Input  :', taylor_series)

    a = taylor_to_coeffs(taylor_series)
    labels = ["f(0)", "f'(0)", "f''(0)", "f'''(0)", "f''''(0)"]
    # zip stops at the shorter sequence, so only the first five orders are labelled.
    for label, val in zip(labels, a):
        if val:  # zero derivatives are omitted from the printout
            print(f"  {label} = {val}")

Input  : (5)*x + (-2)*x**2 -x**3
  f'(0) = 5.0
  f''(0) = -4.0
  f'''(0) = -6.0


In [None]:

# Show one dataset entry and pass it to the demo helper.
# NOTE(review): `idx` and `raw_data` are taken from earlier cells, and
# `demo_single` is not defined in the visible part of this notebook. Confirm
# all three exist on a fresh kernel (Restart & Run All) before relying on
# this cell.
print(f"Example {idx}:")
taylor_series = raw_data[idx]["taylor_series"]
print(taylor_series)
demo_single(taylor_str=taylor_series)

Example 7879:
1 + (-1/2)*x**2 + (-1/8)*x**4

Input : 1 + (-1/2)*x**2 + (-1/8)*x**4
Raw coefficients  [c0..c5] : [1.0, 0.0, -0.5, 0.0, -0.125]
Derivative values [a0..a5] : [1.0, 0.0, -1.0, 0.0, -3.0]
  a0=f(0)=1.0000,  a1=f'(0)=0.0000,  
          f"a2=f''(0)=-1.0000,  a3=f'''(0)=0.0000,  
          f"a4=f''''(0)=-3.0000 
Reconstructed     : 1 + (-1/2)*x**2 + (-1/8)*x**4



Input : x + (12)*x**3
Raw coefficients  [c0..c5] : [0.0, 1.0, 0.0, 12.0, 0.0]
Derivative values [a0..a5] : [0.0, 1.0, 0.0, 72.0, 0.0]
  a0=f(0)=0.0000,  a1=f'(0)=1.0000,  
          f"a2=f''(0)=0.0000,  a3=f'''(0)=72.0000,  
          f"a4=f''''(0)=0.0000 
Reconstructed     : (1)*x + (12)*x**3
