In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
from scipy import integrate
import sympy as sy
import random
import os
import pandas as pd

### Generating "traditional" integrals

In [None]:
def generate_poly(num_terms):
    """Draw a random sparse polynomial as parallel coefficient/degree arrays.

    Slots ``1 .. num_terms - 1`` are filled, so the polynomial has
    ``num_terms - 1`` terms; slot 0 and every slot from ``num_terms`` onward
    stay zero in both arrays.  Term ``i`` is ``poly[i] * x**degrees[i]``.

    Parameters
    ----------
    num_terms : int
        One more than the number of terms to draw.  Both arrays have a fixed
        length of 10, so values above 10 raise ``IndexError``.

    Returns
    -------
    poly : numpy.ndarray, shape (10,)
        Coefficients; filled slots hold integers in [1, 9] stored as floats.
    degrees : numpy.ndarray, shape (10,)
        Exponents; filled slots hold integers in [1, 19] stored as floats.
    """
    poly = np.zeros(10)
    degrees = np.zeros(10)

    for i in range(1, num_terms):
        # Coefficients start at 1.  A zero coefficient would record a degree
        # for a term that is absent from the polynomial; find_smallest /
        # find_largest select by degree alone and solve_simple_integral then
        # divides by the selected coefficient.
        poly[i] = np.random.randint(1, 10)
        degrees[i] = np.random.randint(1, 20)

    return poly, degrees

def integral(degree):
    """Draw a random problem, integrate it numerically and plot I(epsilon).

    The integrand is ``1 / (eps + P(x))`` on ``[0, a]``, where ``P`` comes
    from ``generate_poly(degree)`` and ``a`` is drawn uniformly from
    ``[0, 100)``.  The integral is evaluated for 100 values of ``eps``
    log-spaced between 1e-6 and 1e10 and shown on a log-log plot.

    Parameters
    ----------
    degree : int
        Forwarded unchanged to ``generate_poly`` as its ``num_terms``.

    Returns
    -------
    poly, degrees : numpy.ndarray
        The coefficient and exponent arrays of the drawn polynomial.
    a : float
        The upper integration limit that was drawn.
    """
    poly, degrees = generate_poly(degree)

    a = np.random.uniform(0, 100)

    eps_list = 10.**np.linspace(-6, 10, 100)

    def integrand(x, eps):
        return 1 / (eps + sum(poly[i] * x**degrees[i] for i in range(len(poly))))

    I_eps = [integrate.quad(integrand, 0, a, args=(eps,))[0] for eps in eps_list]

    # Plotting
    plt.rcParams.update({'font.size': 22})
    plt.figure(figsize=(15, 10))
    plt.loglog(eps_list, np.abs(I_eps), 'o', mfc='none', markersize=10, label='numerical')
    # Raw string: "\e" is not a valid escape sequence in a normal literal.
    plt.xlabel(r'$\epsilon$')
    plt.ylabel('Integral')
    plt.legend(loc='best')

    return poly, degrees, a

In [None]:
# Draw one random problem, plot its numerical integral against epsilon, and
# keep the polynomial arrays and upper limit for the cells below.
poly, degrees, a = integral(7)

In [None]:
# Symbolic form of the integrand's denominator, epsilon + P(x).
# Raw string: "\e" is not a valid escape sequence in a normal literal.
ep = sy.symbols(r'\epsilon')
x = sy.symbols('x')
print(degrees)
# Sum over every slot from 1 up to the actual array length instead of
# spelling out indices 1..9 by hand; slot 0 is skipped, as before.
eq = ep + sum(poly[i] * x**degrees[i] for i in range(1, len(poly)))

### Format LaTeX

In [None]:
def format_integral_question(poly, degrees, a):
    """Build the LaTeX problem statement for the integral of 1/(epsilon + P(x)).

    Parameters
    ----------
    poly, degrees : indexable sequences
        Coefficient and exponent of each term; term ``k`` is
        ``poly[k] * x**degrees[k]``.  ``degrees`` is read at every index of
        ``poly``.
    a : number
        Upper integration limit, printed with two decimals.

    Returns
    -------
    str
        The question text with the polynomial rendered by ``sympy.latex``.
    """
    var = sy.symbols('x')

    # Accumulate P(x) term by term, starting from 0 like the built-in sum().
    p_of_x = 0
    for k in range(len(poly)):
        p_of_x = p_of_x + poly[k] * var**degrees[k]

    p_of_x_tex = sy.latex(p_of_x)
    upper_limit = f'{a:.2f}'

    pieces = [
        r'Consider the integral $I(\epsilon) = \int_0^{',
        upper_limit,
        r'} \frac{1}{\epsilon + ',
        p_of_x_tex,
        r'} dx$. Develop analytical formulas that approximate $I(\epsilon)$ for different regimes of $\epsilon$.',
    ]
    return ''.join(pieces)

In [None]:
# Render the problem drawn above as a LaTeX question string and show it.
latex_question = format_integral_question(poly, degrees, a)
print(latex_question)

### Deriving approximate solutions

### Small $\epsilon$

In [None]:
def find_smallest(array):
    """Return the smallest non-zero entry of ``array`` and its position.

    Zero entries are ignored.  Ties resolve to the first occurrence.  If
    there is no non-zero entry, the ``ValueError`` from ``np.min`` on an empty
    sequence propagates.

    Returns
    -------
    (min_value, original_index)
        The minimum and its index in the original ``array``.
    """
    # Keep (position, entry) pairs for everything that is not zero.
    kept = [(pos, entry) for pos, entry in enumerate(array) if entry != 0]
    positions = [pos for pos, _ in kept]
    entries = [entry for _, entry in kept]

    smallest = np.min(entries)
    where = np.argmin(entries)

    # Map the position inside the filtered list back to the input index.
    return smallest, positions[where]

### Large $\epsilon$

In [None]:
def find_largest(array):
    """Return the largest non-zero entry of ``array`` and its position.

    Zero entries are ignored.  Ties resolve to the first occurrence.  If
    there is no non-zero entry, the ``ValueError`` from ``np.max`` on an empty
    sequence propagates.

    Returns
    -------
    (max_value, original_index)
        The maximum and its index in the original ``array``.
    """
    # Keep (position, entry) pairs for everything that is not zero.
    kept = [(pos, entry) for pos, entry in enumerate(array) if entry != 0]
    positions = [pos for pos, _ in kept]
    entries = [entry for _, entry in kept]

    largest = np.max(entries)
    where = np.argmax(entries)

    # Map the position inside the filtered list back to the input index.
    return largest, positions[where]

In [None]:
def solve_simple_integral(degree):
    """Generate one random integral problem with its approximate solutions.

    A polynomial ``P`` is drawn with ``generate_poly(degree)`` and an integer
    upper limit ``a`` from [1, 99].  The integral of ``1 / (eps + P(x))`` over
    ``[0, a]`` is evaluated numerically on a grid of ``eps`` values and
    compared with three height-times-width estimates:

    * small eps      -- width set by the lowest-degree term of ``P``;
    * large eps      -- width set by the highest-degree term of ``P``;
    * very large eps -- width equal to the whole interval, i.e. ``a / eps``.

    Parameters
    ----------
    degree : int
        Forwarded unchanged to ``generate_poly`` as its ``num_terms``.

    Returns
    -------
    tuple
        ``(latex_question, explanation, extracted_solution,
        small_numerical, large_numerical, verylarge_numerical,
        small_approx, large_approx, verylarge_approx, poly, degrees, a)``.
        The small / large / very-large values are taken at grid positions
        0, ``len(grid) // 3`` and -1 respectively.
    """
    # Generate a random polynomial of the given degree
    poly, degrees = generate_poly(degree)

    # Start at 1: with a == 0 the integration interval is empty, so both the
    # integral and the a/eps estimate are identically zero.
    a = np.random.randint(1, 100)

    latex_question = format_integral_question(poly, degrees, a)

    # eps = 1e-3 is prepended so that grid position 0 is the small-eps
    # evaluation point.
    eps_list = np.logspace(-6, 40, 100)
    eps_list = np.concatenate(([1e-3], eps_list))

    def integrand(x, eps):
        return 1 / (eps + sum(poly[i] * x**degrees[i] for i in range(len(poly))))

    I_eps = [integrate.quad(integrand, 0, a, args=(eps,))[0] for eps in eps_list]

    deg1, ind1 = find_smallest(degrees)   # lowest-degree term
    deg, ind = find_largest(degrees)      # highest-degree term

    # height (1/eps) times width ((eps / c) ** (1 / d)) for each regime
    smalleps = [1/eps * (1/poly[ind1]*eps)**(1/deg1) for eps in eps_list]
    largeeps = [1/eps * (1/poly[ind]*eps)**(1/deg) for eps in eps_list]
    verylargeeps = [a/eps for eps in eps_list]

    epsilon = sy.Symbol('epsilon')

    # Each symbolic expression pairs a term's coefficient with that same
    # term's degree, matching the numeric lists above.
    smalleps_expr = round((1/poly[ind1]) ** (1/deg1), 2) * 1/epsilon * epsilon ** round((1/deg1), 2)
    largeeps_expr = round((1/poly[ind]) ** (1/deg), 2) * 1/epsilon * epsilon ** round((1/deg), 2)
    verylargeeps_expr = a/epsilon

    smalleps_latex = sy.latex(smalleps_expr)
    largeeps_latex = sy.latex(largeeps_expr)
    verylargeeps_latex = sy.latex(verylargeeps_expr)

    # Creating the LaTeX explanation
    explanation = r"""
    The integral is of the form $I(\epsilon) = \int_0^{%s} \frac{1}{\epsilon + P(x)} dx$ where $P(x)$ is a polynomial. Thus, its value can be estimated as the product between a height and a width.
    Since the integrand is maximized at $x = 0$, the height can be set to $\frac{1}{\epsilon}$.

    For small $\epsilon$,
    we define the width as the point where the integrand becomes half of its maximum height.
    This corresponds to solving for $x$ given $P(x) = \epsilon$.
    Applying dominant balance, considering the term in $P(x)$ with the smallest degree, the width is approximated as $ \left( \frac{\epsilon}{%s} \right)^{1/%s} $.
    Therefore, the analytical approximation of the integral for small $\epsilon$ is $\boxed{I(\epsilon) = %s}$.

    For an intermediate regime where $\epsilon$ is large,
    we also define the width based on the term with the largest degree.
    The width is approximated as \( \left( \frac{\epsilon}{%s} \right)^{1/%s} \).
    Therefore, the analytical approximation of the integral for large $\epsilon$ is $\boxed{I(\epsilon) = %s}$.

    If the width of the integral exceeds the range of integration, we consider one more regime for very large $\epsilon$.
    The width is then just the range of integration, so in this regime, the integral can be approximated as $\frac{L}{\epsilon}$.
    Therefore, the analytical approximation of the integral for very large $\epsilon$ is $\boxed{I(\epsilon) = %s}$.

    Altogether, the solutions at small, large, and very large $\epsilon$ are $\boxed{%s, %s, %s}$.
    """ % (a, poly[ind1], deg1, smalleps_latex, poly[ind], deg, largeeps_latex,\
        verylargeeps_latex, smalleps_latex, largeeps_latex, verylargeeps_latex)

    extracted_solution = r"""
    $$\boxed{[%s, %s]}$$
    """ % (smalleps_latex, largeeps_latex)

    # Grid position used for the "large epsilon" comparison
    large_idx = int(len(I_eps)/3)

    # Numerical values at small, large and very large epsilon
    small_x_numerical_eval = np.abs(I_eps[0])
    large_x_numerical_eval = np.abs(I_eps[large_idx])
    verylarge_x_numerical_eval = np.abs(I_eps[-1])

    # Approximate values at the same three epsilon points
    small_x_approx_eval = smalleps[0]
    large_x_approx_eval = largeeps[large_idx]
    verylarge_x_approx_eval = verylargeeps[-1]

    return latex_question, explanation, extracted_solution, small_x_numerical_eval, large_x_numerical_eval,\
        verylarge_x_numerical_eval, small_x_approx_eval, large_x_approx_eval,\
            verylarge_x_approx_eval, poly, degrees, a

### Verifier of solution accuracy

In [None]:
def verifier(small_eps_numerical, large_eps_numerical, verylarge_eps_numerical,\
    small_eps_sol, large_eps_sol, verylarge_eps_sol, error_limit):
    """Decide whether a generated problem must be rejected.

    Parameters
    ----------
    small_eps_numerical, large_eps_numerical, verylarge_eps_numerical : float
        Reference values from numerical integration in the three regimes.
    small_eps_sol, large_eps_sol, verylarge_eps_sol : float
        Analytical approximations at the same three points.
    error_limit : float
        Largest tolerated relative error, in percent.

    Returns
    -------
    bool
        ``True`` when the problem should be discarded: some value is NaN,
        infinite or exactly zero, or some approximation is off by more than
        ``error_limit`` percent.  ``False`` when every check passes.
    """
    approximations = (small_eps_sol, large_eps_sol, verylarge_eps_sol)
    references = (small_eps_numerical, large_eps_numerical, verylarge_eps_numerical)

    # np.isfinite catches +/-inf and NaN.  A comparison such as
    # ``value == nan`` is always False, so it can never detect NaN.  The zero
    # check also keeps the division below well defined.
    for value in approximations + references:
        if not np.isfinite(value) or value == 0:
            return True

    for approx, reference in zip(approximations, references):
        if np.abs((approx - reference) / reference) * 100 > error_limit:
            return True

    return False

### Plotting the solutions for visual verification

In [None]:
def plot_solution(poly, degrees, a):
    """Plot the numerical integral together with the three approximations.

    Parameters
    ----------
    poly, degrees : numpy.ndarray
        Coefficient and exponent arrays; term ``i`` is
        ``poly[i] * x**degrees[i]``.
    a : number
        Upper integration limit.  The value passed in is the one plotted;
        it is not redrawn inside this function.

    Returns
    -------
    None
        The figure is created through ``matplotlib.pyplot``.
    """
    eps_list = np.logspace(-6, 40, 100)

    def integrand(x, eps):
        return 1 / (eps + sum(poly[i] * x**degrees[i] for i in range(len(poly))))

    I_eps = [integrate.quad(integrand, 0, a, args=(eps,))[0] for eps in eps_list]

    deg1, ind1 = find_smallest(degrees)   # lowest-degree term
    deg, ind = find_largest(degrees)      # highest-degree term

    # height (1/eps) times width for each regime
    smalleps = [1/eps * (1/poly[ind1]*eps)**(1/deg1) for eps in eps_list]
    largeeps = [1/eps * (1/poly[ind]*eps)**(1/deg) for eps in eps_list]
    verylargeeps = [a/eps for eps in eps_list]

    plt.rcParams.update({'font.size': 22})
    plt.figure(figsize=(15, 10))
    plt.loglog(eps_list, np.abs(I_eps), '-', mfc='none', markersize=10, label='Numerical Integration')
    # Raw strings: "\e" is not a valid escape sequence in a normal literal.
    plt.loglog(eps_list, smalleps, '-', label=r'Approximation for small $\epsilon$')
    plt.loglog(eps_list, largeeps, 'r+', label=r'Approximation for large $\epsilon$')
    plt.loglog(eps_list, verylargeeps, 'o', label=r'Approximation for very large $\epsilon$')

    plt.xlabel(r'$\epsilon$')
    plt.ylabel('Integral')
    plt.legend(loc='best')

In [None]:
# Visual check: plot the numerical integral and the approximations for the
# polynomial drawn earlier.
plot_solution(poly, degrees, a)

### Generating the dataset

In [None]:
def generate_dataset(n, filename, error_limit=10):
    """Generate ``n`` verified integral problems and write them to a CSV file.

    Problems are drawn with ``solve_simple_integral`` until ``n`` of them are
    new (question text not seen before) and accepted by ``verifier``.  Any
    exception raised while producing a problem is printed and that attempt is
    skipped.

    Parameters
    ----------
    n : int
        Number of accepted problems to collect.
    filename : str
        Output file name, joined onto the current working directory.
    error_limit : float, default 10
        Percent-error threshold forwarded to ``verifier``.

    Returns
    -------
    pandas.DataFrame
        One row per accepted problem, identical to what is written to disk.
    """
    problem_type = "Integral"
    solution_type = "list"
    columns = ["question", "solution", "question type", "answer type",
               "extracted answer",
               "small_eval_point", "small_analytical", "small_numerical",
               "large_eval_point", "large_analytical", "large_numerical"]

    # Same grid as the one built inside solve_simple_integral; it is only
    # used here to record the epsilon values of the two stored evaluations.
    eps_list = np.logspace(-6, 40, 100)
    eps_list = np.concatenate(([1e-3], eps_list))
    small_eval_point = eps_list[0]
    large_eval_point = eps_list[int(len(eps_list)/3)]

    data = []
    seen_questions = set()

    num_complete_problems = 0
    counter = 0

    while num_complete_problems < n:
        try:
            degree = random.randint(2, 7)
            question, solution, extracted_solution,\
                small_x_numerical_eval, large_x_numerical_eval, verylarge_x_numerical_eval,\
                small_x_approx_eval, large_x_approx_eval, verylarge_x_approx_eval,\
                poly, degrees, a = solve_simple_integral(degree)

            if question in seen_questions:
                raise ValueError("Duplicate problem detected.")

            # verifier returns True when the problem has to be rejected.
            if verifier(small_x_numerical_eval, large_x_numerical_eval, verylarge_x_numerical_eval,\
                small_x_approx_eval, large_x_approx_eval, verylarge_x_approx_eval, error_limit):
                raise Exception("Percent error exceeded.")

            seen_questions.add(question)

            data.append([question, solution, problem_type, solution_type,
                         extracted_solution,
                         small_eval_point, small_x_approx_eval, small_x_numerical_eval,
                         large_eval_point, large_x_approx_eval, large_x_numerical_eval])

            num_complete_problems += 1
            counter += 1
            print(f"Num complete: {num_complete_problems}")

        except Exception as e:
            print(f"Error in iteration {counter+1}: {e}. {num_complete_problems} correct; moving to the next iteration.")
            counter += 1
            continue

    print(f"Num total iterations: {counter}")

    # Build the frame straight from the row list.  Passing the rows through
    # np.array first would cast every column to strings (rows mix text and
    # floats) and does not yield an 11-column frame when there are no rows.
    data_df = pd.DataFrame(data, columns=columns)

    full_path = os.path.join(os.getcwd(), filename)
    data_df.to_csv(full_path, index=False)

    print(f"Number of complete problems: {num_complete_problems} / {counter}")
    return data_df

In [None]:
# Build a 150-problem dataset and save it as a CSV in the current working
# directory.
n = 150
data = generate_dataset(n, "integral_train_dataset.csv")