<a href="https://colab.research.google.com/github/srak71/Image-To-Asyptote-Tuning/blob/main/asymptote_data_gen.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Cell 0
# Mount Google Drive; later cells write the dataset and metadata CSV under
# /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

# Uncomment on first run
# (these install the Asymptote package; later cells shell out to its `asy` executable)
#!sudo dpkg --configure -a
#!sudo apt-get update
#!sudo apt-get install -y asymptote

Mounted at /content/drive
Get:1 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:2 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:3 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:4 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:5 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Get:6 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:7 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease [18.1 kB]
Hit:8 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:10 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:11 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 Packages [3,295 kB]
Get:12 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 Packages [1,553 kB]
Get:13 http://archive.

In [None]:
# Cell 1
import random
import subprocess
import os
from PIL import Image
import matplotlib.pyplot as plt
import math # Added for pi, sin, cos, etc.

print("Testing environment setup...")

# Test Asymptote installation
try:
    # `--version` prints the version banner and exits.  The short flag `-V` is
    # Asymptote's "View" option, so `asy -V` starts the interactive prompt
    # ("Welcome to Asymptote ... >") instead of reporting a version.
    result = subprocess.run(['asy', '--version'], capture_output=True, text=True, check=True)
    # The banner may arrive on either stream; use whichever is non-empty and
    # keep only the first line so the output stays short.
    version_text = result.stdout.strip() or result.stderr.strip()
    version_line = version_text.splitlines()[0] if version_text else "(no version text reported)"
    print(f"✅ Asymptote version: {version_line}")
except FileNotFoundError:
    print("❌ Asymptote not found. Please install it first.")
except subprocess.CalledProcessError as e:
    print(f"❌ Asymptote installed but version check failed. stderr: {e.stderr.strip()}")
    # Attempt to run a simple command to see if asy works at all
    try:
        subprocess.run(['asy', '-l'], capture_output=True, text=True, check=True)
        print("✅ Asymptote seems to be callable despite version check issue.")
    except Exception as e_simple:
        print(f"❌ Basic Asymptote command failed: {e_simple}")
except Exception as e:
    print(f"❌ Error checking Asymptote version: {e}")


print("✅ All imports successful!")

Testing environment setup...
✅ Asymptote version: Welcome to Asymptote version 2.78 (to view the manual, type help)
>
✅ All imports successful!


In [None]:
# Cell 2: Define Asymptote Templates

# Asymptote preamble prepended to every template below: imports the `graph`
# module, fixes the picture size, declares both axes, sets the default font
# size, and defines the pen `graphPen` that the templates' draw() calls use.
# It contains no str.format placeholders.
COMMON_SETTINGS = """
import graph;
size(300,200,IgnoreAspect);
xaxis("$x$", BottomTop, LeftTicks);
yaxis("$y$", LeftRight, RightTicks);
defaultpen(fontsize(10pt));
pen graphPen = linewidth(1.2);
"""

# Template for y = slope*x + intercept, drawn in blue.
# Single-brace names ({xmin}, {slope}, {equation_label}, ...) are filled in by
# str.format; doubled braces ({{ }}) become literal Asymptote braces.
LINEAR_TEMPLATE = COMMON_SETTINGS + """
xlimits({xmin}, {xmax});
ylimits({ymin}, {ymax});
real f(real x) {{ return {slope}*x + {intercept}; }}
draw(graph(f, {xmin}, {xmax}), blue + graphPen);
label("{equation_label}", ({label_x}, {label_y}), blue);
"""

# Template for y = a*x^2 + b*x + c, drawn in red.
# Placeholders: xmin, xmax, ymin, ymax, a, b, c, equation_label, label_x, label_y.
QUADRATIC_TEMPLATE = COMMON_SETTINGS + """
xlimits({xmin}, {xmax});
ylimits({ymin}, {ymax});
real f(real x) {{ return {a}*x^2 + {b}*x + {c}; }}
draw(graph(f, {xmin}, {xmax}), red + graphPen);
label("{equation_label}", ({label_x}, {label_y}), red);
"""

# Template for a circle with centre ({h}, {k}) and radius {r}, drawn in green.
# Placeholders: xmin, xmax, ymin, ymax, h, k, r, equation_label, label_x, label_y.
CIRCLE_TEMPLATE = COMMON_SETTINGS + """
xlimits({xmin}, {xmax});
ylimits({ymin}, {ymax});
path circle_path = Circle( ({h}, {k}), {r} );
draw(circle_path, green + graphPen);
label("{equation_label}", ({label_x}, {label_y}), green);
"""

# Template for an axis-aligned ellipse, drawn in magenta: a unit circle scaled
# by ({a}, {b}) and then shifted to ({h}, {k}).
# Placeholders: xmin, xmax, ymin, ymax, h, k, a, b, equation_label, label_x, label_y.
ELLIPSE_TEMPLATE = COMMON_SETTINGS + """
xlimits({xmin}, {xmax});
ylimits({ymin}, {ymax});
path ellipse_path = shift({h},{k}) * scale({a},{b}) * Circle((0,0),1);
draw(ellipse_path, magenta + graphPen);
label("{equation_label}", ({label_x}, {label_y}), magenta);
"""

# Template for a hyperbola centred at ({h}, {k}), drawn in orange as two
# parametric branches f1 and f2 over t in [{t_min}, {t_max}].
# {type} selects the parametrisation inside the generated Asymptote code:
#   0 -> x = h +/- a*cosh(t), y = k + b*sinh(t)   (branches left and right)
#   otherwise -> x = h + a*sinh(t), y = k +/- b*cosh(t)   (branches above and below)
# Placeholders: xmin, xmax, ymin, ymax, type, h, k, a, b, t_min, t_max,
# equation_label, label_x, label_y.
HYPERBOLA_TEMPLATE = COMMON_SETTINGS + """
xlimits({xmin}, {xmax});
ylimits({ymin}, {ymax});

pair f1(real t);
pair f2(real t);

if ({type} == 0) {{
    f1 = new pair(real t) {{ return ({h} + {a}*cosh(t), {k} + {b}*sinh(t)); }};
    f2 = new pair(real t) {{ return ({h} - {a}*cosh(t), {k} + {b}*sinh(t)); }};
}} else {{
    f1 = new pair(real t) {{ return ({h} + {a}*sinh(t), {k} + {b}*cosh(t)); }};
    f2 = new pair(real t) {{ return ({h} + {a}*sinh(t), {k} - {b}*cosh(t)); }};
}}

// Use an adaptive plot for potentially discontinuous functions like hyperbola parts
// graph(f, xmin, xmax, operator ..) might be better for function segments,
// but since hyperbola is parametric, graph(f, t_min, t_max, n=200) is fine.
// To avoid issues with asymptotes and extreme values in hyperbolas/tangents if directly plotted:
// Consider plotting segments or using adaptive plotting, or careful range setting.
// For hyperbolas defined parametrically this is less of an issue than explicit y=f(x) hyperbolas near asymptotes.
draw(graph(f1, {t_min}, {t_max}, n=200), orange + graphPen);
draw(graph(f2, {t_min}, {t_max}, n=200), orange + graphPen);
label("{equation_label}", ({label_x}, {label_y}), orange);
"""

# Template for y = A*sin(B*(x - C)) + D, drawn in black with 200 sample points.
# Placeholders: xmin, xmax, ymin, ymax, A, B, C, D, equation_label, label_x, label_y.
SINE_TEMPLATE = COMMON_SETTINGS + """
xlimits({xmin}, {xmax});
ylimits({ymin}, {ymax});
real f(real x) {{ return {A}*sin({B}*(x - {C})) + {D}; }}
draw(graph(f, {xmin}, {xmax}, n=200), black + graphPen);
label("{equation_label}", ({label_x}, {label_y}), black);
"""

# --- NEW TEMPLATES ---
# Template for y = A*|B*(x - C)| + D, drawn in purple with 200 sample points.
# Placeholders: xmin, xmax, ymin, ymax, A, B, C, D, equation_label, label_x, label_y.
ABSOLUTE_VALUE_TEMPLATE = COMMON_SETTINGS + """
xlimits({xmin}, {xmax});
ylimits({ymin}, {ymax});
real f(real x) {{ return {A}*abs({B}*(x - {C})) + {D}; }}
draw(graph(f, {xmin}, {xmax}, n=200), purple + graphPen); // Using purple color
label("{equation_label}", ({label_x}, {label_y}), purple);
"""

# Template for y = A*tan(B*(x - C)) + D, drawn in brown.
# Placeholders: xmin, xmax, ymin, ymax, A, B, C, D, equation_label, label_x, label_y.
# NOTE(review): the draw() call passes a bare identifier `adaptive` as the fourth
# argument to graph(); nothing in this file defines it, and it does not obviously
# correspond to any graph() parameter in Asymptote's graph module (n, T, cond,
# join). Confirm that this template compiles; if it does not, tangent samples
# can never be produced.
TANGENT_TEMPLATE = COMMON_SETTINGS + """
xlimits({xmin}, {xmax});
ylimits({ymin}, {ymax});
real f(real x) {{ return {A}*tan({B}*(x - {C})) + {D}; }}
// Using 'adaptive' for plotting tangent which has discontinuities
// This may result in multiple segments being drawn
draw(graph(f, {xmin}, {xmax}, adaptive), brown + graphPen); // Using brown color
label("{equation_label}", ({label_x}, {label_y}), brown);
"""
# --- END NEW TEMPLATES ---

print("✅ Templates defined (import graph; added, label quotes fixed, NEW TEMPLATES ADDED)")

✅ Templates defined (import graph; added, label quotes fixed, NEW TEMPLATES ADDED)


In [None]:
# Cell 3 (with new generator functions)
import math
import random

def tex_signed_str(val, precision=2, force_plus=True):
    """Format a number as a signed trailing term for a TeX equation.

    The value is rounded to `precision` decimals first.

    Returns:
        " - <abs>" for negative values; " + <value>" for positive values when
        `force_plus` is true, otherwise just "<value>"; for zero, " + 0" when
        `force_plus` is true and "" otherwise.
    """
    rounded = round(val, precision)
    if rounded == 0:
        return " + 0" if force_plus else ""
    if rounded < 0:
        return f" - {abs(rounded)}"
    return f" + {rounded}" if force_plus else f"{rounded}"

def tex_leading_coeff_str(val, precision=2):
    """Format the leading coefficient of a term for a TeX equation.

    After rounding to `precision` decimals: 1 becomes "" (implicit
    coefficient), -1 becomes "-", 0 becomes "0", and anything else is rendered
    as the rounded number.
    """
    rounded = round(val, precision)
    # Numeric keys match ints and floats alike (1 == 1.0 and they hash equally).
    special_cases = {1: "", -1: "-", 0: "0"}
    if rounded in special_cases:
        return special_cases[rounded]
    return f"{rounded}"

def tex_coeff_str(val, precision=2):
    """Format a non-leading coefficient as " + c" / " - c" for a TeX equation.

    After rounding to `precision` decimals: zero yields "", a magnitude of 1
    yields only the sign (" + " or " - "), and any other value yields the sign
    followed by the absolute value.
    """
    rounded = round(val, precision)
    if rounded == 0:
        return ""
    if rounded > 0:
        sign = " + "
    else:
        sign = " - "
    magnitude = abs(rounded)
    return sign if magnitude == 1 else f"{sign}{magnitude}"

def format_num_simple(val, precision=2):
    """Return `val` rounded to `precision` decimals, rendered as a plain string."""
    rounded = round(val, precision)
    return str(rounded)

def generate_linear_function():
    """Build Asymptote source for a random line y = slope*x + intercept.

    The slope is drawn from [-3, 3] and the intercept from [-5, 5], both
    rounded to two decimals. The x window is fixed at [-4, 4]; the y window
    spans the line's values at the two x edges plus padding.

    Returns:
        tuple[str, str]: (filled LINEAR_TEMPLATE, identifier of the form
        "linear_s<slope>_i<intercept>").
    """
    slope = round(random.uniform(-3, 3), 2)
    intercept = round(random.uniform(-5, 5), 2)

    xmin, xmax = -4, 4
    edge_values = [slope * x_edge + intercept for x_edge in (xmin, xmax)]
    ymin_calc = min(edge_values)
    ymax_calc = max(edge_values)
    y_range = ymax_calc - ymin_calc
    # At least 1 unit of padding; 10% of the span when the line is not (nearly) flat.
    padding = max(1, 0.1 * y_range) if y_range > 0.01 else 1

    ymin = ymin_calc - padding
    ymax = ymax_calc + padding
    label_x = (xmin + xmax) / 2
    label_y = ymax - padding * 0.5

    if slope == 0:
        # Horizontal line: the label is just the constant.
        right_hand_side = format_num_simple(intercept, 2)
    else:
        right_hand_side = f"{tex_leading_coeff_str(slope,2)}x"
        if intercept != 0:
            right_hand_side += tex_signed_str(intercept, 2, force_plus=True)
    final_label_str = "$y = " + right_hand_side + "$"

    code = LINEAR_TEMPLATE.format(
        equation_label=final_label_str,
        label_x=label_x, label_y=label_y,
        xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax,
        slope=slope, intercept=intercept,
    )
    return code, f"linear_s{slope}_i{intercept}"

def generate_quadratic_function():
    """Build Asymptote source for a random parabola y = a*x^2 + b*x + c.

    |a| is drawn from [0.2, 1.5] with a random sign, b from [-3, 3] and c from
    [-4, 4], all rounded to two decimals. The x window is fixed at [-5, 5]; the
    y window covers the curve over that window (including the vertex when it
    lies inside) plus padding.

    Returns:
        tuple[str, str]: (filled QUADRATIC_TEMPLATE, identifier of the form
        "quad_a<a>_b<b>_c<c>").
    """
    a = round(random.choice([-1, 1]) * random.uniform(0.2, 1.5), 2)
    b = round(random.uniform(-3, 3), 2)
    c = round(random.uniform(-4, 4), 2)
    if a == 0:
        a = 0.2

    xmin, xmax = -5, 5
    xv = -b / (2*a)
    yv = a*xv**2 + b*xv + c
    y_at_xmin = a*xmin**2 + b*xmin + c
    y_at_xmax = a*xmax**2 + b*xmax + c

    lowest_edge = min(y_at_xmin, y_at_xmax)
    highest_edge = max(y_at_xmin, y_at_xmax)
    vertex_in_view = xmin <= xv <= xmax
    # The vertex is the minimum when the parabola opens upward (a > 0) and the
    # maximum when it opens downward (a < 0); otherwise the edges bound the curve.
    ymin_calc = yv if (vertex_in_view and a > 0) else lowest_edge
    ymax_calc = yv if (vertex_in_view and a < 0) else highest_edge

    y_range = ymax_calc - ymin_calc
    padding = max(1.5, 0.15 * y_range) if y_range > 0.01 else 1.5
    ymin = ymin_calc - padding
    ymax = ymax_calc + padding
    label_x = (xmin + xmax) / 2
    label_y = ymax - padding * 0.5

    pieces = ["$y = ", f"{tex_leading_coeff_str(a,2)}x^2"]
    linear_term = tex_coeff_str(b,2)  # " + 2", " - 3", a bare sign for +/-1, or "" when b == 0
    if linear_term:
        pieces.append(f"{linear_term}x")
    if c != 0:
        pieces.append(tex_signed_str(c, 2, force_plus=True))
    pieces.append("$")
    label_str = "".join(pieces)

    code = QUADRATIC_TEMPLATE.format(
        equation_label=label_str,
        label_x=label_x, label_y=label_y,
        xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax,
        a=a, b=b, c=c,
    )
    return code, f"quad_a{a}_b{b}_c{c}"

def generate_circle_parameters():
    """Build Asymptote source for a random circle (x-h)^2 + (y-k)^2 = r^2.

    The centre coordinates are drawn from [-2, 2] and the radius from [1, 3],
    all rounded to two decimals. The view window is the circle's bounding box
    grown by 0.5 on every side, and the label sits just above the circle.

    Returns:
        tuple[str, str]: (filled CIRCLE_TEMPLATE, identifier of the form
        "circle_h<h>_k<k>_r<r>").
    """
    h = round(random.uniform(-2, 2), 2)
    k = round(random.uniform(-2, 2), 2)
    r = round(random.uniform(1, 3), 2)
    if r <= 0:
        r = 1.0

    def squared_term(symbol, center):
        # "x^2" for a centre coordinate of 0, otherwise "(x <signed -center>)^2".
        if center != 0:
            return f"({symbol} {tex_signed_str(-center, 2, force_plus=True)})^2"
        return f"{symbol}^2"

    padding = 0.5
    window = {
        "xmin": h - r - padding,
        "xmax": h + r + padding,
        "ymin": k - r - padding,
        "ymax": k + r + padding,
    }
    label_x = h
    label_y = k + r + padding * 0.5

    x_term_str = squared_term("x", h)
    y_term_str = squared_term("y", k)
    r_sq_str = format_num_simple(r**2, 2)
    equation_label = f"${x_term_str} + {y_term_str} = {r_sq_str}$"

    code = CIRCLE_TEMPLATE.format(
        h=h, k=k, r=r,
        label_x=label_x, label_y=label_y,
        equation_label=equation_label,
        **window
    )
    return code, f"circle_h{h}_k{k}_r{r}"

def generate_ellipse_parameters():
    """Build Asymptote source for a random axis-aligned ellipse.

    The centre (h, k) is drawn from [-1.5, 1.5]^2, the x semi-axis from
    [1.5, 4] and the y semi-axis from [1, 3], all rounded to two decimals. The
    view window is the ellipse's bounding box grown by 0.5 on every side, and
    the label sits just above the ellipse.

    Returns:
        tuple[str, str]: (filled ELLIPSE_TEMPLATE, identifier of the form
        "ellipse_h<h>_k<k>_a<a>_b<b>").
    """
    h = round(random.uniform(-1.5, 1.5), 2)
    k = round(random.uniform(-1.5, 1.5), 2)
    a_axis = round(random.uniform(1.5, 4), 2)  # semi-axis along x
    b_axis = round(random.uniform(1, 3), 2)    # semi-axis along y
    if a_axis <= 0: a_axis = 1.0  # Ensure positive
    if b_axis <= 0: b_axis = 1.0  # Ensure positive

    padding = 0.5
    xmin = h - a_axis - padding
    xmax = h + a_axis + padding
    ymin = k - b_axis - padding
    ymax = k + b_axis + padding
    label_x = h
    label_y = k + b_axis + padding * 0.5  # Place label above ellipse

    x_term_num_str = "x^2"
    if h != 0:
        h_sign_val = tex_signed_str(-h, 2, force_plus=True)
        x_term_num_str = f"(x {h_sign_val})^2"
    y_term_num_str = "y^2"
    if k != 0:
        k_sign_val = tex_signed_str(-k, 2, force_plus=True)
        y_term_num_str = f"(y {k_sign_val})^2"

    a_sq_str = format_num_simple(a_axis**2, 2)
    b_sq_str = format_num_simple(b_axis**2, 2)
    # The label is pasted into a double-quoted Asymptote string, which hands
    # backslashes to TeX unchanged. The .asy file must therefore contain a
    # single backslash ("\frac"); two backslashes would reach TeX as a line
    # break followed by the literal text "frac".
    equation_label = (
        f"$\\frac{{{x_term_num_str}}}{{{a_sq_str}}}"
        f" + \\frac{{{y_term_num_str}}}{{{b_sq_str}}} = 1$"
    )

    code = ELLIPSE_TEMPLATE.format(
        h=h, k=k, a=a_axis, b=b_axis,
        xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax,
        label_x=label_x, label_y=label_y,
        equation_label=equation_label
    )
    return code, f"ellipse_h{h}_k{k}_a{a_axis}_b{b_axis}"

def generate_hyperbola_parameters():
    """Build Asymptote source for a random hyperbola centred at (h, k).

    `hyperbola_type` 0 produces (x-h)^2/a^2 - (y-k)^2/b^2 = 1 and type 1
    produces (y-k)^2/b^2 - (x-h)^2/a^2 = 1. Both branches are drawn
    parametrically for t in [-1.8, 1.8]; the view window is sized from the
    branch extent at t_max plus 0.5 padding, and the label is placed near the
    top-left corner of the window.

    Returns:
        tuple[str, str]: (filled HYPERBOLA_TEMPLATE, identifier of the form
        "hyperbola_t<type>_h<h>_k<k>_a<a>_b<b>").
    """
    h = round(random.uniform(-1, 1), 2)
    k = round(random.uniform(-1, 1), 2)
    a_param = round(random.uniform(0.5, 2), 2)
    b_param = round(random.uniform(0.5, 2), 2)
    if a_param <= 0: a_param = 0.5
    if b_param <= 0: b_param = 0.5

    hyperbola_type = random.choice([0, 1])  # 0 for x-opening, 1 for y-opening
    t_min, t_max = -1.8, 1.8  # Parameter range for drawing
    padding = 0.5

    x_term_num_str = "x^2"
    if h != 0:
        h_sign_val = tex_signed_str(-h, 2, force_plus=True)
        x_term_num_str = f"(x {h_sign_val})^2"
    y_term_num_str = "y^2"
    if k != 0:
        k_sign_val = tex_signed_str(-k, 2, force_plus=True)
        y_term_num_str = f"(y {k_sign_val})^2"

    a_sq_str = format_num_simple(a_param**2, 2)
    b_sq_str = format_num_simple(b_param**2, 2)

    # The label is pasted into a double-quoted Asymptote string, which hands
    # backslashes to TeX unchanged, so the .asy file must contain a single
    # backslash ("\frac"). Two backslashes would reach TeX as a line break
    # followed by the literal text "frac".
    x_fraction = f"\\frac{{{x_term_num_str}}}{{{a_sq_str}}}"
    y_fraction = f"\\frac{{{y_term_num_str}}}{{{b_sq_str}}}"

    # Determine x/y limits based on max extent of hyperbola within t_min/t_max
    if hyperbola_type == 0:  # (x-h)^2/a^2 - (y-k)^2/b^2 = 1
        max_x_dev = a_param * math.cosh(t_max)  # Largest x-deviation from center
        max_y_dev = b_param * math.sinh(t_max)  # Corresponding y-deviation
        equation_label = f"${x_fraction} - {y_fraction} = 1$"
    else:  # (y-k)^2/b^2 - (x-h)^2/a^2 = 1
        max_x_dev = a_param * math.sinh(t_max)  # x-deviation if it opens along y-axis
        max_y_dev = b_param * math.cosh(t_max)
        equation_label = f"${y_fraction} - {x_fraction} = 1$"

    xmin = h - max_x_dev - padding
    xmax = h + max_x_dev + padding
    ymin = k - max_y_dev - padding
    ymax = k + max_y_dev + padding

    label_x = xmin + (xmax - xmin) * 0.05
    label_y = ymax - (ymax-ymin) * 0.10

    code = HYPERBOLA_TEMPLATE.format(
        h=h, k=k, a=a_param, b=b_param, type=hyperbola_type,
        t_min=t_min, t_max=t_max,
        xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax,
        label_x=label_x, label_y=label_y,
        equation_label=equation_label
    )
    return code, f"hyperbola_t{hyperbola_type}_h{h}_k{k}_a{a_param}_b{b_param}"

def generate_sine_function():
    """Build Asymptote source for a random sinusoid y = A*sin(B*(x - C)) + D.

    |A| is drawn from [0.5, 2] and |B| from [0.5, 3], each with a random sign;
    C is drawn from [-pi, pi] and D from [-1.5, 1.5]. All values are rounded to
    two decimals. The x window is centred on C and spans 1.75 periods; the y
    window is [D - |A|, D + |A|] plus padding.

    Returns:
        tuple[str, str]: (filled SINE_TEMPLATE, identifier of the form
        "sine_A<A>_B<B>_C<C>_D<D>").
    """
    A_amp = round(random.uniform(0.5, 2.0), 2)
    if A_amp == 0: A_amp = 0.5
    if random.choice([True, False]): A_amp *= -1

    B_freq_factor = round(random.uniform(0.5, 3), 2)
    if B_freq_factor == 0: B_freq_factor = 0.5
    if random.choice([True, False]): B_freq_factor *= -1

    C_phase_shift = round(random.uniform(-math.pi, math.pi), 2)  # Horizontal shift
    D_vert_shift = round(random.uniform(-1.5, 1.5), 2)  # Vertical shift

    # A and B are guaranteed non-zero by the guards above, so the period is
    # always finite and no constant-function special case is needed.
    period_val = abs(2 * math.pi / B_freq_factor)
    num_periods_to_show = 1.75  # How many periods to display
    x_range_half_width = num_periods_to_show * period_val / 2.0

    center_x = C_phase_shift  # Center the view around the phase shift
    xmin = center_x - x_range_half_width
    xmax = center_x + x_range_half_width

    ymin_calc = D_vert_shift - abs(A_amp)
    ymax_calc = D_vert_shift + abs(A_amp)
    y_range = ymax_calc - ymin_calc
    padding = max(0.5, 0.1 * y_range if y_range > 0.01 else 0.5)
    ymin = ymin_calc - padding
    ymax = ymax_calc + padding

    label_x = (xmin + xmax) / 2
    label_y = ymax - padding * 0.5

    x_minus_c_str = "x"
    if C_phase_shift != 0:
        c_term_str = tex_signed_str(-C_phase_shift, 2, force_plus=True)
        x_minus_c_str = f"(x {c_term_str})"

    # tex_leading_coeff_str returns "" for 1 and "-" for -1, so plain
    # concatenation covers every coefficient case.
    # The label is pasted into a double-quoted Asymptote string, which hands
    # backslashes to TeX unchanged, so the .asy file must contain a single
    # backslash ("\sin"). Two backslashes would reach TeX as a line break
    # followed by the literal text "sin".
    label_str = (
        "$y = "
        + tex_leading_coeff_str(A_amp, 2)
        + "\\sin("
        + tex_leading_coeff_str(B_freq_factor, 2)
        + x_minus_c_str
        + ")"
    )
    if D_vert_shift != 0:
        label_str += tex_signed_str(D_vert_shift, 2, force_plus=True)
    label_str += "$"

    code = SINE_TEMPLATE.format(
        A=A_amp, B=B_freq_factor, C=C_phase_shift, D=D_vert_shift,
        xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax,
        label_x=label_x, label_y=label_y,
        equation_label=label_str
    )
    return code, f"sine_A{A_amp}_B{B_freq_factor}_C{C_phase_shift}_D{D_vert_shift}"


# --- NEW GENERATOR FUNCS ---
def generate_absolute_value_function():
    """Build Asymptote source for a random curve y = A*|B*(x - C)| + D.

    |A| is drawn from [0.5, 2] with a random sign, B from [0.3, 2.5], C from
    [-3, 3] and D from [-4, 4], all rounded to two decimals. The x window is
    C +/- 4; the y window runs between the vertex value D and the curve's
    values at the window edges, plus padding.

    Returns:
        tuple[str, str]: (filled ABSOLUTE_VALUE_TEMPLATE, identifier of the
        form "absval_A<A>_B<B>_C<C>_D<D>").
    """
    A = round(random.choice([-1, 1]) * random.uniform(0.5, 2.0), 2)
    if A == 0:
        A = 0.5
    B = round(random.uniform(0.3, 2.5), 2)
    if B == 0:
        B = 0.5
    C = round(random.uniform(-3, 3), 2)
    D = round(random.uniform(-4, 4), 2)

    x_range_half_width = 4
    xmin = C - x_range_half_width
    xmax = C + x_range_half_width

    y_at_vertex = D
    y_at_xmin = A * abs(B * (xmin - C)) + D
    y_at_xmax = A * abs(B * (xmax - C)) + D

    opens_upward = A > 0
    if opens_upward:
        # Vertex is the lowest point; the window edges give the highest.
        ymin_calc, ymax_calc = y_at_vertex, max(y_at_xmin, y_at_xmax)
    else:
        # Vertex is the highest point; the window edges give the lowest.
        ymin_calc, ymax_calc = min(y_at_xmin, y_at_xmax), y_at_vertex

    y_range_abs = ymax_calc - ymin_calc
    padding = max(1.0, 0.15 * y_range_abs) if y_range_abs > 0.01 else 1.0
    ymin = ymin_calc - padding
    ymax = ymax_calc + padding
    label_x = C
    label_offset = padding * 0.75 if opens_upward else -padding * 0.75
    label_y = y_at_vertex + label_offset

    x_minus_c_str = "x"
    if C != 0:
        x_minus_c_str = f"(x {tex_signed_str(-C, 2, force_plus=True)})"

    # An empty coefficient string (B == 1) simply concatenates to nothing.
    fragments = [
        "$y = ",
        tex_leading_coeff_str(A, 2),
        "|",
        f"{tex_leading_coeff_str(B,2)}{x_minus_c_str}",
        "|",
    ]
    if D != 0:
        fragments.append(tex_signed_str(D, 2, force_plus=True))
    fragments.append("$")
    label_str = "".join(fragments)

    code = ABSOLUTE_VALUE_TEMPLATE.format(
        equation_label=label_str,
        label_x=label_x, label_y=label_y,
        xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax,
        A=A, B=B, C=C, D=D,
    )
    return code, f"absval_A{A}_B{B}_C{C}_D{D}"

def generate_tangent_function():
    """Build Asymptote source for a random curve y = A*tan(B*(x - C)) + D.

    |A| is drawn from [0.5, 1.5] and |B| from [0.3, 1.5], each with a random
    sign; C is drawn from [-pi/2, pi/2] and D from [-1, 1]. All values are
    rounded to two decimals. The x window is centred on C and spans 1.5
    periods; the y window is fixed at D +/- 5 plus 1 unit of padding.

    Returns:
        tuple[str, str]: (filled TANGENT_TEMPLATE, identifier of the form
        "tan_A<A>_B<B>_C<C>_D<D>").
    """
    A = round(random.choice([-1, 1]) * random.uniform(0.5, 1.5), 2)
    if A == 0: A = 0.5
    B = round(random.uniform(0.3, 1.5), 2)
    if B == 0: B = 0.3
    if random.choice([True, False]): B *= -1  # also allow negative B

    C = round(random.uniform(-math.pi/2, math.pi/2), 2)  # horizontal shift
    D = round(random.uniform(-1, 1), 2)  # vert shift

    # B is guaranteed non-zero by the guard above, so the period is finite.
    period_val = abs(math.pi / B)
    num_periods_to_show = 1.5
    x_range_half_width = num_periods_to_show * period_val / 2.0
    # center on C.
    center_x = C
    xmin = center_x - x_range_half_width
    xmax = center_x + x_range_half_width
    ymax_abs = 5
    ymin_calc = -ymax_abs + D
    ymax_calc = ymax_abs + D
    padding = 1.0
    ymin = ymin_calc - padding
    ymax = ymax_calc + padding

    # label_x previously had no initial value, so the clamp below read an
    # unbound local and every call raised UnboundLocalError. Anchor the label
    # at the window centre, matching the other generators' horizontal placement.
    label_x = center_x
    label_y = D + (ymax_abs*0.1 if A > 0 else -ymax_abs*0.1)
    # Keep the label at least 10% of the window size away from each border.
    label_x = max(xmin + (xmax-xmin)*0.1, min(xmax - (xmax-xmin)*0.1, label_x))
    label_y = max(ymin + (ymax-ymin)*0.1, min(ymax - (ymax-ymin)*0.1, label_y))

    x_minus_c_str = "x"
    if C != 0:
        c_term_str = tex_signed_str(-C, 2, force_plus=True)
        x_minus_c_str = f"(x {c_term_str})"

    # tex_leading_coeff_str returns "" for 1 and "-" for -1, so plain
    # concatenation covers every coefficient case.
    # The label is pasted into a double-quoted Asymptote string, which hands
    # backslashes to TeX unchanged, so the .asy file must contain a single
    # backslash ("\tan"). Two backslashes would reach TeX as a line break
    # followed by the literal text "tan".
    label_str = (
        "$y = "
        + tex_leading_coeff_str(A, 2)
        + "\\tan("
        + tex_leading_coeff_str(B, 2)
        + x_minus_c_str
        + ")"
    )
    if D != 0:
        label_str += tex_signed_str(D, 2, force_plus=True)
    label_str += "$"

    code = TANGENT_TEMPLATE.format(
        A=A, B=B, C=C, D=D,
        xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax,
        label_x=label_x, label_y=label_y,
        equation_label=label_str
    )
    return code, f"tan_A{A}_B{B}_C{C}_D{D}"

print("✅ Code generation functions defined.")

✅ Code generation functions defined.


In [None]:
# Cell 4: Compile
def compile_asymptote_to_image_with_path(code, output_base_path):
    """Write Asymptote source to disk and render it to PNG with the `asy` CLI.

    Args:
        code: Asymptote source text.
        output_base_path: Path without extension; ".asy" and ".png" are appended.

    Returns:
        tuple: (png_path, asy_path) on success. On a compile failure, timeout or
        unexpected error: (None, asy_path) if the .asy file exists on disk,
        otherwise (None, None). (None, None) is also returned when the source
        file cannot be written or the `asy` executable is missing.
    """
    asy_filename = "{}.asy".format(output_base_path)
    png_filename = "{}.png".format(output_base_path)

    def failure_paths():
        # Report the source path only if the file is actually there to inspect.
        if os.path.exists(asy_filename):
            return None, asy_filename
        return None, None

    # Stage 1: persist the source so `asy` can read it.
    try:
        with open(asy_filename, "w", encoding="utf-8") as source_file:
            source_file.write(code)
    except Exception as write_error:
        print(f"!!! Error writing .asy file {asy_filename}: {write_error}")
        return None, None # return None for asy_path also

    # Stage 2: render; any failure is reported on stdout and mapped to None paths.
    command = ["asy", "-f", "png", "-o", png_filename, asy_filename]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True, timeout=30)
        print(f"✅Sucsess: Compiled {asy_filename} to {png_filename} successfully")
        return png_filename, asy_filename # return both paths
    except subprocess.CalledProcessError as compile_error:
        for report_line in (
            f"!!! Error compiling {asy_filename}:",
            f"   Command: {' '.join(compile_error.cmd)}",
            f"   Return code: {compile_error.returncode}",
            f"   stdout: {compile_error.stdout}",
            f"   stderr: {compile_error.stderr}",
        ):
            print(report_line)
        return failure_paths()
    except subprocess.TimeoutExpired:
        print(f"!!! Timeout compiling {asy_filename}.")
        return failure_paths()
    except FileNotFoundError:
        print(f"!!! Asymptote executable ('asy') not found.")
        return None, None
    except Exception as unexpected_error:
        print(f"!!! An unexpected error occurred: {unexpected_error}")
        return failure_paths()

print("✅ Compilation function defined (returns png and asy paths)")

✅ Compilation function defined (returns png and asy paths)


In [None]:
# Cell 5
import pandas as pd
import time
import os
# Registry mapping each function-type name to its generator. Every generator
# takes no arguments and returns (asymptote_code, identifier).
FUNCTION_GENERATORS = {
    "linear": generate_linear_function,
    "quadratic": generate_quadratic_function,
    "circle": generate_circle_parameters,
    "ellipse": generate_ellipse_parameters,
    "hyperbola": generate_hyperbola_parameters,
    "sine": generate_sine_function,
    "absolute_value": generate_absolute_value_function,
    "tangent": generate_tangent_function
}

# Ordered list of type names derived from the registry keys, so adding a
# generator above automatically includes it in batch generation.
FUNCTION_TYPES = list(FUNCTION_GENERATORS.keys()) # auto update


def generate_and_compile_single(function_type, output_dir):
    """Generate one sample of `function_type`, compile it, and describe the result.

    Args:
        function_type: A key of FUNCTION_GENERATORS.
        output_dir: Directory that receives the .asy and .png files.

    Returns:
        dict with keys "identifier", "image_path", "asy_path" and
        "function_type" when both output files exist on disk; None when the
        type is unknown, the generator raises, or compilation does not produce
        both files.
    """
    if function_type not in FUNCTION_GENERATORS:
        print(f"Critical Error: Unknown function type requested: {function_type}")
        return None

    try:
        code, identifier = FUNCTION_GENERATORS[function_type]()
    except Exception as e:
        print(f"Error in {function_type} parameter generation: {e}")
        # Consider adding traceback for debugging:
        # import traceback
        # traceback.print_exc()
        return None

    png_path, asy_path = compile_asymptote_to_image_with_path(
        code, os.path.join(output_dir, identifier)
    )

    # Both paths must be reported and both files must really exist.
    both_reported = bool(png_path and asy_path)
    if not (both_reported and os.path.exists(png_path) and os.path.exists(asy_path)):
        return None

    return {
        "identifier": identifier,
        "image_path": png_path,
        "asy_path": asy_path,
        "function_type": function_type
    }


def generate_large_batch(total_samples_target, output_dir, metadata_filepath):
    """
    Generates samples up to total_samples_target, appending to existing metadata if found.
    Distributes generation among function types. Saves metadata periodically and at the end.

    Args:
        total_samples_target: Desired total number of metadata records,
            counting records already present in `metadata_filepath`.
        output_dir: Directory for the generated .asy/.png files (created if missing).
        metadata_filepath: CSV file that is read on start (to resume) and
            rewritten every 50 records and at the end.

    Returns:
        list[dict]: All metadata records (pre-existing plus newly generated).
    """
    if not os.path.exists(output_dir):
        # exist_ok guards against the directory appearing between check and create.
        os.makedirs(output_dir, exist_ok=True)
        print(f"Created directory: {output_dir}")

    all_samples_metadata = []
    existing_identifiers = set()

    if os.path.exists(metadata_filepath):
        try:
            # Drop rows with a missing identifier BEFORE casting to str:
            # astype(str) turns NaN into the literal string 'nan', after which
            # dropna no longer sees anything missing and the bogus row would be
            # counted towards the target.
            existing_df = (
                pd.read_csv(metadata_filepath)
                .dropna(subset=['identifier'])
                .assign(identifier=lambda frame: frame['identifier'].astype(str))
            )
            all_samples_metadata = existing_df.to_dict('records')
            existing_identifiers = set(existing_df['identifier'])
            print(f"Resuming. Found {len(all_samples_metadata)} existing valid samples in metadata: {metadata_filepath}")
        except pd.errors.EmptyDataError:
            print(f"Metadata file {metadata_filepath} is empty. Starting fresh.")
        except Exception as e:
            print(f"Warning: Could not load or parse existing metadata file {metadata_filepath}: {e}. Starting fresh.")
            all_samples_metadata = []
            existing_identifiers = set()

    current_total_unique_samples = len(all_samples_metadata)

    if current_total_unique_samples >= total_samples_target:
        print(f"Target of {total_samples_target} samples already met or exceeded ({current_total_unique_samples} found). No new samples will be generated.")
        return all_samples_metadata

    new_samples_needed_overall = total_samples_target - current_total_unique_samples
    print(f"Current unique samples: {current_total_unique_samples}. Target: {total_samples_target}. Need to generate {new_samples_needed_overall} more unique samples.")

    num_function_types = len(FUNCTION_TYPES)
    if num_function_types == 0:
        print("No function types defined. Cannot generate samples.")
        return all_samples_metadata

    # Split the shortfall evenly; the first `remainder` types get one extra.
    samples_per_type_newly_needed = new_samples_needed_overall // num_function_types
    remainder_newly_needed = new_samples_needed_overall % num_function_types

    new_generation_targets_per_type = {func_type: samples_per_type_newly_needed for func_type in FUNCTION_TYPES}
    for i in range(remainder_newly_needed):
        new_generation_targets_per_type[FUNCTION_TYPES[i % num_function_types]] += 1

    print(f"Targets for newly needed samples per type: {new_generation_targets_per_type}")

    total_newly_generated_this_session = 0
    start_time_session = time.time()

    for func_type in FUNCTION_TYPES:
        num_new_to_generate_for_this_type = new_generation_targets_per_type.get(func_type, 0)

        if num_new_to_generate_for_this_type <= 0 :
            print(f"Skipping {func_type} as target for new samples is {num_new_to_generate_for_this_type}.")
            continue

        print(f"\n--- Attempting to generate {num_new_to_generate_for_this_type} new unique samples of {func_type} ---")

        generated_for_type_this_session = 0
        attempts_for_type_this_session = 0
        # Attempt budget per requested sample; covers failed compiles and
        # duplicate identifiers so a broken generator cannot loop forever.
        max_attempts_per_needed_sample = 15

        while generated_for_type_this_session < num_new_to_generate_for_this_type:
            if len(all_samples_metadata) >= total_samples_target:
                print(f"   Overall target of {total_samples_target} samples reached. Stopping generation for {func_type}.")
                break

            if attempts_for_type_this_session >= num_new_to_generate_for_this_type * max_attempts_per_needed_sample :
                print(f"   Max attempts reached for {func_type} ({attempts_for_type_this_session} for {num_new_to_generate_for_this_type} needed). Moving to next type.")
                break

            attempts_for_type_this_session += 1
            metadata = generate_and_compile_single(func_type, output_dir)

            # Only samples with a not-yet-seen identifier are recorded.
            if metadata and metadata['identifier'] not in existing_identifiers:
                all_samples_metadata.append(metadata)
                existing_identifiers.add(metadata['identifier'])
                total_newly_generated_this_session += 1
                generated_for_type_this_session += 1
                print(f"   + {metadata['identifier']} ({func_type}) [{generated_for_type_this_session}/{num_new_to_generate_for_this_type}]")

                # Checkpoint every 50 records so an interrupted session can resume.
                if len(all_samples_metadata) % 50 == 0:
                    try:
                        pd.DataFrame(all_samples_metadata).to_csv(metadata_filepath, index=False)
                        print(f"    Interim metadata saved ({len(all_samples_metadata)} total unique records).")
                    except Exception as e_save:
                         print(f"    Error saving interim metadata: {e_save}")

            if attempts_for_type_this_session % 20 == 0 or generated_for_type_this_session == num_new_to_generate_for_this_type:
                 elapsed_time_session = time.time() - start_time_session
                 print(f"   [{func_type}] Progress: {generated_for_type_this_session}/{num_new_to_generate_for_this_type} new for type. "
                       f"Total unique so far: {len(all_samples_metadata)}/{total_samples_target}. "
                       f"Session time: {elapsed_time_session:.0f}s.")

        if len(all_samples_metadata) >= total_samples_target:
            print(f"\nTarget of {total_samples_target} unique samples reached. Ending batch generation early.")
            break

    if all_samples_metadata:
        metadata_df = pd.DataFrame(all_samples_metadata)
        try:
            metadata_df.to_csv(metadata_filepath, index=False)
            print(f"\nFinal metadata for {len(all_samples_metadata)} unique samples saved to {metadata_filepath}")
        except Exception as e:
            print(f"Error saving final metadata to {metadata_filepath}: {e}")
    else:
        print("\n No samples in metadata. Nothing saved.")

    end_time_session = time.time()
    print(f"\n Batch generation session complete.")
    print(f"   Total unique samples now in metadata: {len(all_samples_metadata)} (Target was {total_samples_target}).")
    print(f"   Generated {total_newly_generated_this_session} new unique samples in this session.")
    print(f"   Total time for this session: {(end_time_session - start_time_session)/60:.2f} minutes.")
    if os.path.exists(output_dir):
        print(f"   Image/Asy files stored in: {os.path.abspath(output_dir)}")
    return all_samples_metadata

print("✅ Large batch generation functions defined (robust resume and append, UPDATED FUNCTION_TYPES).")

✅ Large batch generation functions defined (robust resume and append, UPDATED FUNCTION_TYPES).


In [None]:
# Cell 6: Gen Large Batch of Samples (Append to existing samples)

# --- Config---
# Total number of metadata records wanted, counting any already present in
# the metadata CSV from earlier runs.
TOTAL_SAMPLES_GOAL = 12000  # <<<<<SAMPLE SIZE>>>>>

# All outputs live under the mounted Google Drive folder.
OUTPUT_BASE_DIR = "/content/drive/MyDrive/StarSparkProj"
DATA_DIR = os.path.join(OUTPUT_BASE_DIR, "AsymptoteDataset_Full")
METADATA_FILENAME = os.path.join(OUTPUT_BASE_DIR, "all_samples_metadata_Full.csv")

# generate_large_batch also creates its output directory when missing; this
# check additionally reports the creation from this cell.
if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)
    print(f"Created data directory: {DATA_DIR}")

print(f"Targeting up to {TOTAL_SAMPLES_GOAL} total unique samples.")
print(f"Data will be in: {DATA_DIR}")
print(f"Overall metadata will be in: {METADATA_FILENAME}")
print(f"Function types to be generated: {FUNCTION_TYPES}")

# Resumes from METADATA_FILENAME if it exists and generates only the shortfall.
all_generated_metadata_list_of_dicts = generate_large_batch(
    total_samples_target=TOTAL_SAMPLES_GOAL,
    output_dir=DATA_DIR,
    metadata_filepath=METADATA_FILENAME
)

# An empty list means nothing was loaded from the CSV and nothing was generated.
if all_generated_metadata_list_of_dicts:
    print(f"\nProcess complete. Total unique samples now in metadata: {len(all_generated_metadata_list_of_dicts)}.")
else:
    print("\nNo samples were generated or metadata process failed/resulted in empty list.")

Targeting up to 12000 total unique samples.
Data will be in: /content/drive/MyDrive/StarSparkProj/AsymptoteDataset_Full
Overall metadata will be in: /content/drive/MyDrive/StarSparkProj/all_samples_metadata_Full.csv
Function types to be generated: ['linear', 'quadratic', 'circle', 'ellipse', 'hyperbola', 'sine', 'absolute_value', 'tangent']
Resuming. Found 11996 existing valid samples in metadata: /content/drive/MyDrive/StarSparkProj/all_samples_metadata_Full.csv
Current unique samples: 11996. Target: 12000. Need to generate 4 more unique samples.
Targets for newly needed samples per type: {'linear': 1, 'quadratic': 1, 'circle': 1, 'ellipse': 1, 'hyperbola': 0, 'sine': 0, 'absolute_value': 0, 'tangent': 0}

--- Attempting to generate 1 new unique samples of linear ---


In [None]:
# Cell 7 (Splitting)
from sklearn.model_selection import train_test_split
import pandas as pd
import os

# --- Config for Split ---
# Reads the combined metadata CSV and writes a train CSV and a validation CSV
# next to it. The split is stratified by 'function_type' whenever that is
# possible, and falls back to a plain random split otherwise.
OUTPUT_BASE_DIR_FOR_SPLIT = "/content/drive/MyDrive/StarSparkProj"
METADATA_FILE_TO_SPLIT = os.path.join(OUTPUT_BASE_DIR_FOR_SPLIT, "all_samples_metadata_Full.csv")
TRAIN_METADATA_FILEPATH = os.path.join(OUTPUT_BASE_DIR_FOR_SPLIT, "train_metadata_Full.csv")
VAL_METADATA_FILEPATH = os.path.join(OUTPUT_BASE_DIR_FOR_SPLIT, "val_metadata_Full.csv")

TEST_SPLIT_SIZE = 0.20 # 20% for val, 80% training
SPLIT_RANDOM_STATE = 42 # Fixed seed so re-running the cell reproduces the same split

if not os.path.exists(METADATA_FILE_TO_SPLIT):
    print(f"❌ Metadata file not found: {METADATA_FILE_TO_SPLIT}")
    print("   Please run the generation cell (Cell 6) first to create/update it.")
else:
    print(f"Loading metadata from {METADATA_FILE_TO_SPLIT}...")
    try:
        all_data_df = pd.read_csv(METADATA_FILE_TO_SPLIT)
        # Ensure identifier is string
        if 'identifier' in all_data_df.columns:
            all_data_df['identifier'] = all_data_df['identifier'].astype(str)

    except pd.errors.EmptyDataError:
        # A zero-byte CSV raises instead of returning an empty frame; normalise
        # it to an empty DataFrame so the checks below handle both cases.
        print(f"❌ Metadata file '{METADATA_FILE_TO_SPLIT}' is empty. No data to split.")
        all_data_df = pd.DataFrame()

    if all_data_df.empty:
        print(f"❌ Metadata file '{METADATA_FILE_TO_SPLIT}' is effectively empty after loading. No data to split.")
    elif len(all_data_df) < 2 : # Needs at least 2 samples to split meaningfully and for stratify.
        print(f"❌ Metadata file '{METADATA_FILE_TO_SPLIT}' has less than 2 samples ({len(all_data_df)}). Cannot split.")
    else:
        print(f"Loaded {len(all_data_df)} records.")
        print(f"Splitting data (approx. {(1-TEST_SPLIT_SIZE)*100:.0f}% train, {TEST_SPLIT_SIZE*100:.0f}% validation)...")

        # Decide whether stratification is possible. None means "plain random split".
        stratify_column = None
        if 'function_type' not in all_data_df.columns:
            print("❌ 'function_type' column not found in metadata. Cannot stratify. Proceeding with non-stratified split.")
        else:
            # Check for classes with fewer than 2 members before attempting to stratify
            type_counts = all_data_df['function_type'].value_counts()
            if (type_counts >= 2).all(): # All classes have at least 2 members
                print("Stratifying by 'function_type'.")
                stratify_column = all_data_df['function_type']
            else:
                print("⚠️ Not all function types have at least 2 samples. Cannot stratify. Splitting without stratification.")
                print("   Counts per type:\n", type_counts[type_counts < 2])

        try:
            train_df, val_df = train_test_split(
                all_data_df,
                test_size=TEST_SPLIT_SIZE,
                stratify=stratify_column, # Will be None if stratification is not possible
                random_state=SPLIT_RANDOM_STATE
            )
        except ValueError as e:
            # The ">= 2 per class" check above is necessary but not sufficient:
            # scikit-learn also rejects a stratified split when the train or
            # validation fold would hold fewer rows than there are classes.
            # Errors from a non-stratified split are genuine and must surface.
            if stratify_column is None:
                raise
            print(f"⚠️ Stratified split failed ({e}). Falling back to a non-stratified split.")
            train_df, val_df = train_test_split(
                all_data_df,
                test_size=TEST_SPLIT_SIZE,
                random_state=SPLIT_RANDOM_STATE
            )

        print(f"Training samples: {len(train_df)}")
        print(f"Validation samples: {len(val_df)}")

        # Persist both halves on every successful split, including the
        # non-stratified fallbacks, so no computed split is silently discarded.
        try:
            train_df.to_csv(TRAIN_METADATA_FILEPATH, index=False)
            print(f"✅ Training metadata (re)created at {TRAIN_METADATA_FILEPATH}")
            val_df.to_csv(VAL_METADATA_FILEPATH, index=False)
            print(f"✅ Validation metadata (re)created at {VAL_METADATA_FILEPATH}")
        except Exception as e:
            print(f"❌ Error saving split metadata files: {e}")