## Generate dataset

In [2]:
import random
import pandas as pd
import sympy as sp

class ExtendedAlgebraProblemGenerator:
    """Generate algebra word problems and export them as a CSV dataset.

    The dataset produced by :meth:`generate_dataset` consists solely of
    quadratic problems; :meth:`generate_linear_problem` is available as a
    standalone generator but is not used when building the dataset.

    All randomness comes from the module-level ``random`` generator, so
    calling ``random.seed(...)`` beforehand makes the output reproducible.
    """

    def __init__(self, num_samples: int = 1000,
                 output_file: str = "simple_quadratic_problems.csv") -> None:
        """Store the dataset size and the destination CSV path.

        Args:
            num_samples: Number of problems to generate per dataset.
            output_file: Path the CSV is written to by :meth:`save_dataset`.
        """
        self.num_samples = num_samples
        self.output_file = output_file

    def generate_linear_problem(self):
        """Return one linear word problem ``a*x + b = c`` and its solution.

        Returns:
            A ``(question, answer)`` tuple where ``answer`` is the float
            ``(c - b) / a`` rounded to two decimal places.
        """
        a = random.randint(1, 10)
        b = random.randint(1, 10)
        c = random.randint(1, 20)
        question = f"If {a} times a number plus {b} equals {c}, what is the number?"
        return question, round((c - b) / a, 2)

    def generate_quadratic_problem(self):
        """Return one quadratic word problem ``a*x^2 + b*x + c = 0`` with real roots.

        Coefficients are redrawn until the equation has at least one real
        solution.

        Returns:
            A ``(question, answer)`` tuple where ``answer`` is a string of the
            real roots, each rounded to two decimal places and joined by
            ``", "``.
        """
        # Rejection-sample with a loop instead of recursing: Python has no
        # tail-call elimination, and checking the discriminant first avoids
        # running the symbolic solver on draws that would be thrown away.
        while True:
            a = random.randint(1, 5)   # Coefficient of x^2 (never zero)
            b = random.randint(0, 10)  # Coefficient of x
            c = random.randint(0, 20)  # Constant term
            # With a != 0, real roots exist exactly when b^2 - 4ac >= 0.
            if b * b - 4 * a * c >= 0:
                break

        # Solve exactly with SymPy; every root is real because of the
        # discriminant check above.
        x = sp.symbols('x')
        roots = sp.solve(a * x**2 + b * x + c, x)

        # Convert the equation into a word problem
        question = (
            f"The square of a number, multiplied by {a}, plus {b} times the number, "
            f"plus {c} equals 0. What is the number?"
        )
        answer = ", ".join(str(round(root.evalf(), 2)) for root in roots)

        return question, answer

    def generate_dataset(self):
        """Build a DataFrame of ``num_samples`` quadratic problems.

        Returns:
            A ``pandas.DataFrame`` with the columns ``"Problem"`` and
            ``"Solution"``, one row per generated problem.
        """
        rows = [self.generate_quadratic_problem() for _ in range(self.num_samples)]
        return pd.DataFrame(rows, columns=["Problem", "Solution"])

    def save_dataset(self):
        """Generate a fresh dataset, write it to ``output_file`` and report the path."""
        df = self.generate_dataset()
        # index=False keeps the DataFrame's row index out of the CSV.
        df.to_csv(self.output_file, index=False)
        print(f"Dataset saved to {self.output_file}")

# Example usage: when executed as a script (or as a notebook cell, where
# __name__ is "__main__"), generate 500 quadratic problems and write them to
# the generator's default output file, simple_quadratic_problems.csv.
if __name__ == "__main__":
    generator = ExtendedAlgebraProblemGenerator(num_samples=500)
    generator.save_dataset()

Dataset saved to simple_quadratic_problems.csv
