In [None]:
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
from nbconvert import NotebookExporter
import os
import csv
# Set before any kernel is launched so child processes inherit it.
# NOTE(review): presumably this silences the pydevd/debugpy file-validation
# warning printed when a kernel starts -- confirm against the debugger docs.
os.environ.update({'PYDEVD_DISABLE_FILE_VALIDATION': '1'})

In [None]:
# Column names of the generated dataset, in the order each row is written.
_CSV_HEADER = [
    "question", "solution", "question_type", "answer_type",
    "extracted_answer",
    "small_eval_point", "small_analytical", "small_numerical",
    "large_eval_point", "large_analytical", "large_numerical",
]

# Values that are identical for every generated row.
_QUESTION_TYPE = "ODE"
_ANSWER_TYPE = "list"
_SMALL_EVAL_POINT = 0.1


def _first_output_text(nb, cell_offset, placeholder):
    """Return the ``text`` field of the first output of ``nb.cells[cell_offset]``.

    ``placeholder`` is returned when the cell's output list is empty.  A cell
    without an ``outputs`` key, or whose first output has no ``text`` field,
    raises ``KeyError``; an out-of-range offset raises ``IndexError``.
    """
    outputs = nb.cells[cell_offset]['outputs']
    return outputs[0]['text'] if outputs else placeholder


def _execute_and_extract(notebook_path, run_dir, timeout, kernel_name):
    """Execute the notebook once and return the CSV row built from its outputs."""
    # Notebook files are JSON stored as UTF-8; be explicit so the platform's
    # default encoding cannot break the read.  The handle is closed before the
    # (potentially slow) execution starts.
    with open(notebook_path, encoding='utf-8') as f:
        nb = nbformat.read(f, as_version=4)

    ep = ExecutePreprocessor(timeout=timeout, kernel_name=kernel_name)
    ep.preprocess(nb, {'metadata': {'path': run_dir}})

    # Cells are addressed by their offset from the END of the notebook, so the
    # notebook layout must keep these result cells in these exact positions.
    question = _first_output_text(nb, -3, "No Problem Output")
    solution = _first_output_text(nb, -1, "No Solution Output")
    extracted_answer = _first_output_text(nb, -5, "No Both Solutions")
    small_analytical = _first_output_text(nb, -17, "No small x approx Solution")
    large_analytical = _first_output_text(nb, -15, "No Large x approx Solution")
    small_numerical = _first_output_text(nb, -13, "No small x numerical Solution")
    large_numerical = _first_output_text(nb, -11, "No Large x numerical Solution")
    large_eval_point = _first_output_text(nb, -19, "No large x eval point")

    return [
        question, solution, _QUESTION_TYPE, _ANSWER_TYPE,
        extracted_answer,
        _SMALL_EVAL_POINT, small_analytical, small_numerical,
        large_eval_point, large_analytical, large_numerical,
    ]


def data_generator(notebook_path, output_csv_path, executions,
                   max_iterations=None, timeout=10, kernel_name='python3'):
    """Repeatedly execute a notebook and collect its outputs into a CSV file.

    The notebook at ``notebook_path`` is re-read and executed on every
    iteration.  The text outputs of fixed cells (addressed from the end of the
    notebook) form one CSV row.  Iterations that raise, or that produce a
    question already written during this call, are reported and skipped.  The
    loop ends once ``executions`` rows have been written or the iteration cap
    is reached.

    Args:
        notebook_path: Path of the notebook to execute.  The kernel runs with
            the notebook's directory as its working directory.
        output_csv_path: Destination CSV.  It is overwritten, and the header
            row is written first.
        executions: Number of successful (unique) rows to collect.
        max_iterations: Optional cap on the total number of attempts,
            successful or not.  ``None`` (default) retries without limit.
        timeout: Forwarded to ``ExecutePreprocessor`` as ``timeout``.
        kernel_name: Forwarded to ``ExecutePreprocessor`` as ``kernel_name``.

    Raises:
        FileNotFoundError: If ``notebook_path`` is not an existing file.  This
            is checked before the CSV is touched, so an unrunnable job neither
            truncates previous output nor retries forever.
    """
    if not os.path.isfile(notebook_path):
        raise FileNotFoundError(f"Notebook not found: {notebook_path}")

    # Loop-invariant: directory the kernel is started in.
    run_dir = os.path.dirname(os.path.abspath(notebook_path))

    num_correct = 0
    iterations = 0
    seen_questions = set()

    with open(output_csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        # Write header only once, at the beginning
        writer.writerow(_CSV_HEADER)
        csvfile.flush()

        while num_correct < executions:
            if max_iterations is not None and iterations >= max_iterations:
                print(f'Stopping after {iterations} iterations: max_iterations reached.')
                break

            iterations += 1
            try:
                row = _execute_and_extract(notebook_path, run_dir, timeout, kernel_name)
                question = row[0]
                if question in seen_questions:
                    raise ValueError("Duplicate problem detected.")
                writer.writerow(row)
                # Flush per row so partial results survive an interrupted run.
                csvfile.flush()
            except Exception as exc:
                # Best effort: a failed or duplicate run is reported and retried.
                # Only the first line of the message is shown because execution
                # errors can carry a full traceback.
                message_lines = str(exc).strip().splitlines()
                reason = message_lines[0] if message_lines else ''
                print(f'Error during iteration {iterations}: {type(exc).__name__}: {reason}')
            else:
                # Register the question only once its row is safely written, so
                # a failed extraction does not block a later valid run.
                seen_questions.add(question)
                num_correct += 1
                print(f"num_correct: {num_correct}")
                print(f'Execution {num_correct}/{iterations} completed and output appended to CSV.')
                print(f'CSV file has been updated at: {os.path.abspath(output_csv_path)}')

    print(f"There were {num_correct}/{iterations} successes.")

In [None]:
# Driver cell: execute the solver notebook until 150 rows have been written
# to the training CSV (both paths are relative to the working directory).
notebook_path = "ODE_solver.ipynb"
output_csv_path = 'ODE_train_dataset.csv'
data_generator(
    notebook_path=notebook_path,
    output_csv_path=output_csv_path,
    executions=150,
)