In [1]:
import sys
import random
# from transformers import AutoTokenizer
from datasets import load_dataset, Dataset
from tqdm import tqdm, trange
import time
import os
import shutil
import json
import re
from pprint import pp, pformat as pf
import demjson3 as demjson
from IPython.display import display, Markdown, Latex, HTML

from llm_fns.llm import llm_chat, UserMsg, SystemMsg
from llm_fns.pmemo import memo, memo_cache_delete, memo_cache_delete_bytag
from docker_env import DockerEnv

In [2]:
# Uncomment to invalidate this cell's cached result before re-running
# (presumably removes memo entries tagged "test0" - confirm against llm_fns.pmemo).
# memo_cache_delete_bytag(tag="test0")

# Smoke test: one small chat request against the 8B model. The call is the
# last expression of the cell, so its return value is shown as the cell output.
llm_chat(
    "test: count from 1 to 3",
    model="llama3-8b-8192",
    use_cache=True,
    cache_tag="test0",
)

'1, 2, 3'

In [3]:
# A few draft prompts for trying GLAN-like top-level field taxonomy generation

def STUDY_FIELD_A0(field):
    """Build the opening user prompt asking for an exhaustive breakdown of `field`.

    :param field: Name of the field of study; interpolated three times into the prompt.
    :return: A single-line prompt string.
    """
    intro = f"I need to study {field}. "
    request = (
        f"List all subfields of {field} as well as all necessary foundational and applied "
        f"knowledge and capabilities inherent to {field} in systematic & exhaustive manner. "
    )
    formatting = (
        "Use markdown headers to express hierarchical nature of the answer "
        "and format the constituents as markdown bullet lists."
    )
    return intro + request + formatting


# Follow-up user prompt asking the model to critique its own previous answer
# against three criteria. Leading/trailing whitespace is stripped, so the text
# starts at "Now step back".
STUDY_FIELD_FOLLOWUP = """
Now step back for a second and analyze your answer to my previous request, providing constructive and unbiased criticism.
Pay special attention to these aspects of your answer:
1. Your answer should be exhaustive.
2. It should not contain abridged parts such as "etc.", "..." - you should write everything explicitly, even if it will have to be fit in several messages.
3. It should not include "fluff" points unnecessary for the student to achieve all capabilities expected from a master of this field.
""".strip()

# Closing user prompt asking for the revised, final list.
# NOTE(review): "critcism" below is a typo for "criticism". It is left as-is
# because this text is sent to the model verbatim; correcting it changes the
# prompt (and possibly any cache entries keyed on it - confirm before fixing).
STUDY_FIELD_FOLLOWUP_FINAL = "Now output the final complete correct version of the list, integrating all relevant critcism and keeping the good parts of original answer intact"


def toplevel_syllabus(field, sysprompt=None, model="llama3-70b-8192", **kwargs):
    """Generate a top-level syllabus for `field` with a draft -> critique -> final chat.

    The conversation is built turn by turn: the initial request
    (STUDY_FIELD_A0), then STUDY_FIELD_FOLLOWUP, then STUDY_FIELD_FOLLOWUP_FINAL.
    After every user turn the model is queried and its reply is appended as an
    assistant message, so each follow-up refers to an answer that actually
    exists in the history.

    :param field: Field of study to build the syllabus for.
    :param sysprompt: Optional system prompt; when truthy it is sent as the
        first message of the conversation.
    :param model: Model identifier forwarded to llm_chat.
    :param kwargs: Extra keyword arguments forwarded unchanged to every
        llm_chat call (e.g. use_cache, cache_tag).
    :return: Content of the last assistant message, i.e. the reply to
        STUDY_FIELD_FOLLOWUP_FINAL.
    """
    msgs = []
    if sysprompt:
        msgs.append(dict(role="system", content=sysprompt))

    user_turns = [
        STUDY_FIELD_A0(field),
        STUDY_FIELD_FOLLOWUP,
        STUDY_FIELD_FOLLOWUP_FINAL,
    ]

    for prompt in user_turns:
        msgs.append(dict(role="user", content=prompt))
        reply = llm_chat(
            msgs,
            model=model,
            **kwargs,
        )
        # Keep the reply in the history so the next turn can refer to it.
        msgs.append(dict(role="assistant", content=reply))

    return msgs[-1]["content"]


# Run the syllabus chat for one sample field, with caching enabled.
# Note: the name `ret` is rebound by a later cell to the project-generation
# reply, so this value is only available until that cell runs.
ret = toplevel_syllabus("software engineering", use_cache=True, cache_tag="glan_dev_v0")

In [4]:
# print(ret)

In [5]:
def GEN_TAXONOMY(field):
    """Build a prompt requesting a high-level taxonomy of `field` for curriculum design.

    :param field: Name of the field to classify.
    :return: A single-line prompt string.
    """
    return (
        f"Provide a natural high-level taxonomy of the {field} field "
        "suitable for designing an education program."
    )


def GEN_TAXONOMY_A1(field):
    """Build the practitioner-oriented variant of the taxonomy prompt.

    :param field: Name of the field to classify.
    :return: A single-line prompt string.
    """
    viewpoint = (
        f"Provide a natural high-level taxonomy of the {field} field "
        "from practitioner's point of view. "
    )
    purpose = "The taxonomy should be suitable for designing an education program."
    return viewpoint + purpose


def GEN_HANDS_ON_CURRICULUM_A1(field):
    """Return the (still unfinished) hands-on curriculum prompt.

    :param field: Accepted for signature parity with the other prompt builders;
        the current text does not interpolate it.
    :return: The fixed prompt string.
    """
    return "Design an educational software project"


# Try the practitioner-oriented taxonomy prompt on a sample field and print the raw reply.
# NOTE(review): unlike the other llm_chat calls in this notebook, this one passes
# neither use_cache nor cache_tag - confirm whether an uncached request is intended.
print(
    llm_chat(
        [UserMsg(GEN_TAXONOMY_A1("python software engineering"))],
        model="llama3-70b-8192",
    )
)

What a great question!

After conducting research and consulting with practitioners, I've developed a natural high-level taxonomy of the Python software engineering field from a practitioner's point of view. This taxonomy can serve as a foundation for designing an education program.

**Level 1: Foundational Skills**

* **Programming Fundamentals**: Understanding of Python syntax, data structures, control structures, functions, and object-oriented programming concepts.
* **Development Tools**: Familiarity with integrated development environments (IDEs), text editors, and version control systems (e.g., Git).

**Level 2: Core Competencies**

* **Data Structures and Algorithms**: Knowledge of data structures (e.g., lists, dictionaries, sets) and algorithms (e.g., sorting, searching) in Python.
* **Software Design Patterns**: Understanding of creational, structural, and behavioral design patterns in Python.
* **Testing and Debugging**: Familiarity with unit testing, integration testing, and

In [7]:
def GEN_EDU_PROBLEMS_A0(lang="python", ext="py", comment="#", project_kind="cli tool"):
    """Build a prompt asking for a small project plus an exercise and a unit test based on it.

    The prompt requests four outputs: a plan, a single-file project, an
    English task description and a unit test named test_task.<ext> that fails
    until the task is solved.

    :param lang: Language name interpolated into the prompt; also used as the
        code-fence tag in the filename example.
    :param ext: File extension used for the test file name.
    :param comment: Line-comment prefix used in the filename example.
    :param project_kind: Kind of project to request.
    :return: The prompt text. Only leading/trailing whitespace is stripped, so
        every line after the first keeps its four-space indent.

    NOTE(review): the prompt contains the doubled word "the the"; it is part of
    the text sent to the model and is left untouched here.
    """
    return f"""
    As a programming mentor create a self-contained {lang} {project_kind} project and then use it as a base to create an educational programming/software engineer task that will require the student to read and understand the project and make 1-3 reasoning steps to solve correctly. You should output:
    1. A concise plan for this task which starts from the core competencies and concepts the student will be required to master to pass.
    2. A source code for a standalone {lang} {project_kind} project as a single {lang} file.
    3. An educational programming/swe task expressed in English which should require the student some mastery over {lang}, the project and general software engineering skills to solve.
    4. A unit test named test_task.{ext} that can be used to automatically verify the student has solved the task - it should fail in the current state and it should pass when the task is solved.

    When you output a {lang} source code file ALWAYS output the the correct filename I should save it into like this: ```{lang}\n{comment} test_task.{ext}
    Code carefully ensuring the {lang} files and unittests you code are correct and do not contain any bugs preventing the project from being executed.
    Now start with the plan and then execute the rest of my task:
    """.strip()


def GEN_EDU_PROJECT_A0(lang="python", project_file="project.py", ext="py", comment="#", project_kind="cli app"):
    """Build a prompt asking for a self-contained project, its unit tests and an optional installer.

    The prompt tells the model to prefix every code block with a Markdown
    header of the form "### FILE: <name>"; extract_codeblocks() in this
    notebook matches on that header.

    :param lang: Language name interpolated into the prompt.
    :param project_file: File name requested for the project source.
    :param ext: File extension used for the test file name (test_project.<ext>).
    :param comment: Not interpolated by the current template.
    :param project_kind: Kind of project to request.
    :return: The prompt text. Only leading/trailing whitespace is stripped, so
        every line after the first keeps its four-space indent.
    """
    return f"""
    As a programming mentor create a self-contained {lang} {project_kind} project which we will then use as a base to create educational programming tasks. Ensure the project is correct, complete and is interesting and non-trivial enough to provide educational value to the students who will work on it. You should output:
    1. A concise plan for this task which starts from the core competencies and concepts the student will be required to master to work on this project.
    2. A source code file named {project_file} containing a standalone {lang} {project_kind} project implementation as a single {lang} file.
    3. A set of unit tests named test_project.{ext} that reliably covers the essential functionality of the project. Pay attention to unit test correctness as we will later use these tests to automatically verify students\' performance.
    4. If the project requires {lang} libraries not contained in {lang} standard library to work you have to provide a shell script named install.sh which should install the necessary libraries. You should not provide install.sh script otherwise.

    # Important notes!
    When you output a source code file you have to ALWAYS prefix the codeblock with markdown header like this: ### FILE: <complete_file_name_here>. I will save the codeblock content into the appropriately named files.

    Code carefully ensuring the {lang} files and unittests you code are correct and do not contain any bugs preventing the project from being executed.

    Now start with the plan and then execute the rest of my task:
    """.strip()


# Generate one sample project using the template's default arguments.
# Note: this rebinds `ret`, which an earlier cell used for the syllabus text;
# the helpers and calls further down in this cell operate on this new value.
ret = llm_chat(
    GEN_EDU_PROJECT_A0(), model="llama3-70b-8192", use_cache=True, cache_tag="prtask_v0"
)

# display(Markdown(ret))

def extract_codeblocks(text, require_name=True):
    """
    Extract fenced Markdown code blocks from a string.

    :param text: String containing Markdown.
    :param require_name: If True, only blocks preceded by a Markdown header of
        the form ``### FILE: <filename>`` are matched and the filename is returned
        with each block.
    :return: If require_name is True, a list of ``(code, filename, lang)``
        tuples (filename stripped of surrounding whitespace; lang is the text
        after the opening fence, possibly empty). Otherwise a list of code
        strings. The code keeps its trailing newline before the closing fence.
    """
    # The body is matched lazily up to the next triple-backtick fence. A
    # character class such as [^```] only excludes single backticks, which
    # would reject any code that contains one (shell substitution, Markdown
    # in docstrings, ...). ".+?" still requires a non-empty body.
    fence_pattern = r"```([^\n]*)\n(.+?)```"

    if require_name:
        named_pattern = r"#+\s+FILE:\s*([^\n]*)\s*\n*" + fence_pattern
        return [
            (code, filename.strip(), lang)
            for filename, lang, code in re.findall(named_pattern, text, re.DOTALL)
        ]

    # findall yields (lang, code) pairs here; only the code is part of the contract.
    return [code for _lang, code in re.findall(fence_pattern, text, re.DOTALL)]


def print_code(src, language=""):
    """Render `src` in the notebook as a fenced Markdown code block.

    :param src: Source text; trailing newlines are removed before rendering.
    :param language: Fence tag placed after the opening backticks.
    """
    body = src.rstrip("\n")
    fenced = "```{}\n{}\n```".format(language, body)
    display(Markdown(fenced))

def debug_dict(d, maxlen=80):
    """Return a copy of `d` whose values are stringified and truncated for display.

    :param d: Mapping to summarise.
    :param maxlen: Maximum number of characters kept per value; longer values
        are cut to this length and suffixed with "...".
    :return: New dict with the same keys and string values.
    """
    shortened = {}
    for key, value in d.items():
        text = str(value)
        if len(text) > maxlen:
            shortened[key] = text[:maxlen] + "..."
        else:
            shortened[key] = text
    return shortened

def print_json(src, shorten=False, maxlen=80):
    """Pretty-print `src` as indented JSON in a fenced code block.

    :param src: JSON-serialisable object (a dict when `shorten` is True).
    :param shorten: If True, values are first stringified and truncated via debug_dict.
    :param maxlen: Truncation length forwarded to debug_dict.
    """
    payload = debug_dict(src, maxlen=maxlen) if shorten else src
    rendered = json.dumps(payload, indent=2)
    print_code(rendered, language="json")

def parse_files(txt, require_filename=True, verbose=False):
    """Collect the named code blocks of `txt` into a ``{filename: code}`` dict.

    Blocks are obtained from extract_codeblocks(txt), which yields
    ``(code, filename, lang)`` tuples for blocks introduced by a
    ``### FILE: <filename>`` header. If the same filename occurs more than
    once, the last block wins.

    :param txt: Markdown text (e.g. a model reply) to parse.
    :param require_filename: If True, a block whose FILE header carries an
        empty filename is an error; if False such blocks are skipped.
    :param verbose: If True, print each file name and render its code.
    :return: Dict mapping filename to code.
    :raises ValueError: If require_filename is True and a block has an empty filename.
    """
    files = {}
    for code, filename, lang in extract_codeblocks(txt):
        if not filename:
            # An empty name cannot be used as a file path, so never store it.
            if require_filename:
                raise ValueError("code block has a FILE header but no filename")
            continue

        files[filename] = code

        if verbose:
            print("-" * 80)
            print(f"FILE: {filename}")
            print("-" * 80)
            print_code(code, language=lang)

    return files


def _verify_project(
    sources, tests, installers=None, async_mode=False, start_cmd=None, stop_cmd=None
):
    denv = DockerEnv()

    # Copy source files into the Docker environment
    denv.copy_files(file_dict=sources)

    # Copy test files into the Docker environment
    denv.copy_files(file_dict=tests)

    # Run installers
    installer_outputs = []
    if installers and len(installers):
        for installer_path, installer_content in installers.items():
            denv.copy_files(file_dict={installer_path: installer_content})
            installer_output, installer_exit_code = denv.run_command(
                f"bash {installer_path}"
            )
            installer_outputs.append(
                {
                    "path": installer_path,
                    "output": installer_output,
                    "status": "OK" if installer_exit_code == 0 else "FAILED",
                }
            )
    
    tests_passed = False

    if async_mode:
        # Start the app
        start_output, start_exit_code = denv.run_command(start_cmd)
        if start_exit_code != 0:
            print("Error starting the app: ", start_output)
            denv.cleanup()
            return

        # Run the tests
        test_output, test_exit_code = denv.run_command("python -m unittest discover -v")
        tests_passed = test_exit_code == 0

        # Stop the app
        stop_output, stop_exit_code = denv.run_command(stop_cmd)

    else:
        # Run the tests
        test_output, test_exit_code = denv.run_command("python -m unittest discover -v")
        tests_passed = test_exit_code == 0

    # Create a JSON log with the results
    log = {
        "installers": installer_outputs,
        "tests": {"pass": tests_passed, "output": test_output},
    }

    if async_mode:
        log["app"] = {
            "start": {
                "output": start_output,
                "status": "OK" if start_exit_code == 0 else "FAILED",
            },
            "stop": {
                "output": stop_output,
                "status": "OK" if stop_exit_code == 0 else "FAILED",
            },
        }

    # Clean up
    del denv

    return tests_passed, log

# print(ret)
# parse_files(ret)

def verify_gen_project(generated_text, project_file="project.py", test_file="test_project.py", install_file="install.sh", verbose=False):
    """Extract a generated project from a model reply and run its tests.

    :param generated_text: The model reply to parse with parse_files(). An
        already-parsed ``{filename: code}`` dict is accepted as well and used as is.
    :param project_file: Name of the project source file expected in the reply.
    :param test_file: Name of the unit-test file expected in the reply.
    :param install_file: Name of the optional installer script; passed to the
        runner as an installer only when present.
    :param verbose: If True, print the runner input and output.
    :return: ``(success, logs)`` as returned by _verify_project(). If the
        project or test file is missing, ``(False, {"error": <message>})`` is
        returned and nothing is executed.
    """
    # Use the argument rather than any notebook-level variable, so the function
    # verifies exactly what the caller passed in.
    if isinstance(generated_text, dict):
        files = generated_text
    else:
        files = parse_files(generated_text)

    missing = [name for name in (project_file, test_file) if name not in files]
    if missing:
        logs = {"error": "missing required file(s): " + ", ".join(missing)}
        if verbose:
            print(logs["error"])
        return False, logs

    runner_kwargs = {}
    if install_file in files:
        runner_kwargs["installers"] = {install_file: files[install_file]}
    runner_kwargs["sources"] = {project_file: files[project_file]}
    runner_kwargs["tests"] = {test_file: files[test_file]}

    if verbose:
        print("RUNNER INPUT <<<")
        print_json(runner_kwargs)

    success, logs = _verify_project(**runner_kwargs)

    if verbose:
        print("RUNNER OUTPUT >>>")
        print(logs)

    return success, logs

# Pass the raw model reply: `generated_text` is the text to be parsed, and
# verify_gen_project does the parsing itself, so pre-parsing here is redundant.
verify_gen_project(ret, verbose=True)


RUNNER INPUT <<<


```json
{
  "sources": {
    "project.py": "import argparse\nimport json\nimport os\n\nTASKS_FILE = 'tasks.json'\n\ndef load_tasks():\n    if os.path.exists(TASKS_FILE):\n        with open(TASKS_FILE, 'r') as f:\n            return json.load(f)\n    return []\n\ndef save_tasks(tasks):\n    with open(TASKS_FILE, 'w') as f:\n        json.dump(tasks, f)\n\ndef add_task(task):\n    tasks = load_tasks()\n    tasks.append(task)\n    save_tasks(tasks)\n\ndef list_tasks():\n    tasks = load_tasks()\n    for i, task in enumerate(tasks, 1):\n        print(f\"{i}. {task}\")\n\ndef delete_task(index):\n    tasks = load_tasks()\n    try:\n        del tasks[index - 1]\n        save_tasks(tasks)\n    except IndexError:\n        print(\"Invalid task index\")\n\ndef main():\n    parser = argparse.ArgumentParser(description='Task Manager')\n    subparsers = parser.add_subparsers(dest='command')\n\n    add_parser = subparsers.add_parser('add', help='Add a new task')\n    add_parser.add_argument('task', help='Task description')\n\n    list_parser = subparsers.add_parser('list', help='List all tasks')\n\n    delete_parser = subparsers.add_parser('delete', help='Delete a task')\n    delete_parser.add_argument('index', type=int, help='Task index')\n\n    args = parser.parse_args()\n\n    if args.command == 'add':\n        add_task(args.task)\n    elif args.command == 'list':\n        list_tasks()\n    elif args.command == 'delete':\n        delete_task(args.index)\n\nif __name__ == '__main__':\n    main()\n"
  },
  "tests": {
    "test_project.py": "import unittest\nfrom project import add_task, list_tasks, delete_task\nimport json\nimport os\n\nclass TestTaskManager(unittest.TestCase):\n    def setUp(self):\n        if os.path.exists('tasks.json'):\n            os.remove('tasks.json')\n\n    def test_add_task(self):\n        add_task('Task 1')\n        with open('tasks.json', 'r') as f:\n            tasks = json.load(f)\n        self.assertEqual(tasks, ['Task 1'])\n\n    def test_list_tasks(self):\n        add_task('Task 1')\n        add_task('Task 2')\n        output = io.StringIO()\n        with redirect_stdout(output):\n            list_tasks()\n        self.assertIn('1. Task 1', output.getvalue())\n        self.assertIn('2. Task 2', output.getvalue())\n\n    def test_delete_task(self):\n        add_task('Task 1')\n        add_task('Task 2')\n        delete_task(1)\n        with open('tasks.json', 'r') as f:\n            tasks = json.load(f)\n        self.assertEqual(tasks, ['Task 2'])\n\nif __name__ == '__main__':\n    unittest.main()\n"
  }
}
```

RUNNER OUTPUT >>>
