In [8]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
import tinygen

In [10]:
from dotenv import load_dotenv
import os

load_dotenv()

True

In [11]:
githubUrl = "https://github.com/leoshimo/copier_python_template"
prompt = 'Add "Hello world" to the end of readme file'

# Github API

In [12]:
from dataclasses import dataclass
from typing import Optional, List


@dataclass(frozen=True)
class File:
    path: str
    content: Optional[str]

    def with_content(self, content: Optional[str]):
        return File(self.path, content)


class FileSystem:
    def list(self) -> List[File]:
        "List all files in filesystem"
        pass

    def open(self, path: str) -> Optional[File]:
        "Returns the file at path, or None if file was not found"
        pass

In [13]:
# github.py
import requests
from urllib.parse import urlparse
import base64


class GithubRepoFileSystem(FileSystem):
    def __init__(self, files: List[File]):
        self.files = files

    def list(self) -> List[File]:
        return self.files

    def open(self, path: str) -> Optional[File]:
        for f in self.files:
            if f.path == path:
                return f
        return None


def load_repo(repo_spec_or_url) -> GithubRepoFileSystem:
    "Create a GithubRepoFileSystem from given repository spec or URL"
    repo_spec = norm_repo_spec(repo_spec_or_url)
    files = get_repo_content(repo_spec, ".")
    return GithubRepoFileSystem(files)


def get_repo_content(repo_spec, path) -> List[File]:
    "Load all repo files recursively from PATH"
    # todo(leo): lazy fetch contents?
    r = requests.get(
        f"https://api.github.com/repos/{repo_spec}/contents/{path}",
        headers={
            "authorization": f"Bearer {os.getenv('GITHUB_API_KEY')}",
            "accept": "application/vnd.github+json",
        },
    )
    f = r.json()

    if isinstance(f, dict) and f["type"] == "file":
        return [File(f["path"], decode(f["content"]))]

    return [file for child in f for file in get_repo_content(repo_spec, child["path"])]


def norm_repo_spec(url_or_repo_spec):
    "Normalizes repository specified into format user/repo_name"
    res = urlparse(url_or_repo_spec).path.removeprefix("/")
    return res


def decode(content):
    bytes = content.encode("ascii")
    return base64.b64decode(bytes).decode("ascii")


repo = load_repo(githubUrl)

In [14]:
repo.open("notebooks/{{ project_name }}.ipynb.jinja")

File(path='notebooks/{{ project_name }}.ipynb.jinja', content='{\n "cells": [\n  {\n   "cell_type": "code",\n   "execution_count": 1,\n   "id": "89d54222-17ee-4ebf-8771-e2ff3f962be6",\n   "metadata": {},\n   "outputs": [],\n   "source": [\n    "%load_ext autoreload\\n",\n    "%autoreload 2"\n   ]\n  },\n  {\n   "cell_type": "code",\n   "execution_count": 2,\n   "id": "be9b0a94-904d-4f05-92b3-a7f487947c86",\n   "metadata": {},\n   "outputs": [],\n   "source": [\n    "import {{ project_name }}.hello"\n   ]\n  },\n  {\n   "cell_type": "code",\n   "execution_count": 3,\n   "id": "1e993dc4-f1c8-4976-b4e5-c5e8f05e8924",\n   "metadata": {},\n   "outputs": [\n    {\n     "data": {\n      "text/plain": [\n       "\'Hello world\'"\n      ]\n     },\n     "execution_count": 3,\n     "metadata": {},\n     "output_type": "execute_result"\n    }\n   ],\n   "source": [\n    "{{ project_name }}.hello.hello_world()"\n   ]\n  }\n ],\n "metadata": {\n  "kernelspec": {\n   "display_name": "Python 3 (ipyke

In [15]:
# test norm_repo_spec
assert (
    norm_repo_spec("leoshimo/copier_python_template")
    == "leoshimo/copier_python_template"
)
assert (
    norm_repo_spec("https://github.com/leoshimo/copier_python_template")
    == "leoshimo/copier_python_template"
)
assert (
    norm_repo_spec("https://github.com/leoshimo/copier_python_template?jibberish=hi")
    == "leoshimo/copier_python_template"
)

# Generating Unified Diff

Idea: A "Workspace" has:
- list of open files
- list of "edits"

In [16]:
import difflib


def unified_diff(old: Optional[File], new: Optional[File]):
    old_content = old.content if old and old.content else ""
    new_content = new.content if new and new.content else ""
    old_path = "a/" + old.path if old and old.content else "/dev/null"
    new_path = "b/" + new.path if new and new.content else "/dev/null"

    diff = difflib.unified_diff(
        old_content.splitlines(keepends=True),
        new_content.splitlines(keepends=True),
        fromfile=old_path,
        tofile=new_path,
    )

    diff_text = "".join(diff) + "\n"

    return diff_text

In [17]:
from typing import Dict


class Staging:
    def __init__(self, filesystem: FileSystem):
        self.edits: Dict[File] = {}
        self.filesystem = filesystem

    def add(self, f: Optional[File]):
        if f:
            self.edits[f.path] = f

    def diff(self):
        diffs = []
        for f in self.edits.values():
            old = self.filesystem.open(f.path)
            diffs.append(unified_diff(old, f))
        return "\n".join(diffs)

## Accumulating Changes

In [18]:
staging = Staging(repo)

# Editing README.md
staging.add(repo.open("README.md").with_content("Hello world"))

# Add NEW.md
staging.add(File("NEW.md", "New file"))

# Deleting copier.yaml
staging.add(File("copier.yaml", None))

with open("../diff.patch", "w") as f:
    content = staging.diff()
    print(content)
    f.write(content)

--- a/README.md
+++ b/README.md
@@ -1,37 +1 @@
-# README
-
-[Copier](https://copier.readthedocs.io/) template for a simple Python projects
-
-- Python project with `src` and `test` managed by `poetry`
-- `notebooks` for Jupyter notebooks
-- `Makefile` as task runner. See `make help`
-
-## Prerequisites
-
-- `pyenv`
-- `poetry`
-- `copier`
-
-## Project Setup
-
-```sh
-$ copier copy gh:leoshimo/copier_python_template my_project
-$ cd my_project
-$ make setup
-```
-
-## Running Tasks
-
-See `make help` for list of tasks.
-
-## API Keys
-
-Add `.env` with `SOME_API_KEY=API_KEY`. Then use `python-dotenv`:
-
-```python
-import os
-from dotenv import load_dotenv
-load_dotenv()
-os.environ.get('SOME_API_KEY')
-```
-
+Hello world

--- /dev/null
+++ b/NEW.md
@@ -0,0 +1 @@
+New file

--- a/copier.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-project_name:
-    type: str
-    help: Project name (snake_case)




# Tools

In [111]:
find_tool = Tool(
    "find",
    """find PATTERN
returns a list of paths that match given
PATTERN. PATTERN is a valid regular expression for Python's
re.compile.""",
    lambda arg: input(f'find({arg}): '),
)

grep_tool = Tool(
    "grep",
    """grep PATTERN returns a list of paths to files containing given
PATTERN. PATTERN is a valid regular expression for Python's
re.compile.""",
    lambda arg: input(f'grep({arg}): '),
)

open_tool = Tool(
    "open",
    """open PATH
returns contents of file at PATH.""",
    lambda arg: input(f'open({arg}): '),
)

write_tool = Tool(
    "write",
    """write PATH CONTENT
write CONTENT to the file at PATH. This overwrites existing content if any.""",
    lambda arg: input(f'write({arg}): '),
)

tools = [find_tool, grep_tool, open_tool, write_tool]

In [107]:
import tinygen.tool

print(tinygen.tool.describe_tools(tools))

find PATTERN
returns a list of paths that match given
PATTERN. PATTERN is a valid regular expression for Python's
re.compile.

grep PATTERN returns a list of paths to files containing given
PATTERN. PATTERN is a valid regular expression for Python's
re.compile.

open PATH
returns contents of file at PATH.

write PATH CONTENT
write CONTENT to the file at PATH. This overwrites existing content if any.


# Planning

- Plan + Self-Reflection: https://platform.openai.com/playground/p/jxYEE69LEHREHqf9e7I6LgGj


In [78]:
task = 'The programming task: Add a new FastAPI endpoint for "/api/diff_me" that accepts githubUrl and prompt as query parameters'
the_plan = tinygen.plan(tools, task)

In [79]:
print(the_plan)

Plan:
1. Use `find` tool with `\.py$` pattern to find Python files.
2. Use `grep` tool with `FastAPI\(\)` pattern to find the file containing the FastAPI app instance.
3. Use `open` tool to get the contents of the found file.
4. Add the new endpoint to the file contents:
```
@app.get("/api/diff_me")
def diff_me(githubUrl: str, prompt: str):
    # endpoint logic here
```
5. Use `write` tool to save the changes to the file.


# Plan Execution
- Execute (React): https://platform.openai.com/playground/p/BZp876JtEj3ss8SLqI2Acs85

In [164]:
def create_agent_msgs(tools: List[Tool], task: str, plan: str) -> List[dict]:
    system_prompt = """You exist in an environment where you can interact with the file system using distinct tools below:
    
{tool_descriptions}

Given a programming task and a step-by-step sequence of actions using the tools described above, the objective is to complete the task.

Each assistant response should be formatted as follows:

OBSERVATION: <Observation about preceding user message in context of accomplishing the programming task>
THOUGHT: <Thought on next step to accomplish programming task, following the step-by-step sequence of the plan.
ACTION: <Specifies one of above tool to use with arguments to pass to tools>

When the task is completed, the ACTION should be "ACTION: DONE"

Examples:

OBSERVATION: The user provided a programming task to "Add Hello World to the bottom of the README file", with the plan to find the file, open the file, then writing to file.
THOUGHT: First, I should find the README file.
ACTION: find "README"

RESULT: "./README.md"

OBSERVATION: There is a file named "./README.md" at path "./README.md"
THOUGHT: Next, I should open the file I found.
ACTION: open "./README.md"

RESULT: "# README\nThis project is used as an example file"

OBSERVATION: The README.md file contains the text above.
THOUGHT: I should append to the file using the write tool using the existing contents and "Hello World" string
ACTION: write "./README.md" "# README\nThis project is used as an example file\nHello World"

RESULT: OK

OBSERVATION: The README.md file was updated.
THOUGHT: I have updated the README.md file. The task is complete.
ACTION: DONE
    """.format(tool_descriptions=describe_tools(tools))

    user_msg = """The Task: 
{task}

The Plan is:
{plan}
    """.format(task=task, plan=plan)

    return [
        { 'role': 'system', 'content': system_prompt },
        { 'role': 'user', 'content': user_msg },
    ]
    

In [200]:
import re

def print_message(msg: dict):
    print('*** {role}:\n{content}'.format(role = msg['role'].upper(), content = msg['content']))

class Agent:
    
    def __init__(self, tools: List[Tool], task: str, plan: str, debug=False):
        self.tools = tools
        self.plan = plan
        self.messages = create_agent_msgs(tools, task, plan)
        self.debug = debug

    def run(self) -> bool:
        """
        Execute agent to completion
        """
        turns = 0

        if self.debug:
            for m in self.messages:
                print_message(m)
        
        while turns < 10:
            resp = openai.ChatCompletion.create(model="gpt-4", temperature=0, messages=self.messages)
            assistant_msg = resp.choices[0].message
            self.messages.append(assistant_msg)

            if self.debug:
                print_message(assistant_msg)
            
            turns += 1

            if turns == 1:
                return True
            

In [166]:
agent = Agent(tools, task, the_plan, debug=True)

In [167]:
agent.run()

*** SYSTEM:
You exist in an environment where you can interact with the file system using distinct tools below:
    
find PATTERN
returns a list of paths that match given
PATTERN. PATTERN is a valid regular expression for Python's
re.compile.

grep PATTERN returns a list of paths to files containing given
PATTERN. PATTERN is a valid regular expression for Python's
re.compile.

open PATH
returns contents of file at PATH.

write PATH CONTENT
write CONTENT to the file at PATH. This overwrites existing content if any.

Given a programming task and a step-by-step sequence of actions using the tools described above, the objective is to complete the task.

Each assistant response should be formatted as follows:

OBSERVATION: <Observation about preceding user message in context of accomplishing the programming task>
THOUGHT: <Thought on next step to accomplish programming task, following the step-by-step sequence of the plan.
ACTION: <Specifies one of above tool to use with arguments to pass t

True

In [169]:
assistant_msg = agent.messages[2]

In [199]:
extract_action({ 'content': 'ACTION: find "\.py$"' })

Action(name='find', args=['"\\.py$"'])