### Goal: Solve the issue by using langchain and start a pull request

In [1]:
import re
import asyncio as aio

In [2]:
from dotenv import load_dotenv
from os import getenv

from autocoder.models import Repository
from autocoder.langchain_shortcuts import (
    get_response,
    get_file_paths,
    get_code_response,
    extract_files,
)

load_dotenv()


issue_url = "https://github.com/shroominic/VoiceGPT/issues/21"
repo_url, issue_number = issue_url.split("/issues/")

repo = Repository(repo_url, getenv("GITHUB_TOKEN"))
issue = repo.get_issue(issue_number)

A branch named 'issue-21' already exists. Switching to the existing branch.


#### Init prompt process

In [3]:
context = {
    "repo_name": issue.repository.name, 
    "repo_keywords": issue.repository.keywords, 
    "repo_description": issue.repository.description, 
    "issue_title": issue.title, 
    "issue_number": issue.issue_number,
    "issue_description": issue.body, 
    "code_tree": issue.repository.codebase.tree
}

coding_system_prompt = """
You are a coding assistant solving tasks for developers.
Tasks are represented as issues on GitHub repositories.
Be verbose and precise in your responses.

The following is a conversation about solving issue {issue_title} #{issue_number} on {repo_name}.
"""


#### Collect important files to gain context

In [4]:
important_files_prompt = """
Which files are important to read for gaining understanding of the codebase?

Codebase: 
{code_tree}

For example in a python project you might want to look at the 'requirements.txt' file to understand which technologies are used in the project.\n
# Reply like this and make sure pathes are in 'single quotes' and not "double quotes":
relevant_files = [
    'path/to/file1.txt',  # do not start with ./ or /
    'path/to/file2.js',
    ...
    # max 4 files so choose wisely
]
"""

relevant_file_paths = aio.create_task(get_file_paths(
    important_files_prompt, 
    "relevant_files",
    system_instruction=coding_system_prompt,
    validation=issue.codebase.validate_file_paths,
    **context
))

relevant_files_dict = {}
for path in relevant_file_paths:
    path = path.strip("'") # TODO: remove this 
    relevant_files_dict[path] = issue.codebase.show_file(path)

context["relevant_files"] = str(relevant_files_dict)

AttributeError: '_asyncio.Task' object has no attribute 'strip'


Error: Unrecognized request arguments supplied: code_tree, issue_description, issue_number, issue_title, repo_description, repo_keywords, repo_name 
Retrying...


Error: Unrecognized request arguments supplied: code_tree, issue_description, issue_number, issue_title, repo_description, repo_keywords, repo_name 
Retrying...


Error: Unrecognized request arguments supplied: code_tree, issue_description, issue_number, issue_title, repo_description, repo_keywords, repo_name 
Retrying...


Error: Unrecognized request arguments supplied: code_tree, issue_description, issue_number, issue_title, repo_description, repo_keywords, repo_name 
Retrying...


Error: Unrecognized request arguments supplied: code_tree, issue_description, issue_number, issue_title, repo_description, repo_keywords, repo_name 
Retrying...


Error: Unrecognized request arguments supplied: code_tree, issue_description, issue_number, issue_title, repo_description, repo_keywords, repo_name 
Retrying...


Error: Unrecognized r

Generate repository summary/context:

In [None]:
repo_summary_prompt = """
Summarize what this repository is about and what it does to have a better understanding and context of the codebase.
Repository name:
{repo_name}
Repository description:
{repo_description}
Repository keywords:
{repo_keywords}
Repository codebase:
{code_tree}
Relevant files: 
{relevant_files}

Describe the technologies used and the structure of the codebase.
Please be as detailed and precise as possible but keep it short.
"""

context["repo_summary"] = aio.run(get_response(
    repo_summary_prompt, 
    system_instruction=coding_system_prompt, 
    **context
))

Generate issue summary/context and decide which files to change and create:

In [None]:
issue_summary_prompt = """
Current Project Directory:
{code_tree}
Repository Summary:
{repo_summary}
Issue title: 
{issue_title}
Issue description: 
{issue_description}

Describe step by step (abstract) how to implement the issue and what files are relevant.
Be precise and keep it short.
"""

get_changes_prompt = """
Repository codebase:
{code_tree}
Repository description:
{repo_summary}
Issue description:
{issue_summary}

What files need to be changed to solve the issue?
What new files need to be created?

Write paths using 'single quotes'.
Make sure to put only file paths that exist in the codebase/project directory in files_to_change.
Respond with a codeblock like this:
```python
new_files = [
    'path/to/new_file1.py', # do not start with ./ or /
    'path/to/new_file2.ipynb',
    ...
]

files_to_change = [
    'path/to/file1.txt',  # do not start with ./ or /
    'path/to/file2.js',
    ...
]
```
"""

def get_files(retry=5):
    try:
        context["issue_summary"] = aio.run(get_response(
            issue_summary_prompt,
            system_instruction=coding_system_prompt,
            **context
        ))
        
        files_to_change = aio.run(get_response(
            get_changes_prompt,
            system_instruction=coding_system_prompt,
            **context
        ))

        new_files = extract_files(files_to_change, "new_files")
        files_to_change = extract_files(files_to_change, "files_to_change")

        if issue.codebase.validate_file_paths(files_to_change):
            raise ValueError("Some files to change do not exist in the codebase.")

        if any(issue.codebase.file_exists(f) for f in new_files): 
            raise ValueError("Some new files already exist in the codebase.")

        return new_files, files_to_change
    except Exception as e:
        if retry > 0:
            print("Retrying...", e)
            return get_files(retry=retry-1)


new_files, files_to_change = get_files()

print("New files:", new_files)
print("Files to change:", files_to_change)

Create new files and change already exsisting ones

In [None]:
new_file_prompt = """
Repository description:
{repo_summary}

Issue description:
{issue_summary}

Implement this file {file_path} to solve the issue.
After completion this the file will be created and added to the codebase.
Reply with a codeblock containing the content of the new file.
"""

new_files_dict = {}
for path in new_files:
    context["file_path"] = path
    
    new_file_content = aio.run(get_code_response(
        new_file_prompt,
        system_instruction=coding_system_prompt,
        **context
    ))
    
    new_files_dict[path] = new_file_content



change_file = """
Repository description:
{repo_summary}
Issue description:
{issue_summary}
First write a detailed instruction on how to change the file {file_path}.

# [Begin file content]
{file_content}
# [Eind file content]

"""
"""

Then create a list called "changes".
This list should contain tuples of the form (line_number, action, "new code").
Action can be one of the following:
- add: insert a new line of code at the line with the given number and shift all following lines down
- overwrite: overwrite the line with the given number
- delete: delete the line with the given number

Write down all changes and 
reply with a codeblock like this:
```python
# (line_number, 'action', 'new code')
# make sure use 'single quotes' and not "double quotes" or `backticks` like in this example:
changes = [
    (0, 'add', 'import newexample'),
    (4, 'overwrite', 'def foo():'),
    (5, 'add', '    print("hello world")'),
    (6, 'delete', '    print("bar")'),
    ...
]
```
"""

change_files_dict = {}
for path in files_to_change:
    context["file_path"] = path
    context["file_content"] = issue.codebase.show_file(path)
    
    changes_content = aio.run(get_code_response(
        change_file,
        system_instruction=coding_system_prompt,
        **context
    ))

    tuple_pattern = re.compile(r"\((\d+),\s*\'(.*?)\',\s*\'(.*?)\'\)", re.DOTALL)
    tuples = tuple_pattern.findall(changes_content)
    
    change_files_dict[path] = [
        (int(line), action, change) 
        for line, action, change in tuples
    ]


In [None]:
# TODO: Format code using chatgpt 
# and check if content is the same as the original file

In [None]:
for file, content in new_files_dict.items():
    issue.codebase.create_file(file, content)
    print("Created:", file, "\nwith content:", content, "\n")
    
for file, changes in change_files_dict.items():
    issue.codebase.change_file(file, changes)
    print("Changed:", file, "\nwith changes:", changes, "\n")

In [None]:
issue.codebase.commit_changes("Solved issue using GPT-4")

In [None]:
# TODO: Test code and repeat process if necessary

In [None]:
# TODO: Create a pull request