### Goal: Solve a GitHub issue using LangChain and open a pull request

In [None]:
import re
from os import getenv
from dotenv import load_dotenv
load_dotenv()

from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from autocoder.models import Repository

In [None]:
# GitHub issue this notebook works on.
issue_url = "https://github.com/shroominic/VoiceGPT/issues/5"

# The part before "/issues/" is the repository URL, the part after it is the
# issue number. NOTE(review): issue_number stays a string here — confirm that
# Repository.get_issue accepts a string.
ISSUE_PATH_MARKER = "/issues/"
repo_url, issue_number = issue_url.split(ISSUE_PATH_MARKER)

# The GitHub token is read from the environment (populated by load_dotenv()).
github_token = getenv("GITHUB_TOKEN")
repo = Repository(repo_url, github_token)
issue = repo.get_issue(issue_number)

#### Init prompt process

In [None]:
# Request timeouts passed to the chat models (values as given to request_timeout).
GPT4_REQUEST_TIMEOUT = 60 * 5
GPT35_REQUEST_TIMEOUT = 60 * 2

# Main model used by the chains in this notebook.
chatgpt = ChatOpenAI(
    model="gpt-4",
    verbose=True,
    request_timeout=GPT4_REQUEST_TIMEOUT,
)

# Second model handle; not referenced by the other cells visible here.
chatgpt3 = ChatOpenAI(
    model="gpt-3.5-turbo",
    verbose=True,
    request_timeout=GPT35_REQUEST_TIMEOUT,
)

In [None]:
# Template variables shared by the prompts in this notebook. Later cells add
# further keys to this dict (for example "relevant_files" and "repo_summary").
info_dict = {
    "repo_name": issue.repository.name,
    "repo_keywords": issue.repository.keywords,
    "repo_description": issue.repository.description,
    "issue_title": issue.title,
    "issue_number": issue.issue_number,
    "issue_description": issue.body,
    "code_tree": issue.repository.codebase.tree,
}

# System message placed at the start of every chat prompt below.
# The literals are joined by implicit concatenation, so each sentence must
# carry its own trailing separator; without it the sentences run together
# ("developers.Tasks are ...").
coding_system_prompt = SystemMessagePromptTemplate.from_template(
    template=(
        "You are a coding assistant solving tasks for developers. "
        "Tasks are represented as issues on GitHub repositories. "
        "Be verbose and precise in your responses.\n"
        "The following is a conversation about solving issue {issue_title} #{issue_number} on {repo_name}.\n"
    )
)

#### Collect important files to gain context

In [None]:
# Prompt asking the model which files to read for context. The reply is
# expected to list paths in 'single quotes' so they can be picked out below.
important_files = HumanMessagePromptTemplate.from_template(template=
"""
Which files are important to read for gaining understanding of the codebase?\n
Codebase: \n{code_tree}\n
For example in a python project you might want to look at the 'requirements.txt' file to understand which technologies are used in the project.\n
# Reply like this and make sure pathes are in 'single quotes' and not "double quotes":
relevant_files = [
    'path/to/file1.txt',  # do not start with ./ or /
    'path/to/file2.js',
    ...
    # max 4 files so choose wisely
]
""")

get_important_files = ChatPromptTemplate.from_messages([coding_system_prompt, important_files])

files_chain = LLMChain(llm=chatgpt, prompt=get_important_files, verbose=True)

# Every failed attempt counts against this limit, whether the reply was empty
# or listed paths that do not validate, so the loop always terminates.
MAX_FILE_ATTEMPTS = 6

files_ok = False
relevant_file_paths = []
for attempt in range(1, MAX_FILE_ATTEMPTS + 1):
    relevant_files_response = files_chain.run(info_dict)
    if not relevant_files_response:
        print("No files found try again", relevant_files_response)
        continue

    print(relevant_files_response)
    # Capture only the text between the single quotes, so the paths are
    # passed on without the surrounding quote characters.
    relevant_file_paths = re.findall(r"'(.*?)'", relevant_files_response)
    if not relevant_file_paths:
        print("No quoted file paths in reply, try again")
        continue

    files_ok = issue.codebase.validate_file_paths(relevant_file_paths)
    if files_ok:
        break
    print("File paths did not validate, try again:", relevant_file_paths)
else:
    # Reached only when no attempt produced a validated list of paths.
    raise RuntimeError("No files found")

print("Relevant Files:", relevant_file_paths)

# Map each selected path to the file content returned by the codebase.
relevant_files_dict = {
    path: issue.codebase.show_file(path) for path in relevant_file_paths
}

# Prompt templates take strings, so the whole mapping is stringified.
info_dict["relevant_files"] = str(relevant_files_dict)

Generate repository summary/context:

In [None]:
# Human message asking for a short repository summary; every placeholder is
# filled from info_dict.
repo_summary_prompt = HumanMessagePromptTemplate.from_template(
    template=(
        "Summarize what this repository is about and what it does.\n"
        "Repository name: {repo_name}\n"
        "Repository description: {repo_description}\n"
        "Repository keywords: {repo_keywords}\n"
        "Repository codebase: \n{code_tree}\n"
        "Relevant files: \n{relevant_files}\n"
        "Describe the technologies used and the structure of the codebase. "
        "Please be as detailed and precise as possible but keep it short.\n"
    )
)
get_repo_summary = ChatPromptTemplate.from_messages(
    [coding_system_prompt, repo_summary_prompt]
)

# Run the chain once and keep the answer both as a variable and as a template
# value for the prompts that follow.
repo_summary_chain = LLMChain(llm=chatgpt, prompt=get_repo_summary, verbose=True)
repo_summary = repo_summary_chain.run(info_dict)
info_dict["repo_summary"] = repo_summary

# Last expression of the cell: shows the summary as the cell output.
repo_summary

Generate issue summary/context:

In [None]:
# Human message asking for an abstract, step-by-step plan for the issue,
# built on top of the repository summary stored in info_dict.
issue_summary_prompt = HumanMessagePromptTemplate.from_template(
    template=(
        "Repository Summary: \n{repo_summary}\n"
        "Issue title: {issue_title}\n"
        "Issue description: {issue_description}\n"
        "Describe step by step (abstract) how to implement the issue and what files are relevant. "
        "Be precise and keep it short.\n"
    )
)
get_issue_summary = ChatPromptTemplate.from_messages(
    [coding_system_prompt, issue_summary_prompt]
)

# Run the chain once and store the plan for the prompts that follow.
issue_summary_chain = LLMChain(llm=chatgpt, prompt=get_issue_summary, verbose=True)
issue_summary = issue_summary_chain.run(info_dict)
info_dict["issue_summary"] = issue_summary

# Last expression of the cell: shows the plan as the cell output.
issue_summary

Decide which files to change and create

In [None]:
# Scratch check: run two hand-written paths (still wrapped in single quotes)
# through the codebase validator and show the result as the cell output.
output = [
    "'src/web/main.py'",
    "'src/web/templates/pricing.html'",
]
issue.codebase.validate_file_paths(output)



In [None]:
# Prompt asking which existing files must change and which new files must be
# created. The reply is expected to contain two Python-style lists,
# `files_to_change` and `new_files`, with paths in 'single quotes'.
prepare_changes = HumanMessagePromptTemplate.from_template(
    template=
    "Codebase:\n{code_tree}\n"
    "Repository description: {repo_summary}\n"
    "Issue description: {issue_summary}\n"
    "What files need to be changed to solve the issue?\n"
    "What new files need to be created?\n"
    
    "Reply like this:\n"
    "files_to_change = [\n"
    "    'path/to/file1.txt',  # do not start with ./ or /\n"
    "    'path/to/file2.js',\n"
    "    ...\n"
    "]\n"
    "new_files = [\n"
    "    'path/to/new_file1.py',\n"
    "    ...\n"
    "]\n"
    "# write paths using 'single quotes', not \"double quotes\" and not `backticks`\n"
    "# make sure to put only file paths that exist in the codebase in files_to_change\n"
)
get_files_to_change = ChatPromptTemplate.from_messages([coding_system_prompt, prepare_changes])

what_files_to_change = LLMChain(llm=chatgpt, prompt=get_files_to_change, verbose=True)


def _quoted_paths(list_match):
    """Return the single-quoted entries inside a matched list, or [] for no match."""
    if list_match is None:
        return []
    return re.findall(r"\'(.*?)\'", list_match.group(1))


# Every unsuccessful attempt counts against this limit, so the loop always
# terminates instead of re-querying the model forever.
MAX_CHANGE_ATTEMPTS = 6

files_ok = False
# Both lists are always defined, even when the reply contains only one of them.
change_file_paths = []
new_file_paths = []
for attempt in range(1, MAX_CHANGE_ATTEMPTS + 1):
    # Fall back to "" so the regex searches below never receive None.
    changes = what_files_to_change.run(info_dict) or ""
    print("OUTPUT:", changes)
    files_to_change = re.search(r"files_to_change = \[(.*?)\]", changes, re.DOTALL)
    new_files = re.search(r"new_files = \[(.*?)\]", changes, re.DOTALL)
    print(files_to_change.group(1) if files_to_change else "No files to change")
    print(new_files.group(1) if new_files else "No new files")

    change_file_paths = _quoted_paths(files_to_change)
    new_file_paths = _quoted_paths(new_files)
    if not change_file_paths and not new_file_paths:
        print("No files found try again", changes)
        continue

    # Only existing files need validating; a reply that lists new files only
    # has nothing to check against the codebase.
    files_ok = (
        not change_file_paths
        or issue.codebase.validate_file_paths(change_file_paths)
    )
    print("Validadte:", change_file_paths)
    print("Files OK: ", files_ok)
    if files_ok:
        break
else:
    # Reached only when no attempt produced a usable, validated answer.
    raise RuntimeError("No files found")

print("Files to change:\n", change_file_paths, "\n")
print("New Files:\n", new_file_paths, "\n")

In [None]:
# Matches one fenced code block: group 1 is the language tag (may be empty),
# group 2 is the block body.
_CODE_FENCE = re.compile(r"```([^\n`]*)\n(.*?)```", re.DOTALL)

# Matches one change tuple of the form (line_number, 'action', 'new code').
# NOTE: the code field ends at the first "')" it meets, so a change whose
# code itself contains "')" is cut short.
_CHANGE_TUPLE = re.compile(r"\((\d+),\s*\'(.*?)\',\s*\'(.*?)\'\)", re.DOTALL)


def _extract_code_block(reply, language=None):
    """Return the body of a fenced code block found in *reply*.

    When *language* is given, the first block tagged with that language is
    preferred; otherwise (or if no block carries that tag) the first fenced
    block of any language is returned. This lets non-Python files such as
    HTML templates be extracted as well.

    Raises:
        ValueError: if *reply* contains no fenced code block at all.
    """
    blocks = _CODE_FENCE.findall(reply or "")
    if not blocks:
        raise ValueError("LLM reply does not contain a fenced code block")
    if language is not None:
        for tag, body in blocks:
            if tag.strip().lower() == language:
                return body
    return blocks[0][1]


def _parse_changes(code):
    """Return the (line_number, action, new_code) tuples found in *code*."""
    return [
        (int(line), action, change)
        for line, action, change in _CHANGE_TUPLE.findall(code)
    ]


# Prompt asking the model to write one new file from scratch.
new_file = ChatPromptTemplate.from_messages([
    coding_system_prompt, 
    HumanMessagePromptTemplate.from_template(
        template=
            "Repository description: {repo_summary}\n"
            "Issue description: {issue_summary}\n"
            "Implement this file {file_path} to solve the issue.\n"
            "After completion this the file will be created and added to the codebase.\n"
            "Reply with a codeblock containing the content of the new file.\n")])

# The chain does not depend on the file, so it is built once and reused.
new_file_chain = LLMChain(llm=chatgpt, prompt=new_file, verbose=True)

# Maps each new file path to the content generated for it.
new_files_dict = {}
for file_path in new_file_paths:
    info_dict["file_path"] = file_path
    new_file_content = new_file_chain.run(info_dict)
    print(new_file_content)

    # Accept a code block in any language: new files are not always Python.
    new_files_dict[file_path] = _extract_code_block(new_file_content)



# Prompt asking the model for line-based edits to one existing file.
change_file = ChatPromptTemplate.from_messages([
    coding_system_prompt, 
    HumanMessagePromptTemplate.from_template(
        template=
"""
Repository description:
{repo_summary}
Issue description:
{issue_summary}
First write a detailed instruction on how to change the file {file_path}.

# [Begin file content]
{file_content}
# [Eind file content]

"""
"""

Then create a list called "changes".
This list should contain tuples of the form (line_number, action, "new code").
Action can be one of the following:
- add: insert a new line of code before the line with the given number
- overwrite: overwrite the line with the given number
- delete: delete the line with the given number

Write down all changes and 
reply with a codeblock like this:
```python
# (line_number, 'action', 'new code')
# make sure use 'single quotes' and not "double quotes" or `backticks` like in this example:
changes = [
    (0, 'add', 'import newexample'),
    (4, 'overwrite', 'def foo():'),
    (5, 'add', '    print("hello world")'),
    (6, 'delete', '    print("bar")'),
    ...
]
```
"""
)])

change_file_chain = LLMChain(llm=chatgpt, prompt=change_file, verbose=True)

# Maps each existing file path to the list of parsed change tuples.
change_files_dict = {}
for file_path in change_file_paths:
    # strip("'") tolerates paths that still carry their surrounding quotes.
    file_content = issue.codebase.show_file(file_path.strip("'"))
    info_dict["file_path"] = file_path
    info_dict["file_content"] = file_content
    change_instructions = change_file_chain.run(info_dict)
    print(change_instructions)

    # The prompt asks for a ```python block holding the `changes` list.
    changes_content = _extract_code_block(change_instructions, language="python")
    change_files_dict[file_path] = _parse_changes(changes_content)


In [None]:
# Create every newly generated file in the codebase and report it.
for new_path in new_files_dict:
    new_body = new_files_dict[new_path]
    issue.codebase.create_file(new_path, new_body)
    print("Created:", new_path, "\nwith content:", new_body, "\n")

# Apply the parsed change tuples to every existing file and report them.
for changed_path in change_files_dict:
    edits = change_files_dict[changed_path]
    issue.codebase.change_file(changed_path, edits)
    print("Changed:", changed_path, "\nwith changes:", edits, "\n")

In [None]:
import re

# Sample model reply used to try out the parsing regexes in isolation.
data = """
Here are the changes as a list of tuples with the form `(line_number, 'action', 'new code')`:
```python
changes = [
    # (line_number, 'action', 'new code')
    (5, 'add', 'fastapi-users[oauth]==10.0.0'),
    (6, 'add', 'httpx==0.22.0'),
]
```
"""

# Step 1: pull the body of the first ```python fenced block out of the reply.
changes_pattern = re.compile(r'```python(.*?)```', flags=re.DOTALL)
codeblock_match = changes_pattern.search(data)
changes_content = codeblock_match.group(1)

# Step 2: collect the three string fields of every (number, 'action', 'code')
# tuple inside that block.
tuple_pattern = re.compile(r"\((\d+),\s*\'(.*?)\',\s*\'(.*?)\'\)", flags=re.DOTALL)
tuples = [found.groups() for found in tuple_pattern.finditer(changes_content)]

# Step 3: convert the line number to int, leaving the other fields as text.
result = []
for line_number, action, change in tuples:
    result.append((int(line_number), action, change))

# One parsed tuple per output line.
print(*result, sep="\n")

In [None]:
# TODO: test the code and repeat until it works

# TODO: create a pull request