# In [None]:
#!/usr/bin/env python
# coding: utf-8
import os

import openai
import tiktoken
from github import Github, UnknownObjectException
from myers import diff

# Repository (owner/name) whose open pull requests are reviewed.
REPO_NAME = 'mloef/llm-hs-claims-public'
# Credentials are taken from the environment so real secrets never need to be
# written into this file. The original placeholder strings remain the
# fallbacks, so behavior is unchanged when the variables are not set.
openai.api_key = os.environ.get('OPENAI_API_KEY', 'nope')
GITHUB_ACCESS_TOKEN = os.environ.get('GITHUB_ACCESS_TOKEN', 'nuh uh')

# System message sent by getReview(): sets the reviewer persona, lists the five
# review criteria, and tells the model that diff lines are annotated with
# ADDED / REMOVED (the annotations produced in formatPrompt()).
MSG_PROMPT = '''You are an experienced software engineer who writes clean, well-commented, and concise code. When reviewing the following pull request, consider these aspects:
1. Consistency: Is there a discrepancy between the 'Changes' section and the PR description? If there's a discrepancy, it should be addressed.
2. Completeness: Are the changes complete and do they resolve the issue they're intended to solve?
3. Code quality: Is the code well-structured, readable, and maintainable?
4. Testing: Are there adequate tests covering the changes?
5. Potential impact: Could the changes break existing functionality?

Provide a detailed and critical review of the pull request. Focus only on the changes and do not review the unchanged code.
Note that the 'Changes' section is provided in git diff format with custom line annotations, which are ADDED and REMOVED.
Be concise.
'''

# %-format template for the head of the user prompt; formatPrompt() fills the
# two slots with (title, description).
METADATA_PROMPT = '''PR title: %s
PR description: %s
'''

# %-format template for one changed file; formatPrompt() fills the two slots
# with (filename, annotated diff text) and appends one copy per file.
CHANGE_PROMPT = '''Filename: %s

Changes:%s
'''

# Example reviews appended to the end of the user prompt by formatPrompt().
# The second example previously contained a garbled sentence ("the file was
# nothing was removed"); it now states the intended point so the model is not
# shown a malformed reference review.
REVIEW_EXAMPLE_PROMPT = '''

For reference, a good review might look like the following examples:
"The changes in this PR are incomplete. While the 'functionA' has been removed, it's not clear if other parts of the code that depended on this
function have been updated to reflect this change. This could potentially break existing functionality. Additionally, there are no new tests
added to verify these changes. The PR needs to be updated to address these issues."
"The information provided about the PR description and the 'Changes' section is inconsistent. While the PR description states that a function was removed,
the 'Changes' section indicates the addition of a new file. This suggests that nothing was removed. Please clarify this discrepancy."
Note: This example is only for reference and should not be included in the actual review.
'''


def getRepo(github_access_token=GITHUB_ACCESS_TOKEN):
    """Return the PyGithub repository object for REPO_NAME.

    Args:
        github_access_token: token used to build the Github client. The
            default is the value GITHUB_ACCESS_TOKEN held when this function
            was defined.
    """
    return Github(github_access_token).get_repo(REPO_NAME)


def getPrs(repo=None):
    """Return the open pull requests of *repo* that target the 'main' branch.

    Args:
        repo: repository object exposing ``get_pulls``. When omitted, the
            repository is looked up with getRepo() at call time. The lookup
            was previously a default-argument expression, which ran once at
            definition time and so hit the network on import.

    Returns:
        Whatever ``repo.get_pulls(base='main', state='open')`` returns.
    """
    if repo is None:
        repo = getRepo()
    return repo.get_pulls(base='main', state='open')


def formatPrompt(changes, title, description):
    """Assemble the user prompt for one pull request.

    The prompt is the filled METADATA_PROMPT, followed by one filled
    CHANGE_PROMPT per entry of *changes*, followed by REVIEW_EXAMPLE_PROMPT.

    Args:
        changes: mapping of filename -> sequence whose item 0 is the new file
            text and item 1 is the old file text.
        title: value substituted into the 'PR title' slot.
        description: value substituted into the 'PR description' slot.

    Returns:
        The complete prompt as a single string.
    """
    sections = [METADATA_PROMPT % (title, description)]

    for filename, versions in changes.items():
        new_text, old_text = versions[0], versions[1]
        # Per-line templates handed to diff() through its `format` argument.
        line_templates = {
            'k': '%s',
            'i': 'ADDED: %s',
            'r': 'REMOVED: %s',
            'o': '(...%s skipped...)',
        }
        # Diff runs old -> new, line by line.
        diff_lines = diff(
            old_text.split('\n'),
            new_text.split('\n'),
            format=line_templates,
        )
        sections.append(CHANGE_PROMPT % (filename, '\n'.join(diff_lines)))

    sections.append(REVIEW_EXAMPLE_PROMPT)
    return ''.join(sections)


def getReview(changes, pr):
    """Ask the chat model to review a pull request and return its reply text.

    Args:
        changes: per-file contents, passed straight through to formatPrompt().
        pr: pull request object; its ``title`` and ``body`` are read.

    Returns:
        The ``content`` of the first choice's message in the API response.
    """
    user_prompt = formatPrompt(changes, pr.title, pr.body)

    # Report the size of the user prompt (the system message is not counted).
    encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
    token_count = len(encoding.encode(user_prompt))
    print(f'Total tokens: {token_count}')

    conversation = [
        {"role": "system", "content": MSG_PROMPT},
        {"role": "user", "content": user_prompt},
    ]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-16k",
        messages=conversation,
        temperature=0.0,
    )

    first_choice = response['choices'][0]
    return first_choice['message']['content']


repo = getRepo()
# Change = namedtuple('Change', ['file', 'patch'])
# Not referenced by the loop below (getReview() builds its own encoder).
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")

# For every open PR: gather the new and old text of each touched file, then
# print the model's review.
for pr in getPrs(repo):
    print('PR title:', pr.title)
    print('PR number:', pr.number)

    commits = pr.get_commits()

    # filename -> [text at the last PR commit that touched it, text on base]
    fileContents = {}

    for commit in commits:
        files = commit.files
        for file in files:
            filename = file.filename
            try:
                contents = repo.get_contents(
                    filename, ref=commit.sha).decoded_content.decode('utf-8')
            except UnknownObjectException:
                # The path does not exist at this commit (e.g. the commit
                # removed the file); treat it as empty so the diff shows
                # every line as removed instead of aborting the whole run.
                contents = ''
            # Later commits overwrite earlier ones, so the newest text wins.
            fileContents[filename] = [contents]

    for name, versions in fileContents.items():
        try:
            # Compare against the branch the PR targets rather than whatever
            # the repository's default branch happens to be.
            formerContents = repo.get_contents(
                name, ref=pr.base.ref).decoded_content.decode('utf-8')
        except UnknownObjectException:
            # File is new in this PR: nothing to compare against.
            formerContents = ''

        versions.append(formerContents)

    result = getReview(fileContents, pr)
    print(result)
    print()
    print()
