### Setup

In [1]:
import os
import json
import pandas as pd
from rich.panel import Panel
from rich import print as rprint

# Import util functions
from helper import set_openai_key,\
                    test_openai_api,\
                    create_openai_client,\
                    print_pretty,\
                    function_to_schema,\
                    count_tiktoken_length,\
                    safe_completion

# Import tools
from helper import generate_tree_string,\
                    get_lines_from_file,\
                    search_code, \
                    get_object_definition,\
                    generate_patches

print("Import successful!")

Import successful!


In [2]:
# Define paths
comp_dir = "konwinski-prize"
comp_kaggle_evaluation_dir = os.path.join(comp_dir, "kaggle_evaluation")
comp_kprize_setup_dir = os.path.join(comp_dir, "kprize_setup")

comp_data_zip_path = os.path.join(comp_dir, "data.a_zip")
comp_data_dir = os.path.join(comp_dir, "data")
comp_data_parquet_path = os.path.join(comp_data_dir, "data.parquet")
comp_conda_packages_dir = os.path.join(comp_data_dir, "conda_packages")
comp_pip_packages_dir = os.path.join(comp_data_dir, "pip_packages")
comp_repo_configs_dir = os.path.join(comp_data_dir, "repo_configs")
comp_repos_dir = os.path.join(comp_data_dir, "repos")

In [3]:
idx = 4 # 0,1,2,3

kprize_df = pd.read_parquet(comp_data_parquet_path)
print(kprize_df.shape)

row = kprize_df.iloc[idx]
problem_statement = row["problem_statement"]
instance_id = row["instance_id"]
repo_path = os.path.join(comp_repos_dir, f'repo__{instance_id}')

print(repo_path)

(6, 10)
konwinski-prize/data/repos/repo__astropy__astropy-16830


In [4]:
print(count_tiktoken_length(problem_statement))

print(problem_statement[:1000],"\n...")

598
KeyError: 'version_1_3_or_later' when parsing certain VOTables
### Description

When parsing VOTables (for instance) VOTables with empty integer literals in some places (e.g., MIN value="" in a VALUES element), astropy crashes with

```
KeyError: 'version_1_3_or_later'
```

### Expected behavior


My fix in the accompanying PR just stops the crashing, leading to a workable table.

### How to Reproduce

Try

```python
from astropy import table
table.Table.read("with-empty-min.vot", format="votable")
```

with the table from https://docs.g-vo. 
...


In [5]:
set_openai_key()

test_openai_api()

client = create_openai_client()

API key set successfully.
Hello! How can I assist you today?


### Run step by step

In [6]:
model_name = "gpt-4o" # "gpt-3.5-turbo" "gpt-4-turbo" "gpt-4o-mini" "gpt-4o"

system_prompt_template = """
### Context
You are a software automation agent responsible for diagnosing and resolving issues in a repository.  
Your task is to configure and utilize the provided tools efficiently.  
If the tools are insufficient, identify the gap, propose alternatives, and justify their necessity.  
Only call generate_patches if you can specify the exact files and modifications needed to resolve the issue.  
Do not request permission or confirmation before executing an action or generating a patch.  

### Metadata
- Repository root path: {repo_path}
"""


system_prompt = system_prompt_template.format(repo_path=repo_path)

messages = []

In [7]:
tools = [generate_tree_string, generate_tree_string, search_code, get_object_definition, generate_patches]
tool_schemas = [function_to_schema(tool) for tool in tools]
tools_map = {tool.__name__: tool for tool in tools}

**Note:** As of now, OpenAI's GPT models have a 1024-character limit for tool function descriptions, and there isn't a GPT model that allows for a larger description length.

#### Step1

In [8]:
# Call API
prompt = "Generate a patch to resolve the following repository issue:\n{problem_statement}"
prompt = prompt.format(problem_statement=problem_statement)

messages.append({"role":"user","content":prompt})

completion = safe_completion(client,
                             model_name=model_name,
                             messages=messages,
                             tool_schemas=tool_schemas,
                             retries=2
                            )

print_pretty(completion.dict())

{
    "choices": [
        {
            "finish_reason": "tool_calls",
            "index": 0,
            "logprobs": null,
            "message": {
                "audio": null,
                "content": null,
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": [
                    {
                        "function": {
                            "arguments": "{\"root_directory\": \"./\", \"search_string\": \"version_1_3_or_later\"}",
                            "name": "search_code"
                        },
                        "id": "call_SkA6qDZI483FgTDAcsT9jCU2",
                        "type": "function"
                    },
                    {
                        "function": {
                            "arguments": "{\"root_directory\": \"./\", \"object_name\": \"version_1_3_or_later\", \"return_imports\": false}",
                            "name": "get_object_definitio

In [9]:
# Execute tools
tool_calls = completion.choices[0].message.tool_calls

if tool_calls:
    for tool_call in completion.choices[0].message.tool_calls:

        function_name = tool_call.function.name
        function_args = json.loads(tool_call.function.arguments)
        function = tools_map[function_name]
        result = function(**function_args)
        
        if function_name == "generate_patches":
            rprint(Panel(function_args["answer"], title="Answer", expand=False, border_style="green"))
    
        # Append assistant's tool call response
        messages.append({
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": tool_call.id,
                    "type": "function",
                    "function": {
                        "name": function_name,
                        "arguments": json.dumps(function_args)
                    }
                }
            ]
        })
    
        # Append tool response correctly
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps(result)
        })
else:
    rprint(Panel(completion.choices[0].message.content, title="Response", expand=False, border_style="blue"))

#### Step2

In [10]:
# Call API
completion = safe_completion(client,
                             model_name=model_name,
                             messages=messages,
                             tool_schemas=tool_schemas,
                             retries=2
                            )

print_pretty(completion.dict())

# Execute tools
tool_calls = completion.choices[0].message.tool_calls

if tool_calls:
    for tool_call in completion.choices[0].message.tool_calls:
    
        function_name = tool_call.function.name
        function_args = json.loads(tool_call.function.arguments)
        function = tools_map[function_name]
        result = function(**function_args)
    
        if function_name == "generate_patches":
            rprint(Panel(function_args["answer"], title="Answer", expand=False, border_style="green"))
    
        # Append assistant's tool call response
        messages.append({
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": tool_call.id,
                    "type": "function",
                    "function": {
                        "name": function_name,
                        "arguments": json.dumps(function_args)
                    }
                }
            ]
        })
    
        # Append tool response correctly
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps(result)
        })
else:
    rprint(Panel(completion.choices[0].message.content, title="Response", expand=False, border_style="blue"))

{
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
                "audio": null,
                "content": "The issue seems to be related to the `KeyError` raised when accessing the dictionary key `version_1_3_or_later`. This key is likely missing from the configuration dictionary when certain VOTables are parsed. To fix this problem, you can ensure that the key is either explicitly set or use a method like `get()` with a default value to avoid the KeyError.\n\nHere's a patch that can resolve the issue by modifying the code to safely access the `version_1_3_or_later` configuration:\n\n### Patch\n\nModify the code that accesses the `config` dictionary to use the `get()` method with a default value. Here's an example modification in `tree.py` and `converters.py`:\n\n```python\n# Before modification\nif not self._config[\"version_1_3_or_later\"]:\n    # code...\n\n# After modification\nif not s

#### Step3

In [11]:
# Call API
completion = safe_completion(client,
                             model_name=model_name,
                             messages=messages,
                             tool_schemas=tool_schemas,
                             retries=2
                            )

print_pretty(completion.dict())

# Execute tools
tool_calls = completion.choices[0].message.tool_calls

if tool_calls:
    for tool_call in completion.choices[0].message.tool_calls:
    
        function_name = tool_call.function.name
        function_args = json.loads(tool_call.function.arguments)
        function = tools_map[function_name]
        result = function(**function_args)
    
        if function_name == "generate_patches":
            rprint(Panel(function_args["answer"], title="Answer", expand=False, border_style="green"))
    
        # Append assistant's tool call response
        messages.append({
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": tool_call.id,
                    "type": "function",
                    "function": {
                        "name": function_name,
                        "arguments": json.dumps(function_args)
                    }
                }
            ]
        })
    
        # Append tool response correctly
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps(result)
        })
else:
    rprint(Panel(completion.choices[0].message.content, title="Response", expand=False, border_style="blue"))

{
    "choices": [
        {
            "finish_reason": "tool_calls",
            "index": 0,
            "logprobs": null,
            "message": {
                "audio": null,
                "content": null,
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": [
                    {
                        "function": {
                            "arguments": "{\"folder_path\":\"./konwinski-prize/data/repos/repo__astropy__astropy-17048/astropy/io/votable/\"}",
                            "name": "generate_tree_string"
                        },
                        "id": "call_aYq6nkllBZ4ZYj8Q7acmJNr3",
                        "type": "function"
                    }
                ]
            }
        }
    ],
    "created": 1740917605,
    "id": "chatcmpl-B6cU9mj4LTnAWpX9nhXq4xUVmDGQG",
    "model": "gpt-4o-2024-08-06",
    "object": "chat.completion",
    "service_tier": "default",


#### Step4

In [12]:
# Call API
completion = safe_completion(client,
                             model_name=model_name,
                             messages=messages,
                             tool_schemas=tool_schemas,
                             retries=2
                            )

print_pretty(completion.dict())

# Execute tools
tool_calls = completion.choices[0].message.tool_calls

if tool_calls:
    for tool_call in completion.choices[0].message.tool_calls:
    
        function_name = tool_call.function.name
        function_args = json.loads(tool_call.function.arguments)
        function = tools_map[function_name]
        result = function(**function_args)
    
        if function_name == "generate_patches":
            rprint(Panel(function_args["answer"], title="Answer", expand=False, border_style="green"))
    
        # Append assistant's tool call response
        messages.append({
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": tool_call.id,
                    "type": "function",
                    "function": {
                        "name": function_name,
                        "arguments": json.dumps(function_args)
                    }
                }
            ]
        })
    
        # Append tool response correctly
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps(result)
        })
else:
    rprint(Panel(completion.choices[0].message.content, title="Response", expand=False, border_style="blue"))

{
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
                "audio": null,
                "content": "To address the `KeyError: 'version_1_3_or_later'` issue in the Astropy VOTable parser, let's inspect how `\"version_1_3_or_later\"` is being utilized and apply a fix. The key seems to be accessed directly without first ensuring it exists in the configuration dictionaries (`self._config` or `config`). Here's a plan to handle this more safely by utilizing `get()` method which can provide a default value if the key doesn't exist.\n\n### Files to Modify\nThe `KeyError` is likely being raised from direct dictionary indexing operations. We have identified the following lines where `version_1_3_or_later` might be accessed incorrectly:\n\n1. **tree.py** at line 688 and 2505\n2. **converters.py** at line 612 and 1141\n\n### Proposed Fix\nReplace direct access of `config[\"version_1_3_or_later\"

#### Step5

In [13]:
# Call API
completion = safe_completion(client,
                             model_name=model_name,
                             messages=messages,
                             tool_schemas=tool_schemas,
                             retries=2
                            )

print_pretty(completion.dict())

# Execute tools
tool_calls = completion.choices[0].message.tool_calls

if tool_calls:
    for tool_call in completion.choices[0].message.tool_calls:
    
        function_name = tool_call.function.name
        function_args = json.loads(tool_call.function.arguments)
        function = tools_map[function_name]
        result = function(**function_args)
    
        if function_name == "generate_patches":
            rprint(Panel(function_args["answer"], title="Answer", expand=False, border_style="green"))
    
        # Append assistant's tool call response
        messages.append({
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": tool_call.id,
                    "type": "function",
                    "function": {
                        "name": function_name,
                        "arguments": json.dumps(function_args)
                    }
                }
            ]
        })
    
        # Append tool response correctly
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps(result)
        })
else:
    rprint(Panel(completion.choices[0].message.content, title="Response", expand=False, border_style="blue"))

{
    "choices": [
        {
            "finish_reason": "tool_calls",
            "index": 0,
            "logprobs": null,
            "message": {
                "audio": null,
                "content": "The `KeyError` involving `'version_1_3_or_later'` suggests that this key might be accessed in the code without ensuring its presence within the configuration dictionary. Based on the file paths where this key is being accessed, let's look into fixing the potential source of the error in `astropy/io/votable`.\n\nHere's the approach we'll follow and implement:\n\n### Potential Fix:\n\n1. **Guard Against Missing Keys**:\n    - Instead of directly accessing the configuration with `config[\"version_1_3_or_later\"]`, use `config.get(\"version_1_3_or_later\")`. This avoids a `KeyError` by returning `None` if the key is not found.\n    - If `None` is not suitable, consider providing a default value that makes sense in the context, like `False`, if you expect boolean operations.\n\n2. **P