In [1]:
from IPython.core.display import HTML


def apply_css_styles(
    font_name: str = "Lato",
    fallback_font: str = "Verdana",
    css_content: str = None,
    verbose: bool = False
) -> HTML:
    """Applies custom CSS styles within a Jupyter notebook cell.

    Args:
        font_name (str, optional): 
            The primary font to use in the styles.
        fallback_font (str, optional): 
            The fallback font to use if the primary font is unavailable.
        css_content (str, optional): 
            Custom CSS content to use. 
            If None, default styles are used.
        verbose (bool, optional): 
            Whether to print the generated CSS for debugging.

    Returns:
        IPython.core.display.HTML: 
            HTML object with the injected styles.
    """
    try:
        # Default CSS content if none is provided
        default_css = '''
p, li, a, b, h1, h2, h3, h4, h5, h6, title, ul, strong, sup, sub, em, i, blockquote, label {
    font-family: Verdana !important;
}

b, h1 {
    font-weight: 900 !important;
}

h2, h3, h4 ul {
    font-weight: 700 !important;
}

.fa, .far, .fas {
    font-family: "Font Awesome 5 Free" !important;
}
'''

        # Generate font import string dynamically based on the provided font name
        font_import = (
            f"\n@import url('https://fonts.googleapis.com/css2?family={font_name.replace(' ', '+')}:ital,wght@0,100;0,300;0,400;0,700;0,900;1,100;1,300;1,400;1,700;1,900&display=swap');\n"
        )

        # Use provided CSS content or fallback to default
        css_to_use = css_content or default_css

        # Replace fallback font in the CSS content
        css_to_use = css_to_use.replace("Verdana", font_name)

        # Combine the font import and the CSS content into a single HTML style block
        combined_styles = f"<style>{font_import}{css_to_use}</style>"

        if verbose:
            print(combined_styles)  # Print the CSS for debugging if verbose is True

        return HTML(combined_styles)  # Return the generated styles as an HTML object

    except Exception as e:
        raise RuntimeError(f"An error occurred while applying styles: {str(e)}")

# Apply styles (example usage)
apply_css_styles(verbose=False)

<div style="margin: 0;  border-radius: 1em; background-color: #48B0F7; text-align: center; color: white; padding: 50px 20px; position: relative;">
  <h1 style="font-size: 36px; letter-spacing: 0.05em; margin: 0 20px; font-weight: 900;">KPRIZE vs. SWE-BENCH</h1>
  <p style="font-size: 20px; margin: 25px 0 0; padding-left: 10px !important;">Can Language Models Resolve Real-World GitHub Issues?</p>
  <img src="https://www.swebench.com/img/swellama.png" alt="Swell Llama" style="position: absolute; right: 5%; top: 50%; transform: translateY(-50%); width: 135px;">
    <img src="https://www.kaggle.com/competitions/84795/images/header" style="position: absolute; right: 0%; top: 0%; transform: translateY(-50%); width: 100px; border-radius: 0.25em; opacity: .9;">
</div>

<br style="margin: 15px;">

<h2 style="text-align: center; font-size: 30px; font-style: normal; font-weight: 800; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">
    <span style="text-decoration: underline;">
        <font color=#48B0F7><b>L</b></font>ET'S 
        <font color=#48B0F7><b>L</b></font>EARN 
        <font color=#48B0F7><b>T</b></font>OGETHER !
    </span><br><br style="margin: 15px;">
<span style="font-size: 22px; letter-spacing: 1px;">
    <font color=#48B0F7><b>U</b></font>NDERSTANDING    
    <font color=#48B0F7><b>T</b></font>HROUGH
    <font color=#48B0F7><b>E</b></font>XPLORATION</span>
<br style="margin: 15px;"></h2>

<p style="text-align: center; font-size: 15px; font-style: normal; font-weight: bold; text-decoration: None; text-transform: none; letter-spacing: 1px; color: black; background-color: #ffffff;">CREATED BY: DARIEN SCHETTLER</p>

<hr>

<center><div class="alert alert-block alert-danger" style="margin: 2em; line-height: 1.7em;">
    <b style="font-size: 18px;">🛑 &nbsp; WARNING:</b><br><br><b><s>THIS IS A WORK IN PROGRESS</s> <span style="font-size: 24px;"> ---- SUPER–DUPER–WIP ---- </span></b><br>
</div></center>

<center><div class="alert alert-block alert-warning" style="margin: 2em; line-height: 1.7em;">
    <b style="font-size: 16px;">👏 &nbsp; IF YOU FORK THIS OR FIND THIS HELPFUL &nbsp; 👏</b><br><br><b style="font-size: 22px; color: darkorange">PLEASE UPVOTE!</b><br><br>This was a lot of work for me, and it may seem silly, but the 🔼 makes me feel appreciated. 😅
</div></center>

<hr>

<h1 style="font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; color: #203354; background-color: #ffffff;">
    CHANGELOG
</h1>

<ul>
    <li>
        <b>Version 1-3</b>
        <ul>
            <li>Initial Versions</li>
            <li>Just getting the notebook setup</li>
        </ul>
    </li>
    <li>
        <b>Version 4</b>
        <ul>
            <li>Added Secret Management</li>
            <li>Reorganize a Bit</li>
            <li>Begin the EDA</li>
        </ul>
    </li>
    <li>
        <b>Version 5-7</b>
        <ul>
            <li>Make a semi-manual Gemini based agent to test approximate workflow</li>
        </ul>
    </li>
</ul>

<p id="toc"></p>

<h1 style="font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; color: #203354; background-color: #ffffff;">
    TABLE OF CONTENTS
</h1>

<hr>

<h3 style="text-indent: 10vw; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; background-color: #ffffff;"><a href="#introduction" style="text-decoration: none; color: #48B0F7;">1&nbsp;&nbsp;&nbsp;&nbsp;INTRODUCTION & JUSTIFICATION</a></h3>

<hr>

<h3 style="text-indent: 10vw; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; background-color: #ffffff;"><a href="#background_information" style="text-decoration: none; color: #48B0F7;">2&nbsp;&nbsp;&nbsp;&nbsp;BACKGROUND INFORMATION</a></h3>

<hr>

<h3 style="text-indent: 10vw; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; background-color: #ffffff;"><a href="#imports" style="text-decoration: none; color: #48B0F7;">3&nbsp;&nbsp;&nbsp;&nbsp;IMPORTS</a></h3>

<hr>

<h3 style="text-indent: 10vw; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; background-color: #ffffff;"><a href="#setup" style="text-decoration: none; color: #48B0F7;">4&nbsp;&nbsp;&nbsp;&nbsp;SETUP AND HELPER FUNCTIONS</a></h3>

<hr>

<h3 style="text-indent: 10vw; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; background-color: #ffffff;"><a href="#eda" style="text-decoration: none; color: #48B0F7;">5&nbsp;&nbsp;&nbsp;&nbsp;EXPLORATORY DATA ANALYSIS</a></h3>

<hr>

<br>

<a id="introduction"></a>

<h1 style="font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; background-color: #ffffff; color: #48B0F7;" id="introduction">1&nbsp;&nbsp;INTRODUCTION & JUSTIFICATION&nbsp;&nbsp;&nbsp;&nbsp;<a style="text-decoration: none; color: #203354;" href="#toc">&#10514;</a></h1>

<br>


<br>

<h3 style="font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">1.1 <b>WHAT</b> IS THIS?</h3>
<hr>

<ul>
    <li>This notebook will follow the authors learning path and highlight relevant terms, information, and useful content about the competition.</li>
    <li>This notebook will conduct an <b>E</b>xploratory <b>D</b>ata <b>A</b>nalysis for the competition.</li>
    <li>This notebook <i>may</i> propose an open-source baseline solution.</li>
</ul>

<br>

<h3 style="font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">1.2 <b>WHY</b> IS THIS?</h3>
<hr>

<ul>
    <li>Writing and sharing my learning path and the resulting exploratory data analysis can help improve my own understanding of the competition and the data.</li>
    <li>Sharing my work may help others who are interested in the competition (or the data). This help may take the form of:
        <ul>
            <li>Better understanding the problem and potential common solutions (incl. my baseline).</li>
            <li>Better understanding of the provided dataset.</li>
            <li>Better understanding of the background information and research.</li>
            <li>Better ability to hypothesize new solutions.</li>
        </ul>
    </li>
    <li>Exploratory data analysis is a critical step in any data science project. Sharing my EDA might help others in the competition.</li>
    <li>Writing and sharing my work is often a fun and rewarding experience! It not only allows me to explore and try different techniques, ideas, and visualizations but also encourages and supports other learners and participants.</li>
</ul>

<br>

<h3 style="font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">1.3 <b>WHO</b> IS THIS FOR?</h3>
<hr>


<ul>
    <li>The primary purpose of this notebook is to educate <b>MYSELF</b>, however, my review/learning might be beneficial to others:
        <ul>
            <li>Other Kagglers (aka. current and future competition participants).</li>
            <li>Anyone interested in learning more about using artificial intelligence to tackle software engineering tasks directly.</li>
        </ul>
    </li>
</ul>


<br>

<h3 style="font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">1.4 <b>HOW</b> WILL THIS WORK?</h3>
<hr>


<p>I'm going to assemble some markdown cells (like this one) at the beginning of the notebook to go over some concepts/details/etc.</p>

<p>Following this, I will likely go through a few examples from the SWE-Lite dataset to better understand the problems and how they are solved</p>

<br>

<a id="background_information"></a>

<h1 style="font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; background-color: #ffffff; color: #48B0F7;" id="background_information">2&nbsp;&nbsp;BACKGROUND INFORMATION&nbsp;&nbsp;&nbsp;&nbsp;<a style="text-decoration: none; color: #203354;" href="#toc">&#10514;</a></h1>

<br>

<b>Your challenge is to develop an 'agent' that can resolve real-world GitHub issues.</b> 

This competition builds on <a rel="noreferrer nofollow" aria-label="the SWE-bench benchmark (opens in a new tab)" target="_blank" href="https://www.swebench.com/">the SWE-bench benchmark</a> by using Kaggle's forecasting format to ensure that all of the git issues used for the private test set cannot did not exist when the submitted model was trained.

Essentially, this competition emulates <b><a href="https://www.swebench.com/">SWE-BENCH</a></b> with some minor – BUT IMPORTANT – modifications. Let's briefly discuss those differences beefore we dive into more generic background information about <b><a href="https://www.swebench.com/">SWE-BENCH</a></b> and other related background information.

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">KEY DIFFERENCES BETWEEN KPRIZE AND SWE-BENCH</b>
<br>

Besides the obvious key differences of where/how the hosting is done (containerization, raw setup, constraints, etc.), the following are the main differences I wanted to call out.

1. **EVALUATION METRICS**
    * **SWE-Bench:**
        * Primarily measures the percentage of issues resolved by the AI agent, focusing on the agent's ability to generate correct solutions without explicitly penalizing incorrect ones.
    * **KPrize:**
        * The **scoring metric** for the competition is designed to incentivize skipping an issue (if the solution is uncertain) over submitting a bad patch. Here's how the formula works:
        * $$\text{score} = \frac{a - b}{a + b + c}$$
        * Where:
            - $a$ is the number of correctly resolved issues.
            - $b$ is the number of failing issues (incorrect solutions).
            - $c$ is the number of skipped issues.
2. **DATA CONTAMINATION AND PHASING**
    * **SWE-Bench**:
        * The test set is publicly available, which poses a risk of data contamination.
        * Models might inadvertently train on test data, leading to inflated performance metrics.
    * **KPrize**:
        * To mitigate the contamination issue, KPrize utilizes Kaggle's forecasting format to ensure that all of the git issues used for the private test set cannot did not exist when the submitted model was trained.
        * This results in a phased competition:
            > **------** PHASE 1 **------**
            > 
            > A model training phase with a leaderboard using only the public test set of historical data. This test set has about 100 instances and will require approximately one hour of your notebook's nine hour runtime limit to execute unit tests. We expect to improve this performance in the future. The public test set labels can be scraped from github. Accordingly, we may make updates mid-competition in order to keep the public leaderboard reasonably useful. With that said, it is infeasible to keep the public leaderboard results entirely leak-free.
            >
            > 
            > **------** PHASE 2 **------**
            > 
            > A forecasting phase with a leaderboard using only data collected after the submission deadline. You should expect this test set to contain approximately 150-200 instances. The exact count will be provided by the evaluation API. The public test set instances will not be served by the evaluation API during the forecasting phase.
            

3. **THE SWE-BENCH PYTHON LIBRARY**
    * **SWE-Bench**
        * The [SWE-Bench library](https://github.com/swe-bench/SWE-bench) is designed to evaluate the ability of large language models (LLMs) to generate patches that resolve real-world software issues. It provides tasks where models receive an issue description and a codebase, and must generate fixes to address the issue. Below is a high-level overview of its structure:
        * **Root Directory (SWE-bench)**:
            * Contains essential files for the benchmark, such as setup scripts, configuration files, and Docker files for creating reproducible environments.
            * **Subdirectory (`swebench`)**: The core module of SWE-Bench, organized into several submodules:
                * **`collect`**: Handles the collection of evaluation tasks from GitHub repositories, including mirroring repositories and extracting task instances.
                * **`harness`**: Provides the evaluation framework to assess model-generated patches against the benchmark.
                * **`inference`**: Manages running inference for models, facilitating the generation of fixes for issues in repositories.
                * **`versioning`**: Maintains version control and manages installation configurations for repositories in the benchmark.
                * **`utils`**: Includes utility functions shared across the framework to support core functionalities.
    * **KPrize**
        * `kprize_setup/kprize` is a package in the competition dataset that contains the files used for installing this competition's adaptation of <a rel="noreferrer nofollow" aria-label="the swebench library (opens in a new tab)" target="_blank" href="https://github.com/princeton-nlp/SWE-bench">the SWE-Bench library</a>. Note that this won't currently work on Windows. It has quite a few differences, but at a high level the structure is (ignoring the root directory):
        * **Package Directory (`kprize`)**:
            * Contains many utility files and other essential files (far more than are found in the original implementation). I think some of the utility files here may have been found previously in the **`utils`** submodule?
            * Contains various submodules that mostly overlap with the original implementation but differ in some regards:
                * **`collection`**: Same as **`collect`**, but I haven't investigated.
                * **`harness`**: This is likely the same as the original, but I haven't investigated.
                * **`bundling`**: This is new. TBD
                * **`evaluation`**: This is new. TBD
                * **`scripts`**: This is new. TBD


<h3 style="font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">2.1 <b>UNDERSTANDING</b> THE <b>K</b>PRIZE</h3>
<hr>

<center><img src="https://andykonwinski.com/assets/img/kprize-tweet.png" width=80%></center>

The K Prize competition, announced by Andy Konwinski at NeurIPS in December 2024, inspired by SWE-Bench but born from a passion for open-source innovation and competitive programming, this prize challenges teams to build AI models that can effectively solve real-world GitHub issues.

The competition builds upon SWE-bench, a benchmark that tests AI models against real-world software engineering problems from GitHub repositories. However, it introduces a crucial innovation: a contamination-free evaluation framework that prevents models from being inadvertently trained on test data.

As Konwinski notes, "Automating this task will let human software engineers spend lots more time designing new features, reforming abstractions, interfacing with users, and other tasks that are more inherently human (and, for many of us, more fun)." 

By providing compute resources through Kaggle and implementing a substantial prize structure, the competition aims to catalyze research progress in the same way the Netflix Prize inspired the creation of Apache Spark.

<br>

<h3 style="font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">2.2 <b>COMPETITION OVERVIEW</b></h3>
<hr>

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">PRIMARY TASK DESCRIPTION</b>
<br>
<br>
Develop open-source large language models capable of achieving a 90% score on a contamination-free version of SWE-bench, significantly surpassing the current highest-performing model's 55% achievement. The competition specifically targets real-world software engineering problems, with test data collected after the submission deadline to ensure genuine evaluation.

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">COMPETITION STRUCTURE</b>
<br>
<br>
The competition proceeds in two distinct phases:

**Model Training Phase:**
* Public test set of ~100 historical instances
* Approximately one hour runtime per evaluation
* Public leaderboard based on historical data
* Labels can be scraped from GitHub

**Forecasting Phase:**
* 150-200 new instances collected post-submission
* Completely new, unseen test data
* Final evaluation on contamination-free dataset
* Exact instance count provided by evaluation API

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">EVALUATION INNOVATIONS AND THE METRIC</b>
<br>
<br>
The K Prize introduces three key innovations:

**Contamination-Free Testing:**
* Test set created after model submissions
* Prevents training on test data
* Ensures genuine evaluation of AI capabilities

**Focus on Open Source:**
* Only open-source code and open-weight models eligible
* Encourages community collaboration
* Builds on collective innovation

**Support for Independent Developers:**
* Kaggle provides computing resources
* Levels playing field for smaller teams
* Promotes "small AI" innovation

<br>

Submissions are scored using a metric that rewards quality over quantity:

$$\text{score} = \frac{a - b}{a + b + c}$$

Where:
- $a$ is the number of correctly resolved issues.
- $b$ is the number of failing issues (incorrect solutions).
- $c$ is the number of skipped issues.

This formula **HEAVILY incentivizes** skipping difficult issues rather than guessing or submitting incorrect patches.

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">PRIZE STRUCTURE</b>
<br>
<br>
Total Prize Fund: **\$1,225,000**

* **Leaderboard Prizes:**
    * 1st Place: **\$50,000**
    * 2nd Place: **\$20,000**
    * 3rd-5th Place: **\$10,000 each**

* **Threshold Bonuses:**
    * Additional **\$50,000** pool for each threshold (30%, 40%, 50%, 60%, 70%, 80%, 90%)
    * Distributed proportionally among qualifying top-5 teams

* **Grand Prize:**
    * **\$775,000** additional for first place if reaching 90%
    * Brings total potential first place prize to **\$1 million**

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">TECHNICAL FRAMEWORK REQUIREMENTS</b>
<br>
<br>
**Environment:**
* Python 3.11-based evaluation environments
* Custom adaptation of SWE-bench library
* Kaggle-provided Docker container recommended for testing

**Data Structure:**
* Issue metadata including repo, problem statement, and test information
* Complete repository copies for context
* Specialized evaluation API for submission handling

**Submission Requirements:**
* Must use provided Python evaluation API
* Open source code and open weight models only
* Full documentation and reproducibility required

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">COMPETITION HOST(S)/CONTRIBUTOR(S)</b>
<br>
<br>
<b><u><a href="https://andykonwinski.com/about/">Andy Konwinski</a></u></b> is a co-founder of Databricks, Perplexity and Laude Ventures. The prize is personally funded by Konwinski, leveraging his success from Databricks (valued at $62 billion) and Perplexity (approaching double-digit billion-dollar valuation). 

The competition is hosted on Kaggle, working in collaboration with SWE-bench to develop and maintain the evaluation framework.

The initiative was <b><a href="https://andykonwinski.com/2024/12/12/konwinski-prize.html">announced at the Neural Information Processing Systems (NeurIPS) conference</a></b> in December 2024.

Read more about Andy and bask in his beardliness here

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">KEY DATES</b>
<br>
<br>
**Start Date:** December 11, 2024

**Entry Deadline:** March 5, 2025

**Team Merger Deadline:** March 5, 2025

**Final Submission Deadline:** March 12, 2025

**Competition End Date:** June 11, 2025


<h3 style="font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">2.3 AN <b>EXAMPLE</b> TO ILLUSTRATE <a href="https://github.com/astropy/astropy/issues/16825">(ASTROPY ISSUE 16825)</a></h3>
<hr>

Retrieved with:

```python
# (0) Necessary imports
import pandas as pd

# (1) Unzip so we have access to the KPRIZE dataset
%%capture
!mkdir -p /kaggle/tmp/konwinski-prize-alt
!unzip /kaggle/input/konwinski-prize/data.a_zip -d /kaggle/tmp/konwinski-prize-alt/

# (2) Load into a dataframe
df = pd.read_parquet('/kaggle/tmp/konwinski-prize-alt/data/data.parquet')

# (3) Get a pd.Series for a demo example
#       --> This is actually the row with the least text in `patch` and `test_patch` combined
DEMO_EX = df.iloc[4]
```

And then I manually formatted and took photos and hosted the relevant images...

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">`instance_id`</b>
<br>
<br>

> astropy__astropy-16830

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">`repo`</b>
<br>
<br>

> astropy/astropy

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">`problem_statement`</b>
<br>
<br>

> KeyError: \'version_1_3_or_later\' when parsing certain VOTables\n### Description\n\nWhen parsing VOTables (for instance) VOTables with empty integer literals in some places (e.g., MIN value="" in a VALUES element), astropy crashes with\r\n\r\n\`\`\`\r\nKeyError: \'version_1_3_or_later\'\r\n\`\`\`\n\n### Expected behavior\n\nWell, you *could* argue that at least some such VOTables should be rejected (a NULL value in a MIN, indeed, does not make sense) with a sensible error message; with the patch proposed in the accompanying PR, astropy emits a warning.  I notice in passing that the reproducer table passes stilts votlint.  I also note in passing that against the warning, a value="" is accepted in a PARAM (as it should, as this is the way to express NULLs).\r\n\r\nMy fix in the accompanying PR just stops the crashing, leading to a workable table.\n\n### How to Reproduce\n\nTry\r\n\r\n\`\`\`python\r\nfrom astropy import table\r\ntable.Table.read("with-empty-min.vot", format="votable")\r\n\`\`\`\r\n\r\nwith the table from https://docs.g-vo.org/with-empty-min.vot\n\n### Versions\n\nBoth current HEAD and what\'s in Debian bookworm.\r\n\nFix votable 1 3 check\n<!-- These comments are hidden when you submit the pull request,\r\nso you do not need to remove them! -->\r\n\r\n\r\n### Description\r\n<!-- Provide a general description of what your pull request does.\r\nComplete the following sentence and add relevant details as you see fit. -->\r\n\r\nWhen parsing VOTables (for instance) VOTables with empty integer literals in some places (e.g., MIN value="" in a VALUES element), astropy crashes with\r\n\r\n\`\`\`\r\nKeyError: \'version_1_3_or_later\'\r\n\`\`\`\r\n\r\nThis is a simple fix for the problem at hand, doing the key check in analogy to the other key checks of this sort in the few places where an index rather than the get method was used on the config object.\r\n\r\nOne *might* want to dig deeper, though; I am not exactly sure why the version_1_3_or_later key is missing on the example table from the bug report.  But I suspect (well: hope:-) that is not critical for the bug fix.\r\n\r\n<!-- In addition please ensure that the pull request title is descriptive\r\nand allows maintainers to infer the applicable subpackage(s). -->\r\n\r\n<!-- READ THIS FOR MANUAL BACKPORT FROM A MAINTAINER:\r\nApply "skip-basebranch-check" label **before** you open the PR! -->\r\n\r\n\r\n<!-- If the pull request closes any open issues you can add this.\r\nIf you replace <Issue Number> with a number, GitHub will automatically link it.\r\nIf this pull request is unrelated to any issues, please remove\r\nthe following line. -->\r\n\r\nFixes #16825.\n

<img src="https://github.com/darien-schettler/asset-hosting/blob/main/astropy_issue_16825.png?raw=true" width=90%>

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">`patch`</b>
<br>
<br>

> diff --git a/astropy/io/votable/tree.py b/astropy/io/votable/tree.py\nindex 036ca8af732..ed85aeffcfd 100644\n--- a/astropy/io/votable/tree.py\n+++ b/astropy/io/votable/tree.py\n@@ -1053,7 +1053,7 @@ def min(self):\n     @min.setter\n     def min(self, min):\n         if hasattr(self._field, "converter") and min is not None:\n-            self._min = self._field.converter.parse(min)[0]\n+            self._min = self._field.converter.parse(min, config=self._config)[0]\n         else:\n             self._min = min\n \n@@ -1089,7 +1089,7 @@ def max(self):\n     @max.setter\n     def max(self, max):\n         if hasattr(self._field, "converter") and max is not None:\n-            self._max = self._field.converter.parse(max)[0]\n+            self._max = self._field.converter.parse(max, config=self._config)[0]\n         else:\n             self._max = max\n \ndiff --git a/docs/changes/io.votable/16830.bugfix.rst b/docs/changes/io.votable/16830.bugfix.rst\nnew file mode 100644\nindex 00000000000..d30a2a9ff96\n--- /dev/null\n+++ b/docs/changes/io.votable/16830.bugfix.rst\n@@ -0,0 +1,1 @@\n+Fix KeyError when parsing certain VOTables.\n

<img src="https://github.com/darien-schettler/asset-hosting/blob/main/astropy_pr_16830_patch.png?raw=true" width=80%>

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">`test_patch`</b>
<br>
<br>

> diff --git a/astropy/io/votable/tests/test_tree.py b/astropy/io/votable/tests/test_tree.py\nindex 7fd771cef77..224d189a33f 100644\n--- a/astropy/io/votable/tests/test_tree.py\n+++ b/astropy/io/votable/tests/test_tree.py\n@@ -90,6 +90,31 @@ def test_namespace_warning():\n     parse(io.BytesIO(good_namespace_13), verify="exception")\n \n \n+def test_votable_values_empty_min_max():\n+    """Regression test for https://github.com/astropy/astropy/issues/16825"""\n+    with_empty_minmax = b"""<VOTABLE xmlns="http://www.ivoa.net/xml/VOTable/v1.3" version="1.4">\n+        <RESOURCE type="results">\n+          <TABLE name="main">\n+            <PARAM name="break" datatype="int" value=""/>\n+          <FIELD ID="hd" datatype="int" name="hd" ucd="meta.id;meta.main">\n+            <DESCRIPTION>HD number for this object</DESCRIPTION>\n+            <VALUES null="-2147483648">\n+              <MIN value=""/>\n+              <MAX value=""/>\n+            </VALUES>\n+          </FIELD>\n+          <DATA>\n+            <BINARY>\n+              <STREAM encoding="base64">AAMNIg==</STREAM>\n+            </BINARY>\n+          </DATA>\n+        </TABLE>\n+      </RESOURCE>\n+    </VOTABLE>\n+    """\n+    parse(io.BytesIO(with_empty_minmax), verify="exception")\n+\n+\n def test_version():\n     """\n     VOTableFile.__init__ allows versions of \'1.1\', \'1.2\', \'1.3\' and \'1.4\'.\n

<img src="https://github.com/darien-schettler/asset-hosting/blob/main/astropy_pr_16830_test_patch.png?raw=true" width=80%>

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">`pull_number`</b>
<br>
<br>

> 16830

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">`base_commit`</b>
<br>
<br>

> e39f486fec48d87aa3677326167954370d7a7bf9

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">`PASS_TO_PASS`</b>
<br>
<br>

> ['astropy/io/votable/tests/test_tree.py::test_check_astroyear_fail'
 'astropy/io/votable/tests/test_tree.py::test_string_fail'
 'astropy/io/votable/tests/test_tree.py::test_make_Fields'
 'astropy/io/votable/tests/test_tree.py::test_unit_format'
 'astropy/io/votable/tests/test_tree.py::test_namespace_warning'
 'astropy/io/votable/tests/test_tree.py::test_version'
 'astropy/io/votable/tests/test_tree.py::test_votable_tag'
 'astropy/io/votable/tests/test_tree.py::test_mivot_constructor'
 'astropy/io/votable/tests/test_tree.py::test_mivot_readout'
 'astropy/io/votable/tests/test_tree.py::test_mivot_write'
 'astropy/io/votable/tests/test_tree.py::test_mivot_write_after_table'
 'astropy/io/votable/tests/test_tree.py::test_write_no_mivot'
 'astropy/io/votable/tests/test_tree.py::test_mivot_write_after_resource'
 'astropy/io/votable/tests/test_tree.py::test_mivot_forbidden_write'
 'astropy/io/votable/tests/test_tree.py::test_mivot_order']

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">`FAIL_TO_PASS`</b>
<br>
<br>

> ['astropy/io/votable/tests/test_tree.py::test_votable_values_empty_min_max']

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">`issue_numbers`</b>
<br>
<br>

> [<a href="https://github.com/astropy/astropy/issues/16825">16825</a>, <a href="https://github.com/astropy/astropy/issues/16826">16826</a>]

<h3 style="font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">2.4 EVALUATION INFORMATION</h3>
<hr>

<br>

To determine what 'correct' means we should review the description in the original SWE-Bench paper.
> Evaluation is performed by unit test verification using post-PR behavior as the reference solution.

Knowing this we can reexamine the metric provided by the hosts below:

$$\text{score} = \frac{a - b}{a + b + c}$$

Where:
- $a$ is the number of correctly resolved issues.
- $b$ is the number of failing issues (incorrect solutions).
- $c$ is the number of skipped issues.

Remember, this formula **HEAVILY incentivizes** skipping difficult issues rather than guessing or submitting incorrect patches.

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">HOST PROVIDED IMPLEMENTATION</b>
<br>
<br>

```python
# TBD
def host_implementation(...):
    ...
```

As a placeholder here is my overly verbose implementation that would simply take the number of correct, incorrect and skipped examples:

```python
def calculate_score(correct: int, incorrect: int, skipped: int) -> float:
    """Calculate the SWE-Bench score based on correct, incorrect, and skipped solutions.
    
    Args:
        correct (int): Number of correctly resolved issues (a)
        incorrect (int): Number of failing issues (b)
        skipped (int): Number of skipped issues (c)
    
    Returns:
        float: Score calculated using (a-b)/(a+b+c) formula
    """
    # All values must be positive or 0 and all values cannot be 0
    correct, incorrect, skipped = max(0, correct), max(0, incorrect), max(0, skipped)
    if not any([correct, incorrect, skipped])
        return 0.0
        
    # Calculate score using provided formula (no 0 check required due to above)
    return (correct - incorrect) / (correct + incorrect + skipped)
```

<h3 style="font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">2.5 COMPETITION DATA OVERVIEW</h3>
<hr>

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">DATA FILE DESCRIPTIONS</b>

**`data.a_zip` description:**
* A renamed `.zip` archive created to work around filename and nested archive constraints
* Unzipping this file creates the main data folder containing all dataset components

**`data/data.parquet` columns:**
* **`instance_id`** (string)
    * Unique string identifier for each instance (GitHub issue)
* **`repo`** (string)
    * The GitHub repository relevant to the issue
    * Also accessible through the evaluation API
* **`problem_statement`** (string)
    * Textual description of the issue
    * Also accessible through the evaluation API
* **`patch`** (string)
    * The patch that resolves the issue
    * *Only provided in the train set*
* **`test_patch`** (string)
    * The patch that resolves the issue
    * *Only provided in the train set*
* **`pull_number`** (int)
    * The pull request number that resolved the issue
* **`base_commit`** (string)
    * The commit used as the foundation for the provided repository copy
* **`issue_numbers`** (int)
    * The original ID number of the GitHub issue
* **`[PASS_TO_PASS/FAIL_TO_PASS]`** (list)
    * Lists containing unit tests to be executed for this issue

**Directory Structure Details:**
* **`data/*/`**
    * All other subdirectories are utilized by the evaluation API
    * Used to configure evaluation environments
    * All evaluation environments run Python 3.11

**`kprize_setup/` description:**
* Contains files for installing the competition's adapted <b><a href="https://github.com/princeton-nlp/SWE-bench">swebench library</a></b>
* *Note: Currently not compatible with Windows systems*

**`kaggle_evaluation/` description:**
* Contains files implementing the evaluation API
* Implementation details may be useful for offline testing
* Recommended to start with the <b><a href="https://www.kaggle.com/code/sohier/konwinski-prize-demo-submission">demo submission notebook</a></b>
* Strong recommendation to run API in <b><a href="https://github.com/Kaggle/docker-python">Docker container based on Kaggle's image</a></b> for local execution
    * Helps avoid conflicts with existing Python environments
* The API will install if needed:
    * <b><a href="https://mamba.readthedocs.io/en/latest/user_guide/micromamba.html">Micromamba</a></b>
    * Several libraries (listed in kprize_setup/pip_packages)
    * Creates new Python environments as required

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">ADDITIONAL NOTES:</b>

* Further updates are planned for the evaluation API and kprize library
    * Aimed at improving runtime
    * Will provide additional useful tooling
    * Updates not expected to impact core submission workflow
* Refer to <b><a href="https://www.kaggle.com/competitions/konwinski-prize/discussion/552449">this forum post</a></b> for additional details
* Users are encouraged to source additional codebases for model training
* Most metadata is only available in the train set

<br>

<b style="text-decoration: underline; font-size: 15px; text-transform: uppercase; letter-spacing: 2px; font-weight: 900;">POST-EDA DATA OBSERVATIONS</b>

TBD

<br>


<h3 style="font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">2.6 GLOSSARY AND BACKGROUND RESEARCH</h3>
<hr>

<br>

I'm going to blow this out more in the future, most of my glossary/research is done by hand in a notebook right now, so this is just some preliminary definitions to get us started...

<br>

| Term | Definition | Example | Example Explanation |
|------|------------|---------|-------------------|
| Diff | A textual representation showing the differences between two versions of a file or set of files, highlighting added, removed, and modified lines. | `@@ -10,7 +10,7 @@\n def hello():\n-    print 'hello'\n+    print('hello')\n` | Shows a change from Python 2.x to 3.x print syntax. The `-` line shows removed code, the `+` line shows added code. |
| Patch | A file containing a set of changes (diff) that can be applied to a codebase. | `--- a/hello.py\n+++ b/hello.py\n@@ -1,3 +1,3 @@\n def greet(name):\n-    print "Hi, " + name\n+    print(f"Hi, {name}")` | A patch file showing the location (`hello.py`) and changes to update string formatting. |
| Test Patch | A modification specifically made to test files in a codebase. | `--- a/test_calc.py\n+++ b/test_calc.py\n@@ -1,2 +1,6 @@\n def test_add():\n-    assert calc.add(2,2) == 4\n+    assert calc.add(2,2) == 4\n+    assert calc.add(-1,1) == 0\n+    assert calc.add(0,0) == 0` | Adds new test cases to verify edge cases for an addition function. |
| Unix Patch Apply | A command-line operation that applies a patch file to a codebase. | `$ patch -p1 < bugfix.patch` | Applies changes from `bugfix.patch` to the codebase, where `-p1` strips one directory level from paths. |
| GitHub Issue | A tracked item in GitHub's issue system. | `Title: "TypeError when using pandas.read_csv() with custom delimiter"\nDescription: "When using a tab delimiter..."` | A bug report describing unexpected behavior with specific steps to reproduce. |
| Pull Request (PR) | A GitHub feature that proposes changes from one branch to another. | `PR #1234: "Fix TypeError in CSV parser with custom delimiters"` | A proposed solution to the issue, containing code changes and tests. |
| Base Commit | The specific commit that serves as the starting point for changes. | `git checkout a1b2c3d` | References commit `a1b2c3d` as the state before any changes are made. |
| Pass to Pass Tests (P2P) | Tests that passed before and should pass after changes. | `def test_existing():\n    assert len([1,2,3]) == 3` | A test verifying basic list functionality that shouldn't be affected by changes. |
| Fail to Pass Tests (F2P) | Tests that failed before and should pass after changes. | `def test_fix():\n    assert parser.read_tab('file.txt')` | A test specifically checking if the bug fix works. |
| SWE-bench Verified | 500 manually verified solvable problems. | `"Fix pandas DataFrame.fillna() with datetime values"` | A verified issue with clear reproduction steps and known solution. |
| SWE-bench Lite | 300 self-contained issues focused on functional bugs. | `"Fix off-by-one error in list slicing"` | A focused bug fix that doesn't require extensive codebase knowledge. |
| Oracle Retrieval | Providing only the files edited in the reference solution. | `{"files": ["pandas/core/frame.py", "pandas/tests/frame/test_datetime.py"]}` | Only retrieves the specific files that need modification. |
| BM25 Retrieval | Selecting relevant files based on issue description. | `query: "DataFrame fillna datetime"\nreturned: ["pandas/core/frame.py", ...]` | Uses text similarity to find potentially relevant files. |
| Breaking Resolved | Fixes the target issue but breaks existing functionality. | `Fixed: test_new_feature()\nBroken: test_old_feature()` | The change fixed the bug but introduced a regression. |
| Docker Evaluation | Containerized testing environment. | `docker run swe-bench eval --task pandas-1234` | Runs evaluation in an isolated, reproducible environment. |
| Gold Patch | Reference solution from the original PR. | `diff --git a/src/main.py\n--- a/src/main.py\n+++ b/src/main.py\n...` | The accepted solution that properly fixed the issue. |
| Cross-Context Edit | Changes requiring modifications across multiple files. | `Changed: src/parser.py, src/utils.py, tests/test_parser.py` | A fix that requires coordinated changes in multiple components. |


<br>

<a id="imports"></a>

<h1 style="font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; background-color: #ffffff; color: #48B0F7;" id="imports">3&nbsp;&nbsp;IMPORTS&nbsp;&nbsp;&nbsp;&nbsp;<a style="text-decoration: none; color: #203354;" href="#toc">&#10514;</a></h1>

<br>


In [2]:
print("\n... PIP INSTALLS STARTING ...\n")
!pip install -q google-genai  # For free Gemini access
!pip install -q /kaggle/input/konwinski-prize/kprize_setup/kprize-1.0.0-py3-none-any.whl --no-index --find-links /kaggle/input/konwinski-prize/kprize_setup/pip_packages
print("\n... PIP INSTALLS COMPLETE ...\n")

print("\n... IMPORTS STARTING ...\n")
print("\n\tVERSION INFORMATION")

# Competition specific imports
import kaggle_evaluation.konwinski_prize_inference_server
from datasets import load_dataset
from google.genai import types
from google import genai
import unidiff

import pandas as pd; pd.options.mode.chained_assignment = None; pd.set_option('display.max_columns', None)
import sklearn; print(f"\t\t– SKLEARN VERSION: {sklearn.__version__}")
import numpy as np; print(f"\t\t– NUMPY VERSION: {np.__version__}")
import polars as pl; print(f"\t\t– POLARS VERSION: {pl.__version__}")

# Built-In Imports (mostly don't worry about these)
from typing import Iterable, Any, Literal, Callable, Generator
from kaggle_datasets import KaggleDatasets
from dataclasses import dataclass
from collections import Counter
from datetime import datetime
from zipfile import ZipFile
from io import StringIO
from glob import glob
import subprocess
import tempfile
import warnings
import requests
import textwrap
import hashlib
import imageio
import IPython
import urllib
import zipfile
import pickle
import random
import shutil
import string
import json
import copy
import math
import time
import gzip
import ast
import sys
import io
import gc
import re
import os

# Rich
from rich import pretty; pretty.install()
from rich.markdown import Markdown
from rich import print as rprint
from rich.console import Console
from rich.style import Style
from rich.live import Live
from rich.text import Text
from rich import inspect

# Visualization Imports (overkill)
import matplotlib; print(f"\t\t– MATPLOTLIB VERSION: {matplotlib.__version__}");
from tqdm.notebook import tqdm; tqdm.pandas();
from IPython.core.display import HTML
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
import plotly

def seed_it_all(seed: int = 7, fix_tf_seed: bool = False):
    """ Attempt to be Reproducible """
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)

    if fix_tf_seed:
        raise NotImplementedError("Not importing TF yet...")
        # tf.random.set_seed(seed)
    
seed_it_all()

print("\n\n... IMPORTS COMPLETE ...\n")


... PIP INSTALLS STARTING ...


... PIP INSTALLS COMPLETE ...


... IMPORTS STARTING ...


	VERSION INFORMATION
		– SKLEARN VERSION: 1.2.2
		– NUMPY VERSION: 1.26.4
		– POLARS VERSION: 1.15.0
		– MATPLOTLIB VERSION: 3.7.5


... IMPORTS COMPLETE ...



<br>

<a id="setup"></a>

<h1 style="font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; background-color: #ffffff; color: #48B0F7;" id="setup">4&nbsp;&nbsp;SETUP AND HELPER FUNCTIONS&nbsp;&nbsp;&nbsp;&nbsp;<a style="text-decoration: none; color: #203354;" href="#toc">&#10514;</a></h1>

<br>


<h3 style="font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">4.0 HELPER FUNCTIONS</h3>
<hr>

<br>


In [3]:
"""Rich visualization utility."""

import pathlib
from rich.filesize import decimal
from rich.markup import escape
from rich.text import Text
from rich.tree import Tree

DEFAULT_IGNORE_LIST: Iterable[str] = ".ipynb_checkpoints", ".DS_Store", ".git", ".idea", ".coverage", ".pytest_cache"

def get_directory_tree(
        directory: pathlib.Path,
        tree: Tree | None = None,
        show_hidden: bool = False,
        inplace: bool = False,
        ignore_list_extras: Iterable[str] = (),
) -> Tree | None:
    """Recursively build a Tree with directory contents.

    Args:
        directory (pathlib.Path): The directory to walk.
        tree (Tree, optional):
            The Tree object to build.
            If not provided, a new Tree is created.
        show_hidden (bool, optional):
            Whether to show hidden files.
        inplace (bool, optional):
            Whether to print the tree in place.
            If False, the tree is returned.
        ignore_list_extras (Iterable[str], optional):
            Additional file extensions to ignore.

    Returns:
        If inplace is False, the Tree object with the directory contents.
        Else, None.
    """
    # Get the ignore list that includes the default and any extras
    ignore_list = sorted(set(DEFAULT_IGNORE_LIST) | set(ignore_list_extras))

    # Create a new Tree if one is not provided
    tree = tree or Tree(label=f"[bold]{directory!s}[/bold] File Tree")  # type: ignore

    # Sort dirs first then by filename
    paths = sorted(
        pathlib.Path(directory).iterdir(),
        key=lambda path: (path.is_file(), path.name.lower()),
    )

    # Sort dirs first then by filename
    for path in paths:

        # Remove hidden files if show_hidden is False
        if path.name.startswith(".") and not show_hidden:
            continue

        # Skip files in the ignore list by suffix or name
        if path.is_file() and (path.suffix in ignore_list or path.name in ignore_list):
            continue

        # Skip directories only by name (not suffix)
        if path.is_dir() and path.name in ignore_list:
            continue

        # Add the directory to the tree
        if path.is_dir():

            # Style directories starting with "__" differently
            style = "dim" if path.name.startswith("__") else ""

            # Add the directory to the tree
            branch = tree.add(
                f"[bold magenta]:open_file_folder: [link file://{path}]{escape(path.name)}",
                style=style,
                guide_style=style,
            )
            get_directory_tree(path, branch)

        # Add the file to the tree
        else:
            main_style = "dim green" if path.name.startswith("_") else "green"
            ext_style = "dim red" if path.name.startswith("_") else "bold red"
            file_size_style = "dim blue" if path.name.startswith("_") else "blue"
            text_filename = Text(path.name, main_style)
            text_filename.highlight_regex(r"\..*$", ext_style)
            text_filename.stylize(f"link file://{path}")
            text_filename.append(f" ({decimal(path.stat().st_size)})", file_size_style)
            if path.suffix == ".py":
                icon = "🐍 "
            elif path.suffix == ".ipynb":
                icon = "🐍📓 "
            elif path.suffix == ".sh":
                icon = "🔧 "
            elif ".env" in path.name.lower():
                icon = "🔑 "
            elif path.suffix == ".csv":
                icon = "📊 "
            elif path.suffix in [".yaml", ".yml", ".json"]:
                icon = "📜 "
            elif path.suffix in [".txt", ".md"]:
                icon = "📝 "
            elif path.suffix in [".png", ".jpg", ".jpeg", ".gif", ".svg"]:
                icon = "🖼️ "
            elif path.suffix in [".zip", ".tar", ".gz", ".7z"]:
                icon = "📦 "
            elif path.suffix in [".pdf"]:
                icon = "📰 "
            elif path.suffix in [".mp4", ".avi", ".mov", ".mkv"]:
                icon = "🎥 "
            elif path.suffix in [".mp3", ".wav", ".flac"]:
                icon = "🎵 "
            elif path.suffix in [".html", ".css", ".js"]:
                icon = "🌐 "
            elif path.suffix in [".exe", ".msi"]:
                icon = "🛠️ "
            elif path.suffix in [".docx", ".pptx", ".xlsx"]:
                icon = "📄 "
            elif path.suffix in [".parquet", ".feather"]:
                icon = "🧼 "
            elif path.suffix in [".db", ".sqlite", ".sql", ".jsonl"]:
                icon = "🗄️ "
            else:
                icon = "📄 "

            # Prefix hidden files with a "🤫" emoji
            if path.name.startswith("."):
                icon = "🤫"+icon

            # Add the file to the tree (with icon prefix)
            tree.add(Text(icon) + text_filename)

    # If inplace is False, return the tree... otherwise the Tree object is updated in place
    if not inplace:
        return tree
    return None


In [4]:
def stream_with_styling(chunk_generator: Any, return_completion: bool = True) -> None:
    """Streams text content with real-time Markdown styling using Rich library.
    
    Processes chunks of text from a generator and displays them with live Markdown 
    formatting, continuously updating the display as new chunks arrive.
    
    Args:
        chunk_generator (Any): 
            A generator-like object that yields objects containing text attributes.
            Each chunk object must have a 'text' attribute containing the text to display.
        return_completion (bool, optional):
            Whether we return the consolidated text.
    
    Returns:
        str; The text that was returned by the model 
        --OR--
        None;
    """
    console = Console()
    with Live(console=console, auto_refresh=False) as live:
        current_text = ""
        for chunk in chunk_generator:
            current_text += chunk.text
            live.update(Markdown(current_text))
            live.refresh()
    
        # Final render
        live.update(Markdown(current_text))
        live.refresh()
        time.sleep(0.01)
    
    return current_text if return_completion else None

class MyUserSecretsClient:
    """A wrapper around UserSecretsClient enhancing secret management functionality.
    
    This class extends the base UserSecretsClient by adding error handling and 
    environment variable integration capabilities.
    """
    
    def __init__(self) -> None:
        """Initialize the MyUserSecretsClient with a base UserSecretsClient instance.
        
        Attributes:
            _user_secrets_client (UserSecretsClient):
                The original secret management client object we wrap.
        """
        self._user_secrets_client = UserSecretsClient()

    def get_secret(self, label: str, default: Any = None) -> Any:
        """Retrieve a secret by label with fallback to default value.
        
        Attempts to retrieve a secret value, handling potential errors gracefully
        by returning a default value if the secret cannot be accessed.
        
        Args:
            label (str): 
                The identifier of the secret to retrieve.
            default (Any, optional): 
                Value to return if the secret cannot be retrieved.
        
        Returns:
            The secret value if successfully retrieved (str), 
            Otherwise, the default value specified (by default this is None).
        
        Example:
            >>> client = MyUserSecretsClient()
            >>> api_key = client.get_secret("API_KEY", default="fallback_key")
        """
        try:
            return self._user_secrets_client.get_secret(label)
        except BackendError:
            # Expected error when secret doesn't exist
            return default
        except Exception as e:
            # Log unexpected errors while still maintaining graceful fallback
            print(
                f"Unexpected error while retrieving secret '{label}': {str(e)}. "
                "This may indicate a problem beyond the secret not existing."
            )
            return default

    def secret_to_env(
        self, 
        label: str, 
        env_var_label: str | None = None, 
        default: Any = None
    ) -> None:
        """Set an environment variable using a secret value.
        
        Retrieves a secret and sets it as an environment variable. If the secret
        cannot be retrieved, uses the provided default value instead.

        Note the provided value will be cast as a string when set as an environment variable.
        
        Args:
            label (str): 
                The identifier of the secret to retrieve.
            env_var_label (str, optional): 
                The name of the environment variable to set.
                If None, uses the secret's label.
            default (Any, optional): 
                Value to use if the secret cannot be retrieved.
        
        Returns:
            None; 
                An environment variable will be updated/created with the 
                appropriate value.
                
        
        Example:
            >>> client = MyUserSecretsClient()
            >>> client.secret_to_env("DB_PASSWORD", "DATABASE_PASSWORD", "default_pass")
        """
        env_var_label = env_var_label if env_var_label is not None else label
        os.environ[env_var_label] = str(self.get_secret(label, default))


def is_valid_patch_format(patch: str | StringIO) -> bool:
    """Validates if the input string represents a valid unified diff patch format.

    This function checks if the provided input can be parsed as a valid unified diff
    patch using the unidiff library. It verifies both the syntax and presence of
    actual patch content.

    Based on: https://www.kaggle.com/code/sohier/patch-validation-snippet

    Args:
        patch (str | StringIO): A string or StringIO object containing the potential patch content.
            The patch should be in unified diff format
                - created by diff -u or similar tools
                - or generated by an LLM as in our case.

    Returns:
        bool: True if the input is a valid non-empty patch, False otherwise.

    Raises:
        unidiff.UnidiffParseError: Parsing exception
        Exception: Any other problems with the string
    
    Examples:
        >>> is_valid_patch_format("--- a/file.txt\n+++ b/file.txt\n@@ -1,1 +1,1 @@\n-old\n+new")
        True
        >>> is_valid_patch_format("invalid content")
        False
    """
    # All patches must be of type StringIO
    if not isinstance(patch, (str, StringIO)):
        return False

    try:
        # Convert string to StringIO if needed for consistent handling
        patch_content = StringIO(patch) if isinstance(patch, str) else patch
        
        # Attempt to parse the patch (may trigger an error on fail which results in False)
        patch_set = unidiff.PatchSet(patch_content)
        
        # Verify the patch contains actual changes
        if len(patch_set) == 0:
            print("Patch is either not a valid patch or contains no actual changes!\n")
            return False
            
        # Additional validation: check if there are any actual changes
        #   - Check if file has any hunks, if so we return True.
        #   - Otherwise return False
        return any(len(patched_file) > 0 for patched_file in patch_set)

    # Log expected unidiff parsing errors and return False
    except unidiff.UnidiffParseError as e:
        print(f"Unidiff parsing error (returning False)\n\tunidiff.UnidiffParseError: {str(e)}\n")
        return False

    # Log unexpected errors while still returning False
    except Exception as e:
        print(f"Unexpected error validating patch (returning False)\n\tGeneral Exception: {str(e)}\n")
        return False


def calculate_kprize_score(correct: int, incorrect: int, skipped: int) -> float:
    """Calculate the SWE-Bench score based on correct, incorrect, and skipped solutions.
    
    Args:
        correct (int): Number of correctly resolved issues (a)
        incorrect (int): Number of failing issues (b)
        skipped (int): Number of skipped issues (c)
    
    Returns:
        float: Score calculated using (a-b)/(a+b+c) formula
    """
    # All values must be positive or 0 and all values cannot be 0
    correct, incorrect, skipped = max(0, correct), max(0, incorrect), max(0, skipped)
    if not any([correct, incorrect, skipped]):
        return 0.0
        
    # Calculate score using provided formula (no 0 check required due to above)
    return (correct - incorrect) / (correct + incorrect + skipped)

def calculate_swebench_score(correct: int, incorrect: int) -> float:
    """Calculate the SWE-Bench score based on correct and incorrect solutions.
    
    Args:
        correct (int): Number of correctly resolved issues (a)
        incorrect (int): Number of failing issues (b)
    
    Returns:
        float: Score calculated using a/b formula
    """
    # All values must be positive or 0 and all values cannot be 0
    correct, incorrect = max(0, correct), max(0, incorrect)
    if not any([correct, incorrect]):
        return 0.0
        
    # Calculate score using provided formula (no 0 check required due to above)
    return correct / incorrect

    
def flatten(items: Iterable[Any], as_list: bool = True) -> Generator[Any, None, None] | list[Any]:
   """Flattens an iterable of items or nested iterables into a single level sequence.

    Strings are treated as atomic elements and will not be flattened.
    Dictionaries are treated as lists where the keys are the elements.

   Args:
       items (Iterable[Any]): 
           An iterable containing either individual elements or nested iterables.
       as_list (bool, optional): 
           If True, returns a list. [DEFAULT BEHAVIOUR]
           If False, returns a generator.

   Returns:
       If `as_list` is True, returns a flattened list.
       If `as_list` is False, returns a generator yielding flattened elements.

   Examples:
       Basic usage returning a list:
       >>> nested = [1, [2, 3], [4, [5, 6]]]
       >>> flatten(nested)
       [1, 2, 3, 4, 5, 6]

       Using generator output:
       >>> nested = ['a', ['b', 2], 'c']
       >>> list(flatten(nested, as_list=False))
       ['a', 'b', 2, 'c']

       Handles non-nested iterables:
       >>> simple = [1, 2, 3]
       >>> flatten(simple)
       [1, 2, 3]

   Raises:
       TypeError: If input is not an iterable.
   """
   def _flatten_generator(items: Iterable[Any]) -> Generator[Any, None, None]:
       # Iterate through each item in the input iterable
       for item in items:
           # Check if item is an iterable but not a string/bytes
           # Strings/bytes are treated as atomic elements
           if isinstance(item, Iterable) and not isinstance(item, (str, bytes)):
               # Recursively flatten nested iterables
               yield from _flatten_generator(item)
           else:
               # Yield non-iterable items directly
               yield item

   # Return either a generator or list based on as_list parameter
   generator = _flatten_generator(items)
   return list(generator) if as_list else generator
        
### TEST IT OUT IF YOU WANT ###
# rprint(Markdown("---"))
# # Valid patch
# if is_valid_patch_format(dev_df.patch[0]):
#     rprint("[bold green]SUCCESS[/bold green]")
# else:
#     rprint("[bold red]FAILURE[/bold red]")
# rprint(Markdown("---"))

# # Example showing unidiff parsing error
# if is_valid_patch_format(dev_df.patch[0].replace(".py", "0.py", 3)):
#     rprint("[bold green]SUCCESS[/bold green]")
# else:
#     rprint("[bold red]FAILURE[/bold red]")
# rprint(Markdown("---"))

# # Example showing complete failure (weird dash)
# if is_valid_patch_format(dev_df.patch[0].replace("-", "–")):
#     rprint("[bold green]SUCCESS[/bold green]")
# else:
#     rprint("[bold red]FAILURE[/bold red]")
# rprint(Markdown("---"))

# # Example showing complete failure (random stringf)
# if is_valid_patch_format("invalid content"):
#     rprint("[bold green]SUCCESS[/bold green]")
# else:
#     rprint("[bold red]FAILURE[/bold red]")
# rprint(Markdown("---"))

### TRY IT OUT ###
# flatten(((1,[2,{3: "hi"}]), ["a b c d".split(), "abcd", "b", "5", 5, ["hi", "there"]]), as_list=False)

<h3 style="font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">4.1 PATH DEFINITIONS</h3>
<hr>

<br>


In [5]:
# Rudimentary Paths
BASE_DIR = "/kaggle"
TMP_DIR = os.path.join(BASE_DIR, "tmp")
WORKING_DIR = os.path.join(BASE_DIR, "working")
INPUT_DIR = os.path.join(BASE_DIR, "input")

# Basic Competition Paths
COMP_DIR = os.path.join(INPUT_DIR, "konwinski-prize")
COMP_KAGGLE_EVALUATION_DIR = os.path.join(COMP_DIR, "kaggle_evaluation")
COMP_KPRIZE_SETUP_DIR = os.path.join(COMP_DIR, "kprize_setup")

# Dataset Competition Paths
COMP_DATA_ZIP_PATH = os.path.join(COMP_DIR, "data.a_zip")
COMP_TMP_DIR = os.path.join(TMP_DIR, "konwinski-prize-alt")
COMP_TMP_DATA_DIR = os.path.join(COMP_TMP_DIR, "data")
COMP_DATA_PARQUET_PATH = os.path.join(COMP_TMP_DATA_DIR, "data.parquet")
COMP_CONDA_PACKAGES_DIR = os.path.join(COMP_TMP_DATA_DIR, "conda_packages")
COMP_PIP_PACKAGES_DIR = os.path.join(COMP_TMP_DATA_DIR, "pip_packages")
COMP_REPO_CONFIGS_DIR = os.path.join(COMP_TMP_DATA_DIR, "repo_configs")
COMP_REPOS_DIR = os.path.join(COMP_TMP_DATA_DIR, "repos")

# SWE Dataset Paths ... https://huggingface.co/datasets/...
HF_SWE_BENCH_PROVIDER = "princeton-nlp"
HF_SWE_BENCH_PATH = os.path.join(HF_SWE_BENCH_PROVIDER, "SWE-bench")
HF_SWE_BENCH_LITE_PATH = os.path.join(HF_SWE_BENCH_PROVIDER, "SWE-bench_Lite")
HF_SWE_BENCH_VERIFIED_PATH = os.path.join(HF_SWE_BENCH_PROVIDER, "SWE-bench_Verified")

<h3 style="font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">4.2 SECRET/ENVIRONMENT LOADING/SETUP</h3>
<hr>

<br>

In [6]:
USE_KAGGLE_SECRETS = True
SECRETS_TO_USE = ["GEMINI_API_KEY", "HUGGINGFACE_TOKEN", "OPENAI_API_KEY"]

if USE_KAGGLE_SECRETS:
    from kaggle_secrets import UserSecretsClient, BackendError
    user_secrets = MyUserSecretsClient()
    for secret in SECRETS_TO_USE: 
        user_secrets.secret_to_env(secret)
else:
    from dotenv import load_dotenv, find_dotenv
    _ = load_dotenv(find_dotenv())

# Check if env is loaded
for secret in SECRETS_TO_USE:
    if os.getenv(secret):
        print(f"✅  The {repr(secret)} value HAS been set.")
    else:
        print(f"❌  The {repr(secret)} value HAS NOT set.")

client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

rprint("\n\n[bold cyan]TESTING OUR GEMINI AUTHENTICATION WITH A MESSAGE TO GEMINI FLASH![/bold cyan]\n")
DEMO_MESSAGE = "Tell me about Andy Konwinski the Computer scientist"

stream_with_styling(client.models.generate_content_stream(model="gemini-2.0-flash-exp", contents=DEMO_MESSAGE))

✅  The 'GEMINI_API_KEY' value HAS been set.
✅  The 'HUGGINGFACE_TOKEN' value HAS been set.
✅  The 'OPENAI_API_KEY' value HAS been set.


Output()

[32m"Alright, let's delve into the world of Andy Konwinski, a prominent figure in the field of computer science, particularly known for his contributions to **data science, distributed systems, and the Apache Spark ecosystem.**\n\nHere's a breakdown of what makes Andy Konwinski a significant computer scientist:\n\n**Key Areas of Contribution:**\n\n* **Co-founder of Databricks:** This is perhaps his most well-known achievement. Along with Matei Zaharia [0m[32m([0m[32mthe creator of Spark[0m[32m)[0m[32m and other colleagues at UC Berkeley's AMPLab, Andy co-founded Databricks, a company that built its business around the commercialization and development of Apache Spark. Databricks has become a leading platform for big data analytics and machine learning.\n\n* **Apache Spark Core Contributor:** While not the original creator, Andy was a significant early contributor to Apache Spark, particularly focusing on aspects like performance optimization, distributed execution, and the und

<h3 style="font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">4.3 LOAD THE SWE-LITE DATASET</h3>
<hr>

<br>

This dataset is a slim version of the full SWE-Bench dataset and can be found on <b><a href="https://huggingface.co/datasets/princeton-nlp/SWE-bench_Lite">HuggingFace</a></b>

This dataset contains two splits:
* **DEV**
    * 27 Examples
* **TEST**
    * 300 Examples

In [7]:
# Load the huggingface dataset
swe_bench_lite_ds = load_dataset(HF_SWE_BENCH_LITE_PATH)

# Create the sub-dataframes
dev_df = swe_bench_lite_ds["dev"].to_pandas()
test_df = swe_bench_lite_ds["test"].to_pandas()

# Display The Two Dataframes
rprint("\n[bold blue]DEV SWE BENCH LITE DATAFRAME[/bold blue]")
display(dev_df.head(3))

rprint("\n[bold green]TEST SWE BENCH LITE DATAFRAME[/bold green]")
display(test_df.head(3))

README.md:   0%|          | 0.00/3.67k [00:00<?, ?B/s]

dev-00000-of-00001.parquet:   0%|          | 0.00/120k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/1.12M [00:00<?, ?B/s]

Generating dev split:   0%|          | 0/23 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/300 [00:00<?, ? examples/s]

Unnamed: 0,repo,instance_id,base_commit,patch,test_patch,problem_statement,hints_text,created_at,version,FAIL_TO_PASS,PASS_TO_PASS,environment_setup_commit
0,sqlfluff/sqlfluff,sqlfluff__sqlfluff-1625,14e1a23a3166b9a645a16de96f694c77a5d4abb7,diff --git a/src/sqlfluff/rules/L031.py b/src/...,diff --git a/test/cli/commands_test.py b/test/...,"TSQL - L031 incorrectly triggers ""Avoid using ...","Actually, re-reading the docs I think this is ...",2021-10-13T11:35:29Z,0.6,"[""test/cli/commands_test.py::test__cli__comman...","[""test/cli/commands_test.py::test__cli__comman...",67023b85c41d23d6c6d69812a41b207c4f8a9331
1,sqlfluff/sqlfluff,sqlfluff__sqlfluff-2419,f1dba0e1dd764ae72d67c3d5e1471cf14d3db030,diff --git a/src/sqlfluff/rules/L060.py b/src/...,diff --git a/test/rules/std_L060_test.py b/tes...,Rule L060 could give a specific error message\...,"@tunetheweb Yeah definitely, should be a prett...",2022-01-22T12:21:52Z,0.8,"[""test/rules/std_L060_test.py::test__rules__st...",[],a5c4eae4e3e419fe95460c9afd9cf39a35a470c4
2,sqlfluff/sqlfluff,sqlfluff__sqlfluff-1733,a1579a16b1d8913d9d7c7d12add374a290bcc78c,diff --git a/src/sqlfluff/rules/L039.py b/src/...,diff --git a/test/rules/std_L003_L036_L039_com...,Extra space when first field moved to new line...,Does running `sqlfluff fix` again correct the ...,2021-10-22T18:23:33Z,0.6,"[""test/rules/std_L003_L036_L039_combo_test.py:...","[""test/rules/std_L016_L36_combo_test.py::test_...",67023b85c41d23d6c6d69812a41b207c4f8a9331


Unnamed: 0,repo,instance_id,base_commit,patch,test_patch,problem_statement,hints_text,created_at,version,FAIL_TO_PASS,PASS_TO_PASS,environment_setup_commit
0,astropy/astropy,astropy__astropy-12907,d16bfe05a744909de4b27f5875fe0d4ed41ce607,diff --git a/astropy/modeling/separable.py b/a...,diff --git a/astropy/modeling/tests/test_separ...,Modeling's `separability_matrix` does not comp...,,2022-03-03T15:14:54Z,4.3,"[""astropy/modeling/tests/test_separable.py::te...","[""astropy/modeling/tests/test_separable.py::te...",298ccb478e6bf092953bca67a3d29dc6c35f6752
1,astropy/astropy,astropy__astropy-14182,a5917978be39d13cd90b517e1de4e7a539ffaa48,diff --git a/astropy/io/ascii/rst.py b/astropy...,diff --git a/astropy/io/ascii/tests/test_rst.p...,Please support header rows in RestructuredText...,,2022-12-16T11:13:37Z,5.1,"[""astropy/io/ascii/tests/test_rst.py::test_rst...","[""astropy/io/ascii/tests/test_rst.py::test_rea...",5f74eacbcc7fff707a44d8eb58adaa514cb7dcb5
2,astropy/astropy,astropy__astropy-14365,7269fa3e33e8d02485a647da91a5a2a60a06af61,diff --git a/astropy/io/ascii/qdp.py b/astropy...,diff --git a/astropy/io/ascii/tests/test_qdp.p...,ascii.qdp Table format assumes QDP commands ar...,Welcome to Astropy 👋 and thank you for your fi...,2023-02-06T19:20:34Z,5.1,"[""astropy/io/ascii/tests/test_qdp.py::test_rou...","[""astropy/io/ascii/tests/test_qdp.py::test_get...",5f74eacbcc7fff707a44d8eb58adaa514cb7dcb5


<h3 style="font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">4.4 LOAD THE KPRIZE DATASET</h3>
<hr>

<br>

This is the provided 'training' data. Really these are just a few examples to show us the format and structure that we are to expect. It's on us to gather more data probably.

In [8]:
# (1) Handle unzipping the compressed dataset (or skip if already done)
# Check if the zip file has already been uncompressed
if not os.path.isfile(COMP_DATA_PARQUET_PATH):    
    # Make the directory to unzip to
    os.makedirs(COMP_TMP_DIR, exist_ok=True)
    
    # Open and extract the zip file
    with ZipFile(COMP_DATA_ZIP_PATH, 'r') as zip_ref:
        zip_ref.extractall(COMP_TMP_DIR)

# (2) Load into a dataframe
kprize_df = pd.read_parquet(COMP_DATA_PARQUET_PATH)
display(kprize_df)

Unnamed: 0,instance_id,repo,problem_statement,patch,test_patch,pull_number,base_commit,PASS_TO_PASS,FAIL_TO_PASS,issue_numbers
0,pylint-dev__astroid-2496,pylint-dev/astroid,TypeError: unsupported format string passed to...,diff --git a/ChangeLog b/ChangeLog\nindex 4560...,diff --git a/tests/test_inference.py b/tests/t...,2496,8d3cdbbe6685fd8cf211816bec56c90f38f1859e,[tests/test_inference.py::InferenceUtilsTest::...,[tests/test_inference.py::test_formatted_fstri...,[2492]
1,pylint-dev__astroid-2468,pylint-dev/astroid,Pylint checks against incorrect type with prop...,diff --git a/ChangeLog b/ChangeLog\nindex fdbb...,diff --git a/tests/test_inference.py b/tests/t...,2468,6db3a60553ff538a936d5dda23d67a3924a57f45,[tests/test_inference.py::InferenceUtilsTest::...,[tests/test_inference.py::InferenceTest::test_...,[2467]
2,astropy__astropy-17048,astropy/astropy,QTable cannot take `dimensionless_unscaled` wh...,diff --git a/astropy/table/table.py b/astropy/...,diff --git a/astropy/table/tests/test_table.py...,17048,d60f6b72cd525262bfd179331d9fe4474177918f,[astropy/table/tests/test_table.py::TestSetTab...,[astropy/table/tests/test_table.py::test_qtabl...,[17047]
3,astropy__astropy-16898,astropy/astropy,BUG: tables do not deal well with zero-sized s...,diff --git a/astropy/io/registry/core.py b/ast...,diff --git a/astropy/io/fits/tests/test_connec...,16898,ee6d087baf301c1d08db92e6e5b6d909d57e6fac,[astropy/io/fits/tests/test_connect.py::TestSi...,[astropy/io/fits/tests/test_connect.py::test_z...,[16897]
4,astropy__astropy-16830,astropy/astropy,KeyError: 'version_1_3_or_later' when parsing ...,diff --git a/astropy/io/votable/tree.py b/astr...,diff --git a/astropy/io/votable/tests/test_tre...,16830,e39f486fec48d87aa3677326167954370d7a7bf9,[astropy/io/votable/tests/test_tree.py::test_c...,[astropy/io/votable/tests/test_tree.py::test_v...,"[16825, 16826]"
5,astropy__astropy-16812,astropy/astropy,Provide a way to make a copy of a model with d...,diff --git a/astropy/modeling/core.py b/astrop...,diff --git a/astropy/modeling/tests/test_core....,16812,c241103c11954d3c1cfe3c1840b1ece72479c522,[astropy/modeling/tests/test_core.py::test_Mod...,[astropy/modeling/tests/test_core.py::test_res...,[16593]


<br>

<a id="eda"></a>

<h1 style="font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; background-color: #ffffff; color: #48B0F7;" id="eda">5&nbsp;&nbsp;EXPLORATORY DATA ANALYSIS&nbsp;&nbsp;&nbsp;&nbsp;<a style="text-decoration: none; color: #203354;" href="#toc">&#10514;</a></h1>

<br>


<h3 style="font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">5.1 KPRIZE DATA</h3>
<hr>

<br>

From the competition readme and our earlier investigation we know that the dataframe contains the following:

* **`instance_id`** (string)
    * Unique string identifier for each instance (GitHub issue)
* **`repo`** (string)
    * The GitHub repository relevant to the issue
    * Also accessible through the evaluation API
* **`problem_statement`** (string)
    * Textual description of the issue
    * Also accessible through the evaluation API
* **`patch`** (string)
    * The patch that resolves the issue
    * *Only provided in the train set*
* **`test_patch`** (string)
    * The patch that resolves the issue
    * *Only provided in the train set*
* **`pull_number`** (int)
    * The pull request number that resolved the issue
* **`base_commit`** (string)
    * The commit used as the foundation for the provided repository copy
* **`issue_numbers`** (int)
    * The original ID number of the GitHub issue
* **`[PASS_TO_PASS/FAIL_TO_PASS]`** (list)
    * Lists containing unit tests to be executed for this issue

<center><div class="alert alert-block alert-warning" style="margin: 2em; line-height: 1.7em;">
    <b style="font-size: 16px; font-weight: 900;">🔔 &nbsp; NOTE &nbsp; 🔔</b><br><br><span>Since we only have <b>5 examples</b>, we won’t get extensive statistical insights, but we can still carry out some basic exploratory data analysis (EDA) to understand each feature, spot any quirks in the data, and think through potential next steps.</span>
</div></center>





In [9]:
rprint(f"{kprize_df.shape=}\n")

rprint("\nAny Missing Values?\n")
rprint(kprize_df.isnull().sum())

rprint("\nDatatypes?\n")
kprize_df.info(show_counts=True)

rprint("\nRepo Distribution?\n")
rprint(kprize_df['repo'].value_counts())
# display(kprize_df['repo'].value_counts().plot(kind='bar'))

# Fixes can reference more than one GitHub issue.
rprint("\nNumber of Issues per PR\n")
kprize_df["issue_numbers"].apply(len).value_counts()

kprize_df['problem_statement_length'] = kprize_df['problem_statement'].apply(lambda x: len(x.split()))
rprint("\nProblem Statement Lengths\n")
display(kprize_df['problem_statement_length'].describe())

rprint("\nPatch Lengths\n")
kprize_df['patch_length'] = kprize_df['patch'].apply(lambda x: len(x))
kprize_df['test_patch_length'] = kprize_df['test_patch'].apply(lambda x: len(x))
display(kprize_df[['patch_length', 'test_patch_length']].describe())

rprint("\nTest Counts\n")
kprize_df['PASS_TO_PASS_count'] = kprize_df['PASS_TO_PASS'].apply(len)
kprize_df['PASS_TO_PASS_count'] = kprize_df['FAIL_TO_PASS'].apply(len)
display(kprize_df[['PASS_TO_PASS', 'FAIL_TO_PASS', 'PASS_TO_PASS_count', 'PASS_TO_PASS_count']])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   instance_id        6 non-null      object
 1   repo               6 non-null      object
 2   problem_statement  6 non-null      object
 3   patch              6 non-null      object
 4   test_patch         6 non-null      object
 5   pull_number        6 non-null      int64 
 6   base_commit        6 non-null      object
 7   PASS_TO_PASS       6 non-null      object
 8   FAIL_TO_PASS       6 non-null      object
 9   issue_numbers      6 non-null      object
dtypes: int64(1), object(9)
memory usage: 608.0+ bytes



count      [1;36m6.000000[0m
mean     [1;36m297.166667[0m
std      [1;36m162.149828[0m
min       [1;36m72.000000[0m
[1;36m25[0m%      [1;36m171.750000[0m
[1;36m50[0m%      [1;36m358.000000[0m
[1;36m75[0m%      [1;36m407.000000[0m
max      [1;36m462.000000[0m
Name: problem_statement_length, dtype: float64

Unnamed: 0,patch_length,test_patch_length
count,6.0,6.0
mean,2337.833333,2255.833333
std,1380.618328,629.710542
min,912.0,1339.0
25%,1382.25,2069.5
50%,2195.0,2214.5
75%,2723.5,2405.25
max,4714.0,3277.0


Unnamed: 0,PASS_TO_PASS,FAIL_TO_PASS,PASS_TO_PASS_count,PASS_TO_PASS_count.1
0,[tests/test_inference.py::InferenceUtilsTest::...,[tests/test_inference.py::test_formatted_fstri...,2,2
1,[tests/test_inference.py::InferenceUtilsTest::...,[tests/test_inference.py::InferenceTest::test_...,3,3
2,[astropy/table/tests/test_table.py::TestSetTab...,[astropy/table/tests/test_table.py::test_qtabl...,3,3
3,[astropy/io/fits/tests/test_connect.py::TestSi...,[astropy/io/fits/tests/test_connect.py::test_z...,2,2
4,[astropy/io/votable/tests/test_tree.py::test_c...,[astropy/io/votable/tests/test_tree.py::test_v...,1,1
5,[astropy/modeling/tests/test_core.py::test_Mod...,[astropy/modeling/tests/test_core.py::test_res...,2,2


<h3 style="font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">5.2 SWE-BENCH LITE DATA</h3>
<hr>

<br>


SWE-bench was designed to provide a diverse set of codebase problems that were verifiable using in-repo unit tests. The full SWE-bench test split comprises 2,294 issue-commit pairs across 12 python repositories.
<br>
<br>
Since its release, we've found that for most systems evaluating on SWE-bench, running each instance can take a lot of time and compute. We've also found that SWE-bench can be a particularly difficult benchmark, which is useful for evaluating LMs in the long term, but discouraging for systems trying to make progress in the short term.
<br>
<br>
To remedy these issues, we've released a canonical subset of SWE-bench called SWE-bench Lite. SWE-bench Lite comprises 300 instances from SWE-bench that have been sampled to be more self-contained, with a focus on evaluating functional bug fixes. SWE-bench Lite covers 11 of the original 12 repositories in SWE-bench, with a similar diversity and distribution of repositories as the original. We perform similar filtering on the SWE-bench dev set to provide 23 development instances that can be useful for active development on the SWE-bench task. We recommend future systems evaluating on SWE-bench to report numbers on SWE-bench Lite in lieu of the full SWE-bench set if necessary. You can find the source code for how SWE-bench Lite was created in <a href="https://github.com/princeton-nlp/SWE-bench/tree/main/swebench/collect/make_lite">SWE-bench/swebench/collect/make_lite</a>.
<br>
<br>
Here's a list of the general criteria we used to select SWE-bench Lite instances:

<ul>
    <li> We remove instances with images, external hyperlinks, references to specific commit shas and references to other pull requests or issues. </li>
    <li> We remove instances that have fewer than 40 words in the problem statement. </li>
    <li> We remove instances that edit more than 1 file. </li>
    <li> We remove instances where the gold patch has more than 3 edit hunks (see patch). </li>
    <li> We remove instances that create or remove files. </li>
    <li> We remove instances that contain tests with error message checks. </li>
    <li> Finally, we sample 300 test instances and 23 development instances from the remaining instances. </li>
</ul>
<br>
          
You can download SWE-bench Lite and its baselines from Hugging Face Datasets:

<ul>
    <li><a style="width: 100%" href="https://huggingface.co/datasets/princeton-nlp/SWE-bench_Lite">🤗 SWE-bench Lite</a></li>
    <li><a style="width: 100%" href="https://huggingface.co/datasets/princeton-nlp/SWE-bench_Lite_oracle">🤗 "Oracle" Retrieval Lite</a></li>
    <li><a style="width: 100%" href="https://huggingface.co/datasets/princeton-nlp/SWE-bench_Lite_bm25_13K">🤗 BM25 Retrieval 13K Lite</a></li>
    <li><a style="width: 100%" href="https://huggingface.co/datasets/princeton-nlp/SWE-bench_Lite_bm25_27K">🤗 BM25 Retrieval 27K Lite</a></li>
<ul>
<br>

<div style="display: flex; justify-content: center; align-items: flex-start;">  
  <!-- First image + text -->
  <div style="max-width: 400px; text-align: center; margin: 2%;">
    <img 
      src="https://www.swebench.com/img/swebench-lite-pie.png"
      style="width: 100%; max-width: 400px;"
      alt="Pie chart for SWE-bench Lite distribution"
    >
    <p>
      SWE-bench Lite distribution across repositories. Compare to the full SWE-bench 
      in Figure 3 of the 
      <a href="https://arxiv.org/abs/2310.06770">SWE-bench paper</a>.
    </p>
  </div>
  <!-- Second image + text -->
  <div style="max-width: 400px; text-align: center; margin: 2%;">
    <img 
      src="https://www.swebench.com/img/swe-bench_lite_results.png"
      style="width: 100%; max-width: 400px;"
      alt="Bar chart for SWE-bench Lite performance"
    >
    <p>
      SWE-bench Lite performance for our baselines. Compare to the full SWE-bench baseline 
      performance in Table 5 of the 
      <a href="https://arxiv.org/abs/2310.06770">SWE-bench paper</a>.
    </p>
  </div>
</div>


| Field Name                | Type   | Description                                                                                      |
|---------------------------|--------|--------------------------------------------------------------------------------------------------|
| `instance_id`             | str    | A formatted instance identifier, usually as `repo_owner__repo_name-PR-number`.                 |
| `patch`                   | str    | The gold patch, the patch generated by the PR (minus test-related code), that resolved the issue.|
| `repo`                    | str    | The repository owner/name identifier from GitHub.                                               |
| `base_commit`             | str    | The commit hash of the repository representing the HEAD of the repository before the solution PR is applied. |
| `hints_text`              | str    | Comments made on the issue prior to the creation of the solution PR’s first commit creation date.|
| `created_at`              | str    | The creation date of the pull request.                                                          |
| `test_patch`              | str    | A test-file patch that was contributed by the solution PR.                                      |
| `problem_statement`       | str    | The issue title and body.                                                                       |
| `version`                 | str    | Installation version to use for running evaluation.                                             |
| `environment_setup_commit`| str    | Commit hash to use for environment setup and installation.                                      |
| `FAIL_TO_PASS`            | str    | A JSON list of strings that represent the set of tests resolved by the PR and tied to the issue resolution. |
| `PASS_TO_PASS`            | str    | A JSON list of strings that represent tests that should pass before and after the PR application.|


In [10]:
rprint(Markdown("---"), "\n\n[bold magenta] PROBLEM STATEMENT: [/bold magenta]\n", Markdown("---"))
rprint(test_df[test_df.repo=="scikit-learn/scikit-learn"].problem_statement.values[0])

<h3 style="font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: #203354; background-color: #ffffff;">5.3 STEP BY STEP SOLVE WITH GEMINI</h3>
<hr>

<br>

**First we make some helper functions to play with Gemini**

Basically, you realize almost immediately that we are building an agent framework. The framework will use the best Open Weights model we can leverage (or a custom trained one) and anything else we need. For now I'm just going to manually explore. 

In [15]:
def format_google_message(
    role: Literal["user", "model"],
    content: str
) -> types.Content:
    """Format a single message for the Gemini API.
    
    Args:
        role (str): 
            The role of the message sender ('user' or 'model')
        content (str): 
            The content of the message
            
    Returns:
        types.Content: Formatted message ready for the Gemini API
        
    Raises:
        ValueError: If role is not 'user' or 'model'
    """
    if role not in ["user", "model"]:
        raise ValueError("Role must be either 'user' or 'model'")
        
    return types.Content(
        role=role,
        parts=[types.Part.from_text(content)]
    )

def process_google_message_history(
    message_history: list[dict[str, str]],
    model_config: dict[str, Any]
) -> list[types.Content]:
    """Process a list of historical messages and extract any system instructions.
    
    Args:
        message_history (list[dict[str, str]]): 
            A list of previous messages, where each message is a dictionary with:
                - 'role' or 'type': 
                    Origin of message ('user', 'model', or 'system')
                - 'parts' or 'content': 
                    The message content
        model_config (dict[str, Any]): 
            Configuration dictionary that may be modified if system instructions are found
            
    Returns:
        list[types.Content]: 
            Processed message history ready for the Gemini API
        
    Example:
        >>> history = [
        ...     {"role": "user", "content": "Hello"},
        ...     {"role": "model", "content": "Hi there!"},
        ...     {"role": "system", "content": "Be concise"}
        ... ]
        >>> config = {}
        >>> processed = process_message_history(history, config)
        >>> print(len(processed))  # 2 (system message removed)
        >>> print(config)          # {'system_instruction': 'Be concise'}
    """
    # Initialize
    formatted_messages = []
    
    # Iterate over and extract role and content with support for fallback key values.
    for msg in message_history:
        role = msg.get("role", msg.get("type", "user"))
        content = msg.get("parts", msg.get("content", ""))
        
        # Handle system instructions
        if "system" in role.lower() and content:
            rprint("[bold red]Detected system instruction message, overriding any provided system instruction[/bold red]")
            model_config["system_instruction"] = content
            continue
            
        # Add regular messages to history
        try:
            formatted_messages.append(format_google_message(role, content))
        except ValueError as e:
            raise ValueError(f"Invalid message in history: {e}")    
    return formatted_messages

def to_gemini(
    message: str,
    message_history: list[dict[str, str]] | None = None,
    model_name: str = "gemini-2.0-flash-exp",
    stream: bool = True,
    system_instruction: str | None = None,
    **model_config: Any
) -> str:
    """Send a message to Google's Gemini model and get the response.

    Args:
        message (str): 
            The message to send to Gemini
        message_history (list[dict[str, str]], optional): 
            Previous messages in the conversation. Each message should be a dictionary with:
                - 'role' or 'type': Origin of message ('user', 'model', or 'system')
                - 'parts' or 'content': The message content
        model_name (str, optional): 
            Name of the Gemini model to use
            Defaults to gemini-2.0-flash-exp (available in the free tier model)
        stream (bool, optional): 
            Whether to stream the response with live formatting
        system_instruction (str, optional):
            Instructions for how the model should behave
            Can also be provided via a system message in message_history
        **model_config (Any, optional): 
            Additional configuration parameters for the model

    Returns:
        str: The model's response text
        
    Example:
        >>> # Simple usage
        >>> response = to_gemini("What's the weather like?")
        
        >>> # With message history and system instruction
        >>> history = [
        ...     {"role": "user", "content": "Hi, I'm Darien, the weather is beautiful here."},
        ...     {"role": "model", "content": "Hello Darien! Thanks for letting me know!"}
        ... ]
        >>> response = to_gemini(
        ...     message="What's something that rhymes with my name?",
        ...     message_history=history,
        ...     system_instruction="Be concise and write all names in bold",
        ...     temperature=0.7
        ... )
        
        >>> # Without streaming, using a different model
        >>> response = to_gemini(
        ...     "Explain quantum computing",
        ...     stream=False,
        ...     model_name="gemini-1.5-pro"
        ... )
    """
    # (0) Get Fresh Client
    client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

    # (1) Initialize config and process system instruction
    if system_instruction:
        model_config["system_instruction"] = system_instruction
        
    # (2) Process message history if provided
    contents = []
    if message_history:
        contents.extend(process_google_message_history(message_history, model_config))
        
    # (3) Add the current message
    contents.append(format_google_message("user", message))

    # (4) Create generation config
    config = types.GenerateContentConfig(**model_config) if model_config else None

    # (5) Generate response
    if stream:
        response = client.models.generate_content_stream(
            model=model_name,
            contents=contents,
            config=config
        )
        return stream_with_styling(response)
    
    # else
    reponse = client.models.generate_content(
        model=model_name,
        contents=contents,
        config=config
    )
    return response.text

# Showcase the tool
response = json.loads(to_gemini(
    message="Write a haiku about what we've discussed so far?",
    message_history=[{"role": "user", "content": "Hi, I'm Darien, the weather is beautiful here."}, {"role": "model", "content": "Hello Darien! Thanks for letting me know!"}],
    system_instruction="Be concise and respond in JSON format",
    temperature=0.7, 
    response_mime_type="application/json",
))
response

Output()


[1m{[0m
    [32m'haiku'[0m: [1m{[0m
        [32m'line1'[0m: [32m'Darien speaks now'[0m,
        [32m'line2'[0m: [32m'Weather is beautiful here'[0m,
        [32m'line3'[0m: [32m'I acknowledge you'[0m
    [1m}[0m
[1m}[0m

**Next we look at an example problem from the kprize data** 

In [43]:
DEMO_IDX = 3
DEMO_ROW = kprize_df.iloc[DEMO_IDX]
DEMO_PROBLEM_STATEMENT = DEMO_ROW["problem_statement"]
DEMO_PASS_TO_PASS_TESTS = DEMO_ROW["PASS_TO_PASS"]
DEMO_FAIL_TO_PASS_TESTS = DEMO_ROW["FAIL_TO_PASS"]
DEMO_REPO_PATH = os.path.join(COMP_REPOS_DIR, f'repo__{DEMO_ROW["instance_id"]}')

# tree = get_directory_tree(DEMO_REPO_PATH)
# rprint(tree)

rprint(DEMO_PROBLEM_STATEMENT)

display(pd.DataFrame(DEMO_ROW).T)

Unnamed: 0,instance_id,repo,problem_statement,patch,test_patch,pull_number,base_commit,PASS_TO_PASS,FAIL_TO_PASS,issue_numbers,problem_statement_length,patch_length,test_patch_length,PASS_TO_PASS_count
3,astropy__astropy-16898,astropy/astropy,BUG: tables do not deal well with zero-sized s...,diff --git a/astropy/io/registry/core.py b/ast...,diff --git a/astropy/io/fits/tests/test_connec...,16898,ee6d087baf301c1d08db92e6e5b6d909d57e6fac,[astropy/io/fits/tests/test_connect.py::TestSi...,[astropy/io/fits/tests/test_connect.py::test_z...,[16897],315,2203,2463,2


In [56]:
def get_lines_from_file(file_path: str, line_range: str | None = None, as_list: bool = False) -> str:
    """Returns the lines of code from a specified file and line range.

    Args:
        file_path (str):
            The path to the file from which to retrieve lines.
        line_range (str, optional):
            A string representing the range of lines in the format "start-end".
            For example, "10-20" means lines 10 through 20, inclusive (1-based indexing).
            When not provided all lines are returned.
        as_list (bool, optional):
            Keep the lines separate.

    Returns:
        str:
            A concatenated string of lines from the specified range. If the file
            is not found or if the line range is invalid, returns an error message string.
    """
    if not os.path.isfile(file_path):
        return f"[Error] File not found: {file_path}"

    with open(file_path, 'r', encoding='utf-8') as f:
        snippets = f.readlines()

    if line_range:
        try:
            if "-" not in line_range:
                line_range=f"{line_range.strip()}-{line_range.strip()}"
            start_line, end_line = [int(x.strip()) for x in line_range.split("-")]
            snippets = snippets[start_line-1 : end_line]
        except ValueError:
            return f"[Error] Invalid line range: {line_range}. Must be either a single integer or two integers delimited by a single dash."
    return ''.join(snippets) if not as_list else snippets
    

def _normalize_imports(lines: list[str]) -> list[str]:
    """Processes a list of lines to collect and normalize all import statements.

    Args:
        lines (list[str]):
            The lines of code from which to extract import statements.

    Returns:
        list[str]:
            A sorted list of unique import statements, each as a single line.
    """
    imports = set()
    current_import = []
    inside_multiline = False

    import_pattern = re.compile(r'^\s*(from\s+\S+\s+import\s+|import\s+)')

    for line in lines:
        stripped = line.strip()

        if inside_multiline:
            # Check if we are inside a multi-line import and process continuation
            if stripped.endswith(')'):
                current_import.append(stripped[:-1])
                imports.add(' '.join(' '.join(current_import).split()))
                current_import = []
                inside_multiline = False
            else:
                current_import.append(stripped)
            continue

        match = import_pattern.match(line)
        if match:
            if stripped.endswith('('):
                # Begin multi-line import
                current_import.append(stripped[:-1])
                inside_multiline = True
            elif '(' in stripped and ')' in stripped:
                # Handle single-line imports with parentheses
                base_import, items = stripped.split('(', 1)
                items = items.rstrip(')').split(',')
                for item in items:
                    imports.add(f"{base_import.strip()} {item.strip()}")
            else:
                # Single-line import
                imports.add(stripped)

    return sorted(imports)


def _collect_imports_from_lines(lines: list[str]) -> list[str]:
    """Collects import statements from a list of lines.

    Args:
        lines (list[str]):
            The lines of code in which to search for import statements.

    Returns:
        list[str]:
            A list of import statements, each stripped of trailing newlines.
    """
    imports = []
    import_pattern = re.compile(r'^\s*(?:import|from)\s+')
    for line in lines:
        if import_pattern.match(line):
            imports.append(line.rstrip('\n'))
    return _normalize_imports(imports)


def search_code(
    root_directory: str,
    search_string: str,
    n_lines_before: int = 0,
    n_lines_after: int = 0,
    return_imports: bool = False
) -> list[dict[str, str | int | list[str]]]:
    """Searches for a given string in all .py files under root_directory.
    
    Optionally returning surrounding lines (context) and import statements from matching files.

    Args:
        root_directory (str):
            The path to the root directory of the codebase to search.
        search_string (str):
            The string to search for in .py files.
        n_lines_before (int, optional):
            Number of lines of context to include before each match.
            Defaults to 0.
        n_lines_after (int, optional):
            Number of lines of context to include after each match.
            Defaults to 0.
        return_imports (bool, optional):
            Whether to collect and return all import statements found in each file
            that contains at least one match. Defaults to False.

    Returns:
        list[dict[str, str | int | list[str]]]:
            A list of dictionaries, each containing:
              - 'file': The path to the file containing the match
              - 'line': The line number of the match in that file (1-based)
              - 'content': The exact line content for that match
              - 'context_before': A list of lines preceding the match
              - 'context_after': A list of lines following the match
              - 'imports': A list of import statements (only if return_imports=True)
    """
    matches: list[dict[str, str | int | list[str]]] = []
    for dirpath, _, filenames in os.walk(root_directory):
        for filename in filenames:
            if filename.endswith('.py'):
                full_path = os.path.join(dirpath, filename)
                with open(full_path, 'r', encoding='utf-8') as f:
                    lines = f.readlines()

                file_imports = _collect_imports_from_lines(lines) if return_imports else []

                for i, line in enumerate(lines, start=1):
                    if search_string in line:
                        start_idx = max(0, i - 1 - n_lines_before)
                        end_idx = min(len(lines), i - 1 + n_lines_after)
                        context_before = [l.rstrip('\n') for l in lines[start_idx:i - 1]]
                        context_after = [l.rstrip('\n') for l in lines[i:end_idx]]

                        match_entry = {
                            'file': full_path,
                            'line': i,
                            'content': line.rstrip('\n'),
                            'context_before': context_before,
                            'context_after': context_after
                        }

                        if return_imports:
                            match_entry['imports'] = file_imports
                        matches.append(match_entry)
    return matches


def _extract_entire_definition(
    lines: list[str],
    start_index: int
) -> list[str]:
    """Extracts the entire definition body (function or class) starting at a given line.

    Args:
        lines (list[str]):
            The full list of lines from the file (unmodified).
        start_index (int):
            The index (0-based) of the line where the definition ('def' or 'class')
            was found.

    Returns:
        list[str]:
            A list of lines comprising the entire definition block.
    """
    definition_lines: list[str] = []
    base_indent = len(lines[start_index]) - len(lines[start_index].lstrip())
    top_def_pattern = re.compile(r'^\s*(def|class)\s+')

    current_index = start_index
    while current_index < len(lines):
        line = lines[current_index]
        current_indent = len(line) - len(line.lstrip())

        if (current_index > start_index and top_def_pattern.match(line) and current_indent <= base_indent):
            break

        definition_lines.append(line)
        current_index += 1

    return definition_lines


def _extract_entire_definition(lines: list[str], start_index: int) -> list[str]:
    """Extracts all lines in the definition block (function or class). 
    
    This is starting at start_index and continuing until we reach a line with 
    less-or-equal indentation that indicates the next top-level definition, or the end of file.

    Args:
        lines (list[str]):
            The lines containing the entirety of the class definition.
        start_index (int):
            Where we will start checking from looking for the relevant information.

    Returns:
        A list of strings representing the lines for a given definition block (function/class/method)
    """
    definition_block = []
    initial_indent = _get_indent_level(lines[start_index])
    definition_block.append(lines[start_index])
    # Gather everything that's part of this definition’s indentation
    for idx in range(start_index + 1, len(lines)):
        line = lines[idx]
        if line.strip() == '':
            # Blank lines inside the definition are included
            definition_block.append(line)
            continue
        if _get_indent_level(line) <= initial_indent and re.match(r'^\s*(def|class)\s+', line):
            # Found a new top-level definition
            break
        definition_block.append(line)
    return definition_block


def _get_indent_level(line: str) -> int:
    """Utility to count the number of leading spaces in a line.

    leading_spaces = (line-length minus (line-length minus non-prefixing-spaces))
    
    Args:
        line (str): The line of code

    Returns:
        The number of leading spaces 
    """
    return len(line) - len(line.lstrip(' '))

    
def _find_method_block_in_lines(
    block_lines: list[str], 
    method_name: str
) -> tuple[int, int] | None:
    """Within a block of lines (e.g. a class block), find the start and end line indices (inclusive).
    
    This is used to be able to allow for effective retrieval of method code from a file.
    For example, the definition for 'def method_name(...)' may exist within a class.

    Args:
        block_lines (list[str]):
            The line by line strings making up the class definition
        method_name (str):
            The name of the method to be extracted.

    Returns:
        The start and end line indices (if found and inclusive) for the method.
    """
    pattern = re.compile(rf'^\s*def\s+{re.escape(method_name)}\s*\(')
    for i, line in enumerate(block_lines):
        if pattern.search(line):
            # Found the start. Now find where it ends by indentation.
            start_idx = i
            init_indent = _get_indent_level(line)
            # Move forward to find where this method ends.
            for j in range(i + 1, len(block_lines)):
                if block_lines[j].strip() == '':
                    continue
                if _get_indent_level(block_lines[j]) <= init_indent and re.match(r'^\s*(def|class)\s+', block_lines[j]):
                    # Reached the next method/class -> end of this method’s block
                    return (start_idx, j - 1)
            return (start_idx, len(block_lines) - 1)  # Goes until end of block
    return None

def _extract_class_up_to_init_or_method(
    lines: list[str],
    class_index: int,
    method_name: str
) -> list[str]:
    """Grab the first part of a class definition up to the point at which initialization has completed.

    (1) Extract the entire class definition at class_index (using _extract_entire_definition).
    (2) Within that class block, find the __init__ block (if any) and the block for method_name (if any).
    (3) Return lines from the start of the class up through the furthest end of either __init__ or the method.

    Args:
        lines (list[str]):
            The lines of code containing the class definition.
        class_index (int):
            The starting point of the class (indexable) for the definition within the lines.
        method_name (str):
            The method we want to retrieve (in addition to the initialization code)
    
    Returns:
        list[str]:
            The relevant lines as a list of strings.
    """
    class_block = _extract_entire_definition(lines, class_index)
    # Look for __init__ and the target method
    init_block_bounds = _find_method_block_in_lines(class_block, '__init__')
    method_block_bounds = _find_method_block_in_lines(class_block, method_name)

    # If neither __init__ nor method is found, we just return the whole class
    if not init_block_bounds and not method_block_bounds:
        return class_block

    furthest_line = 0
    if init_block_bounds:
        furthest_line = max(furthest_line, init_block_bounds[1])
    if method_block_bounds:
        furthest_line = max(furthest_line, method_block_bounds[1])

    # Slice from start of the class block up to furthest_line
    return class_block[:furthest_line + 1]


def _parse_class_and_method(object_name: str) -> tuple[str | None, str]:
    """Get the class and method names separately from an object if applicable.

    For example, for the Cat class with method _meow:
        - If object_name = "Cat._meow", returns ("Cat", "_meow").
        - Otherwise (object_name="Cat"), returns (None, object_name) if there's no dot.
        
    Args:
        object_name (str):
            The string containing the object name, one of:
                - Class Name: 'Cat'
                - Method Name: '_meow'
                - Function Name: make_cat_meow
                - Method With Class Prefix: Cat._meow

    Returns:
        tuple[str | None, str]:
            - The method name (or None if no dot found) 
            - followed by the class name within a tuple
    """
    # You could make this more robust, e.g., handle multiple dots
    # or disallow multiple dots. Adjust as you see fit.
    if '.' in object_name:
        parts = object_name.split('.', 1)  # split on first dot
        if len(parts) == 2:
            return parts[0], parts[1]  # class_name, method_name
    return None, object_name  # No dot -> treat entire string as the object

def get_object_definition(
    root_directory: str,
    object_name: str,
    return_imports: bool = False
) -> dict[str, str | int | list[str]] | None:
    """Searches the codebase for the first definition of a function, class, or method matching object_name.

    If object_name is a method referenced with dot notation (e.g. "Cat._meow"),
    then we find class 'Cat', extract the relevant portion of its definition block,
    and include the method definition plus any __init__.

    Args:
        root_directory (str):
            The path to the root directory of the codebase.
        object_name (str):
            The name of the function or class to find (e.g., "my_function", "MyClass", or "Cat._meow").
        return_imports (bool, optional):
            Whether to collect import statements found in the file.

    Returns:
        dict[str, str | int | list[str]] | None: 
            A dictionary describing the object definition, or None if not found.
                - file (str): Path to the file containing the definition.
                - line (int): The 1-based line number where the definition appears.
                - content (str): The exact line that matched (the def/class line).
                - definition_block (list[str]): The extracted lines of the definition.
                - imports (list[str], optional): The file’s import statements, if return_imports=True.
    """
    class_name, method_name = _parse_class_and_method(object_name)

    # If we have a separate class_name, we'll do a 2-phase search:
    #   - Phase A: find the class definition for class_name
    #   - Phase B: from that block, locate method_name
    if class_name:
        # We only search for 'class class_name'
        class_pattern = re.compile(rf'^\s*class\s+{re.escape(class_name)}\b')

        for dirpath, _, filenames in os.walk(root_directory):
            for filename in filenames:
                if filename.endswith('.py'):
                    full_path = os.path.join(dirpath, filename)
                    with open(full_path, 'r', encoding='utf-8') as f:
                        lines = f.readlines()
                    file_imports = _collect_imports_from_lines(lines) if return_imports else []

                    for i, line in enumerate(lines, start=1):
                        if class_pattern.search(line.strip()):
                            # Found the class
                            class_definition_block = _extract_entire_definition(lines, i - 1)
                            # Now see if we can find the method inside
                            # We need to see if method_name is actually a method
                            # If method_name is '__init__', same logic applies
                            bounds = _find_method_block_in_lines(class_definition_block, method_name)

                            if bounds is None:
                                # Possibly there's no such method, but we did find the class
                                # If you want to return None in this scenario, do so:
                                # return None
                                # Otherwise, maybe you still want to return the entire class?
                                # For now, let's just return None to signal we didn't find a method
                                continue

                            # We do have a method -> let's figure out the extended range.
                            init_bounds = _find_method_block_in_lines(class_definition_block, '__init__')
                            furthest_line = max(bounds[1], init_bounds[1] if init_bounds else 0)
                            final_block = class_definition_block[: furthest_line + 1]

                            # Return the combined class + method snippet
                            result = {
                                'file': full_path,
                                'line': i,
                                'content': line.rstrip('\n'),  # The class definition line
                                'definition_block': [l.rstrip('\n') for l in final_block],
                            }
                            if return_imports:
                                result['imports'] = file_imports
                            return result
        # If we exit all loops without finding anything, return None
        return None

    else:
        # class_name is None -> (handle "def object_name" or "class object_name")
        pattern = re.compile(rf'^\s*(?:def|class)\s+{re.escape(method_name)}\b')

        for dirpath, _, filenames in os.walk(root_directory):
            for filename in filenames:
                if filename.endswith('.py'):
                    full_path = os.path.join(dirpath, filename)

                    with open(full_path, 'r', encoding='utf-8') as f:
                        lines = f.readlines()

                    file_imports = _collect_imports_from_lines(lines) if return_imports else []

                    for i, line in enumerate(lines, start=1):
                        if pattern.search(line.strip()):
                            stripped = line.strip()
                            if stripped.startswith(f'class {method_name}'):
                                # It's a class definition -> old approach
                                definition_block = _extract_entire_definition(lines, i - 1)
                            else:
                                # It's a def -> could be a top-level function or a method
                                def_indent = _get_indent_level(line)
                                class_line_idx = None
                                for rev_idx in range(i - 2, -1, -1):
                                    if lines[rev_idx].lstrip().startswith('class '):
                                        class_indent = _get_indent_level(lines[rev_idx])
                                        if class_indent < def_indent:
                                            class_line_idx = rev_idx
                                            break

                                if class_line_idx is None:
                                    # Top-level function
                                    definition_block = _extract_entire_definition(lines, i - 1)
                                else:
                                    # It's a method. Extract the class portion up to end of __init__ or the method
                                    definition_block = _extract_class_up_to_init_or_method(
                                        lines, class_line_idx, method_name
                                    )

                            # Build the final result
                            result = {
                                'file': full_path,
                                'line': i,
                                'content': line.rstrip('\n'),
                                'definition_block': [l.rstrip('\n') for l in definition_block],
                            }
                            if return_imports:
                                result['imports'] = file_imports

                            return result

        return None


def process_instructions(
    json_instructions: str | dict,
    root_directory: str,
    search_kwargs: Any | None = None,
    lookup_kwargs: Any | None = None,
) -> list[dict[str, Any]]:
    """Parses JSON instructions and processes each step to retrieve code snippets or definitions.

    Args:
        json_instructions (str | dict):
            The instructions, either as a JSON string or a Python dictionary.
        root_directory (str):
            The path to the root directory of the codebase.

    Returns:
        list[dict[str, Any]]: A list of dictionaries containing the results of each step.
    """
    instructions = json_instructions
    if isinstance(instructions, str):
        instructions = json.loads(instructions)
        
    # Initialize
    search_kwargs = search_kwargs if search_kwargs else {}
    lookup_kwargs = lookup_kwargs if lookup_kwargs else {}
    next_steps = instructions.get('clear_next_steps', [])
    results = []

    for step in next_steps:

        # Initialize the step result
        result = {}

        if 'search' in step:
            search_string = step['search']
            search_results = search_code(
                root_directory,
                search_string,
                n_lines_before=search_kwargs.get('n_lines_before', 0),
                n_lines_after=search_kwargs.get('n_lines_after', 0),
                return_imports=search_kwargs.get('return_imports', False)
            )
            result['search'] = search_string
            result['results'] = search_results

        elif 'file' in step and 'lines' in step:
            file_path = step['file'] if os.path.isfile(step['file']) else os.path.join(root_directory, step['file'])
            line_range = step['lines']
            snippet = get_lines_from_file(file_path, line_range)
            result['file'] = file_path
            result['lines'] = line_range
            result['snippet'] = snippet

        elif 'object' in step:
            object_name = step['object']
            definition = get_object_definition(
                root_directory, 
                object_name,
                return_imports=lookup_kwargs.get('return_imports', False)
            )
            result['object'] = object_name
            result['definition'] = definition

        results.append(result)

    return results

# rprint("[bold cyan]Demo Code Search[/bold cyan]")
# search_code_results = search_code(DEMO_REPO_PATH, "yield from nodes[0]._infer(context, **kwargs)", return_imports=True, n_lines_before=10, n_lines_after=10)
# rprint(search_code_results)

# rprint("[bold cyan]Demo Object Search[/bold cyan]")
# object_def_results = get_object_definition(DEMO_REPO_PATH, "Node", return_imports=True)
# rprint(object_def_results)

# rprint("[bold cyan]Get Lines from File[/bold cyan]")
# line_results = get_lines_from_file(os.path.join(DEMO_REPO_PATH, "astroid/nodes/node_classes.py"), "150-175")
# rprint(line_results)

In [59]:
STEP_1_PROMPT = """You are a brilliant software engineer tasked with solving github issues in a reproducible and logical step-by-step way.

Context: 
  - We have a large code repository and a specific GitHub issue (pasted below).
  - The codebase is too big to share in full, so you must work incrementally. 
  - I can provide you with specific code snippets, files, functions, or lines of code on demand if you tell me which files/lines/keywords you want.
  - I cannot provide you with access to the internet or previous Github commits/issues/PRs.
  - I will provide the things you ask for in the section titled Previously Requested Information.

Your Task:
  - Read the issue text (below) carefully.
  - Summarize the problem in your own words making sure to understand how the requested information helps you and reframes the issue.
  - Outline a plan to investigate and solve the issue. This plan does not have to be complete, as at any time we can review and plan anew.
  - Describe the next steps in a consistently formatted way (JSON LIST of ACTIONS) that describes what searches to perform or which files/functions or lines of code you might want to see first.
      - You can only ask for very specific things (for each step you can specify these things in narrowing order --> 'file' --> 'object' --> 'lines'):
          - Specific file(s)
          - Specific object(s) (will be attempted if no file is provided and will return the first found instance of the function/method/object)
          - Specific line(s) of code (requires a specified file)
          - Search for code (will search the codebase for the specified code string and will return the File, Function, and Line Numbers)
      - For example:
          - {{'file': 'util_in_here.py', 'lines': '123-130'}}
              - This would return --> *the 8 lines from the specified file. whatever they are*
          - {{'object': 'UtilConfig'}}
              - We would than take this and do the search and on the next step provide you with {{'file': ..., 'object': ..., 'lines': ...}}

Your Deliverables:
  - issue_restatement: An illuminating and logical restatement of the problem in your own words taking into consideration the previous steps requested information (if provided).
  - methodical_plan: A plan for identifying what code or information to request from me next (or in the first place).
  - clear_next_steps: The structured and properly formatted next steps in order that will allow us to solve the problem together.

Final Deliverable (Output Only When You Have Solved Everything With Absolute Confidence):
  - final_code_diff: This is only to be output when you are confident you have a solution to the problem statement. You should output a code_diff with the appropriate format so that it will be able to be applied as a unix patch.
  
Important Notes:
  - Do not make assumptions about the codebase. 
  - Be thorough, ask for more rather than less. This includes when you ask for specific lines of code, in that case ask for maybe 10 before and 10 after as well (or whatever you think is appropriate).
  - Do not ask me to reproduce the issue. The issue exists as described by the problem statement below.
  - If you don’t know where a relevant piece of code might be, propose a strategy to search for it (e.g., searching by function name, references to certain classes, or by keywords).
  - If the section 'Previously Requested Information' is empty, then this is the first step in the process. 
  - Do not return anything other than the deliverables as a JSON object with the deliverables as keys ('issue_restatement', 'methodical_plan', 'clear_next_steps')
  - If you return a 'file' string in the clear_next_steps, please ensure it only includes the path up to the package name (i.e. 'openai/openai-python/blob/main/src/openai/_client.py')
  - You must output your answer in JSON. The clear_next_steps should be formatted as JSON list of dicts mapping strings to strings.
  - If you think you can solve it, you should.

Relevant GitHub Issue Text (Problem Statement):
```markdown
{problem_statement}
```

Previously Requested Information:
```
{requested_info}
```
"""
CONVERSATION_HISTORY = []


def process_steps(
    problem_statement: str, 
    initial_prompt: str, 
    repo_path: str, 
    accumulate_requested_info: bool = True, 
    temperature: float = 0.1, 
    max_steps: int = 10
) -> dict[str, Any]:
    """A generator function to process steps sequentially.
    
    Args:
        problem_statement (str): The initial problem statement.
        initial_prompt (str): The prompt template for formatting each step's input.
        repo_path (str): The path to the repository for processing instructions.
        accumulate_requested_info (bool, optional): Whether to append previously accumulated request info.
        temperature (float, optional): The temperature to use
    
    Yields:
        dict[str, Any]: All the information for a given step
    """
    requested_info = ''
    for i in range(max_steps):
        # Prepare the input for this step
        step_input = initial_prompt.format(problem_statement=problem_statement, requested_info=requested_info)
        # rprint(f"Step {i + 1} Input: {step_input}")
        
        # Simulate sending input to Gemini
        step_output = json.loads(to_gemini(
            step_input,
            temperature=temperature,
            response_mime_type="application/json",
        ))
        rprint(f"Step {i + 1} Output: {step_output}")
        
        # Process and accumulate the instructions
        processed_instructions = process_instructions(json_instructions=step_output, root_directory=repo_path)
        _requested_info = f"\nSTEP {i+1} REQUESTED INFO\n{processed_instructions}"
        if accumulate_requested_info:
            requested_info += _requested_info
        else:
            requested_info = _requested_info
        
        # Yield the requested info for this step
        yield {
            "step_output": step_output,
            "requested_info": processed_instructions,
        }

steps = []
step_generator = process_steps(DEMO_PROBLEM_STATEMENT, STEP_1_PROMPT, DEMO_REPO_PATH)
step_1_output = next(step_generator)

Output()


[1m{[0m
    [32m'step_output'[0m: [1m{[0m
        [32m'issue_restatement'[0m: [32m'The issue arises when reading a FITS table containing a zero-sized string column using `astropy.table.Table`. While the table can be read, attempting to display it triggers a `ValueError` due to an issue in how `numpy` handles zero-sized strings. Specifically, the error occurs during the formatting of the table for display, within the `TableFormatter` class. Additionally, `astropy.table.QTable` reads the table but changes the size of the string column to 1, which is not ideal. The root cause seems to be a combination of a numpy bug and how astropy handles zero-sized string columns during table display and subclassing.'[0m,
        [32m'methodical_plan'[0m: [32m'The plan is to first examine the `astropy.table.pprint.TableFormatter` class and the `_pformat_col` and `_pformat_col_iter` methods where the error occurs. Then, I will investigate the `astropy.utils.data_info.dtype_info_name` functi

In [102]:
DEMO_PROBLEM_STATEMENT

[32m'BUG: tables do not deal well with zero-sized string columns\n### Description\r\n\r\n@saimn [0m[32m[[0m[32mnoted[0m[32m][0m[32m([0m[32mhttps://github.com/astropy/astropy/pull/16894#issuecomment-2314640002[0m[32m)[0m[32m in #16894 that zero sized data are a problem:\r\n```\r\nimport numpy as np\r\nfrom astropy.io import fits\r\nfrom astropy.table import QTable, Table\r\ndata = np.array[0m[32m([0m[32m[[0m[32m([0m[32m"", 12[0m[32m)[0m[32m][0m[32m, [0m[32mdtype[0m[32m=[0m[32m[[0m[32m([0m[32m"a", "S"[0m[32m)[0m[32m, [0m[32m([0m[32m"b", "i4"[0m[32m)[0m[32m][0m[32m)[0m[32m\r\nfits.BinTableHDU[0m[32m([0m[32mdata[0m[32m)[0m[32m.writeto[0m[32m([0m[32m"zerodtable.fits", [0m[32moverwrite[0m[32m=[0m[32mTrue[0m[32m)[0m[32m\r\nt = Table.read[0m[32m([0m[32m"zerodtable.fits"[0m[32m)[0m[32m\r\nt\r\n\r\nFile ~/dev/astropy/astropy/astropy/table/pprint.py:295, in TableFormatter._pformat_col[0m[32m([0m[32mself, col,

In [98]:
STEP_2_PROMPT = """You are a meticulous software engineer tasked with generating a precise diff to fix a GitHub issue. You have already investigated the issue and have all the necessary context to create a solution.

Context:
  - You have been provided with the relevant code snippets and context from the investigation phase
  - You must generate a diff that can be applied using the Unix patch command
  - The diff must follow strict formatting requirements to be valid
  - You have access to the original problem statement and all previously requested information

Your Task:
  - Review all the information gathered during the investigation phase
  - Generate a precise diff that solves the issue
  - Ensure the diff follows proper Unix patch format
  - Include only the minimum necessary changes to fix the issue
  - Validate that the diff format is correct before submitting

Required Diff Format:
  - The diff must start with the file path relative to the repository root
  - Use unified diff format (indicated by '---' and '+++' lines)
  - Include the @@ notation to indicate line numbers
  - Use - for removed lines and + for added lines
  - Example format:
    ```diff
    --- a/path/to/file.py
    +++ b/path/to/file.py
    @@ -1,3 +1,3 @@
     unchanged line
    -removed line
    +added line
     unchanged line
    ```

Structure Of Output:
  reasoning:
  ...
  
  validation:
  ...
  
  final_diff:
  ```diff
  ...
  ```

Your Deliverables:
  - reasoning: A clear explanation of how your changes fix the issue
  - validation: A step-by-step verification that your diff is correctly formatted
  - final_diff: The complete diff in proper Unix patch format

Validation Checklist:
  1. File paths are correct and relative to repository root
  2. Unified diff format is used (---, +++, @@)
  3. Line numbers in @@ notation are accurate
  4. Only necessary changes are included
  5. No trailing whitespace in changed lines
  6. Proper indentation maintained
  7. Diff can be applied with Unix patch command

Important Notes:
  - Do not make assumptions about code you haven't seen
  - Include only changes you are confident about based on the investigation
  - The diff must be applicable using the standard Unix patch command
  - Verify all file paths match the repository structure
  - The final_diff must be a complete, properly formatted patch
  - Do not include any explanatory text within the diff itself

Original Problem Statement:
```markdown
{problem_statement}
```

Investigation Results:
```
{investigation_results}
```


"""

other_relevant_info = get_object_definition(DEMO_REPO_PATH, "QTable")

In [101]:
STEP_2_PROMPT.format(
        problem_statement=DEMO_PROBLEM_STATEMENT, 
        investigation_results=step_1_output["requested_info"]+[other_relevant_info]
    )



In [99]:
raw_output = to_gemini(
    message=STEP_2_PROMPT.format(
        problem_statement=DEMO_PROBLEM_STATEMENT, 
        investigation_results=step_1_output["requested_info"]+[other_relevant_info]
    ),
    temperature=0.1,
    # response_mime_type="application/json",
)
# output = json.loads(raw_output)
# rprint(output["final_diff"])

Output()

**I TESTED THIS IN MY LOCAL AND IT ACTUALLY DOES FIX THE ISSUE**

I haven't run the tests yet... but I will