In [2]:
from datasets import load_dataset

# Load the "hackercupai/hackercup" dataset through `datasets.load_dataset`.
ds = load_dataset("hackercupai/hackercup")
# Bare expression: shows the loaded object (its splits and features) as the cell output.
ds

  from .autonotebook import tqdm as notebook_tqdm


DatasetDict({
    sample: Dataset({
        features: ['name', 'year', 'round', 'statement', 'input', 'solution', 'code', 'output', 'sample_input', 'sample_output', 'images'],
        num_rows: 10
    })
    full: Dataset({
        features: ['name', 'year', 'round', 'statement', 'input', 'solution', 'code', 'output', 'sample_input', 'sample_output', 'images'],
        num_rows: 284
    })
})

In [6]:
# Show the `name` column of the `sample` split.
ds['sample']['name']

['meta_game',
 'tower_rush',
 'ready_go_part_2',
 'wiki_race',
 'ready_go_part_1',
 'dim_sum_delivery',
 'cheeseburger_corollary_ch1',
 'two_apples_a_day',
 'cheeseburger_corollary_ch2',
 'road_to_nutella']

In [24]:
import contextlib
import io
import os
import re
import sys
import time
import traceback
from unittest.mock import patch


def check_code_structure(extracted_code):
    """Report whether the code contains a ``__name__ == '__main__'`` comparison.

    The comparison is recognised with either single or double quotes around
    ``__main__`` and with any amount of whitespace around ``==``. The check is
    purely textual: the source is not parsed, so a match inside a comment or
    string literal also counts.

    Args:
        extracted_code: Source text of the program to inspect.

    Returns:
        ``(True, None)`` when the comparison is found, otherwise
        ``(False, message)`` where ``message`` says what is missing.
    """
    # The back-reference \1 forces the closing quote to match the opening one,
    # so both '__main__' and "__main__" are accepted but mixed quotes are not.
    guard = re.search(r"""__name__\s*==\s*(['"])__main__\1""", extracted_code)
    if guard is None:
        return False, "Missing `if __name__ == '__main__':` block."
    return True, None

def _find_error_line(exc, source_name):
    """Return the line of the executed source that ``exc`` points at, or None.

    The innermost traceback frame that belongs to ``source_name`` is preferred,
    so errors raised inside helper/library frames are still attributed to the
    line of the executed program that triggered them. Compile-time syntax
    errors have no such frame; for those the line recorded on the exception
    itself is used.
    """
    for frame in reversed(traceback.extract_tb(exc.__traceback__)):
        if frame.filename == source_name:
            return frame.lineno
    if isinstance(exc, SyntaxError) and exc.filename == source_name:
        return exc.lineno
    return None


def run_extracted_code(extracted_code, test_input):
    """Execute ``extracted_code`` as a script and capture what it prints.

    The program runs in a fresh namespace whose ``__name__`` is ``'__main__'``,
    so an ``if __name__ == '__main__':`` block (in any quoting style) executes
    and top-level functions can call each other. ``input()`` is replaced by a
    reader that serves the non-blank, stripped lines of ``test_input`` one at a
    time, and ``sys.stdout`` is redirected into a buffer.

    NOTE: the code is executed in-process via ``exec`` with no sandboxing or
    timeout; only run programs you are prepared to trust.

    Args:
        extracted_code: Python source text to run.
        test_input: Text fed to the program through ``input()``.

    Returns:
        ``(output, None)`` with the captured stdout on success, or
        ``(None, error_message)`` when the program raises an ``Exception``
        (including syntax errors) or prints nothing but whitespace.
    """
    source_name = '<string>'
    output = io.StringIO()

    # An iterator gives O(1) reads; blank lines are skipped and the rest stripped.
    pending_lines = iter(
        [line.strip() for line in test_input.strip().split('\n') if line.strip()]
    )

    def mock_input(prompt=None):
        # The prompt is accepted for compatibility with input("...") but is
        # deliberately not echoed, so it cannot pollute the captured output.
        try:
            return next(pending_lines)
        except StopIteration:
            raise ValueError("Not enough input data provided") from None

    with patch('builtins.input', mock_input), contextlib.redirect_stdout(output):
        try:
            code_obj = compile(extracted_code, source_name, 'exec')
            # A single dict serves as both globals and locals, which is what
            # lets functions defined by the script see one another.
            exec(code_obj, {'__name__': '__main__'})
        except Exception as exc:
            line_no = _find_error_line(exc, source_name)
            code_lines = extracted_code.split('\n')
            if line_no is not None and 1 <= line_no <= len(code_lines):
                error_line = code_lines[line_no - 1]
            else:
                error_line = "Unknown"
            line_label = line_no if line_no is not None else "Unknown"

            error = f"Error occurred on line {line_label}: "
            error += f"{error_line.strip()}\n"
            error += f"Exception: {type(exc).__name__}: {str(exc)}\n"
            return None, error

    result = output.getvalue()

    # A run that prints nothing (or only whitespace) is reported as an error.
    if not result.strip():
        return None, "Error: Code did not produce any output."

    return result, None

# Compare the output generated by the code with the expected output
def compare_with_expected_output(generated_output, expected_output):
    """Score generated output against expected output, line by line.

    Both texts are stripped and split into lines; lines at the same position
    are compared after stripping surrounding whitespace. Positions present in
    only one of the two texts count as failures, so truncated output and
    surplus output are both reported instead of being silently ignored.

    Args:
        generated_output: Text printed by the program, or None.
        expected_output: Reference text, or None.

    Returns:
        ``(score, failed_cases)`` where ``score`` is the percentage of matching
        positions (0 when there is nothing to compare) and ``failed_cases`` is
        a list of human-readable descriptions, one per mismatching position.
    """
    if generated_output is None:
        return 0, ["Generated output is None"]
    if expected_output is None:
        return 0, ["Expected output is None"]

    generated_output_lines = generated_output.strip().splitlines()
    expected_output_lines = expected_output.strip().splitlines()

    # Use the longer side as the denominator so that extra lines cannot yield
    # a perfect score; when the generated output is not longer than the
    # expected one this equals the number of expected lines.
    total_cases = max(len(expected_output_lines), len(generated_output_lines))
    matching_cases = 0
    failed_cases = []

    for index in range(total_cases):
        case_no = index + 1
        if index >= len(generated_output_lines):
            expected_line = expected_output_lines[index]
            failed_cases.append(
                f"Test Case #{case_no}: Expected '{expected_line}' but got no output"
            )
        elif index >= len(expected_output_lines):
            generated_line = generated_output_lines[index]
            failed_cases.append(
                f"Test Case #{case_no}: Expected no output but got '{generated_line}'"
            )
        else:
            generated_line = generated_output_lines[index]
            expected_line = expected_output_lines[index]
            if generated_line.strip() == expected_line.strip():
                matching_cases += 1
            else:
                failed_cases.append(
                    f"Test Case #{case_no}: Expected '{expected_line}' but got '{generated_line}'"
                )

    score = (matching_cases / total_cases) * 100 if total_cases > 0 else 0
    return score, failed_cases

# Evaluate generated code on test cases
def evaluate_generated_code_on_test_cases(extracted_code, test_input, test_output):
    """Run a program on ``test_input`` and grade its output against ``test_output``.

    Returns a 4-tuple ``(score, error, generated_output, failed_cases)``:
    the score from ``compare_with_expected_output`` (0 when the run failed or
    printed nothing), an error/feedback message or the falsy error value from
    the run, the raw output of the run, and the list of failed test cases.
    """
    produced, run_error = run_extracted_code(extracted_code, test_input)

    # Nothing usable was printed: surface the run's own error if there is one,
    # otherwise fall back to a generic explanation.
    produced_nothing = produced is None or produced.strip() == ""
    if produced_nothing:
        fallback = "Error: The code ran without any problem. There's no error in parsing the input. But it produces no output. Please check the entire code again."
        return 0, run_error or fallback, produced, []

    # Output exists but the run still reported an error.
    if run_error:
        return 0, run_error, produced, []

    # Grade line by line against the reference output.
    pct, mismatches = compare_with_expected_output(produced, test_output)
    if not mismatches:
        return pct, run_error, produced, mismatches

    return pct, f"Test cases failed: {mismatches}", produced, mismatches

In [25]:
import os 

def extract_problem_cases_from_folder(dataset_path):
    """Collect problem descriptions from a directory tree.

    Every directory under ``dataset_path`` (including ``dataset_path`` itself)
    that contains ``statement.txt``, ``sample_in.txt`` and ``sample_out.txt``
    yields one entry. Directories are visited in sorted order so the position
    of each problem in the returned list is the same on every run and on every
    filesystem. Files are read as UTF-8.

    Args:
        dataset_path: Root directory to search.

    Returns:
        A list of dicts with the keys ``name`` (the directory's base name),
        ``problem_description`` (statement plus sample input/output),
        ``sample_input`` and ``sample_output``.
    """
    required_files = ('statement.txt', 'sample_in.txt', 'sample_out.txt')

    def read_stripped(path):
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(path, 'r', encoding='utf-8') as handle:
            return handle.read().strip()

    problem_cases = []

    for root, dirs, files in os.walk(dataset_path):
        # os.walk yields entries in arbitrary order; sorting `dirs` in place
        # (allowed in top-down mode) makes the traversal deterministic.
        dirs.sort()

        if not all(name in files for name in required_files):
            continue

        statement = read_stripped(os.path.join(root, 'statement.txt'))
        sample_input = read_stripped(os.path.join(root, 'sample_in.txt'))
        sample_output = read_stripped(os.path.join(root, 'sample_out.txt'))

        # Same text layout as before, spelled out so the whitespace-only line
        # after the statement is visible.
        problem_description = (
            f"\n{statement}\n"
            "            \n"
            f"### Sample Input\n{sample_input}\n"
            "\n"
            f"### Sample Output\n{sample_output}\n"
        )

        problem_cases.append({
            "name": os.path.basename(root),  # The folder name is used as the problem name
            "problem_description": problem_description,
            "sample_input": sample_input,
            "sample_output": sample_output
        })

    return problem_cases

In [32]:
# A small hand-written program (as source text) used to exercise the runner:
# it reads one line, expects two comma-separated integers, and prints a verdict.
extracted_code = "def parse_input(input_str):\n    parts = input_str.strip().split(', ')\n    if len(parts) != 2:\n        return False, 'Input string does not contain exactly two elements'\n    try:\n        N = int(parts[0])\n        K = int(parts[1])\n    except ValueError:\n        return False, 'Invalid integer value in the input string'\n    return True, (N, K)\n\ndef determine_result(N, K):\n    return 'YES' if K <= N // 2 else 'NO'\n\nif __name__ == '__main__':\n    sample_input = input()\n    success, result = parse_input(sample_input)\n    if success:\n        print(f'Case #1: {determine_result(*result)}')\n    else:\n        print(result)"
# Take the sample input of the fifth problem found under ./contest_data.
# NOTE(review): the index is positional, so it relies on the order in which the
# problem folders are listed — confirm it selects the intended problem.
sample_input = extract_problem_cases_from_folder("./contest_data")[4]['sample_input']
# Bare expression: shows the selected sample input as the cell output.
sample_input

'6\n4 17\n1\n2\n5\n10\n1 100\n12\n3 1000000000\n1000000000\n1000000000\n1000000000\n2 22\n22\n22\n1 10\n12\n4 4\n1\n2\n5\n10'

In [33]:
# Run the sample program on the selected input; the (output, error) tuple is the cell output.
run_extracted_code(extracted_code, sample_input)

Output: 


('Input string does not contain exactly two elements\n', None)

In [34]:
# Example "problem understanding" text for the bridge-crossing problem.
# Raw string: the text contains LaTeX-style \( ... \) markers, which are invalid
# escape sequences in a normal string literal and trigger a warning.
understanding = r"""
{
  "understanding": {
    "goal": "Determine whether a group of travelers can cross a narrow bridge within a given time limit \(K\) using a flashlight and a wheelbarrow, where each traveler has a specific time to cross alone or with another traveler.",
    "constraints": [
      "The bridge can only support the weight of at most 2 people.",
      "Travelers must stay together and use the flashlight while on the bridge.",
      "A traveler can cross alone in \(S_i\) seconds or with another traveler in \(S_i\) seconds using the wheelbarrow.",
      "The flashlight must always be brought back to the initial side after each trip.",
      "The total time taken for all travelers to cross must not exceed \(K\) seconds."
    ],
    "test_cases": {
      "input_format": "The input starts with an integer \(T\), the number of test cases. Each test case consists of two lines: the first line contains \(N\) and \(K\), and the next \(N\) lines contain the individual times \(S_i\).",
      "output_format": "For each test case, output 'Case #i: YES' if all travelers can cross within \(K\) seconds, otherwise output 'Case #i: NO'."
    },
    "important_ideas": [
      "The optimal strategy involves pairing travelers optimally to minimize the total crossing time.",
      "Using the wheelbarrow effectively can reduce the overall time by allowing faster travelers to help slower ones.",
      "The problem requires careful consideration of the order in which travelers cross and return with the flashlight."
    ],
    "difficulty_assessment": {
      "estimated_difficulty": "Medium",
      "justification": "The problem involves combinatorial optimization and requires strategic thinking about when to pair travelers and when to use the wheelbarrow. While not extremely complex, it demands careful planning and analysis to ensure all travelers can cross within the given time limit."
    }
  }
}
"""

  understanding = """
