## Load Model

In [14]:
# Use a pipeline as a high-level helper
import torch
from transformers import pipeline

model_directory = "/datasets/ai/qwen/hub/models--Qwen--Qwen2.5-Coder-7B-Instruct/snapshots/c03e6d358207e414f1eca0bb1891e29f1db0e242"

pipe = pipeline("text-generation", 
                model=model_directory,
                max_length=8024,
                device=0)



Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0


## Code Generation Lite

In [1]:
import pandas as pd

df = pd.read_pickle('data/code_generation_lite.pkl')
df = df[df['platform'] == 'leetcode'].reset_index()
df

Unnamed: 0,index,question_title,question_content,platform,question_id,contest_id,contest_date,starter_code,difficulty,public_test_cases,private_test_cases,metadata
0,9,number-of-senior-citizens,You are given a 0-indexed array of strings det...,leetcode,2727,biweekly-contest-104,2023-05-13T00:00:00,"class Solution:\n def countSeniors(self, de...",easy,"[{""input"": ""[\""7868190130M7522\"", \""5303914400...",eJylmcuqJMcRhr3wcwhxwDthMiIvkekHqF1RW4NGC2NkEI...,"{""func_name"": ""countSeniors""}"
1,10,sum-in-a-matrix,You are given a 0-indexed 2D integer array num...,leetcode,2728,biweekly-contest-104,2023-05-13T00:00:00,"class Solution:\n def matrixSum(self, nums:...",medium,"[{""input"": ""[[7, 2, 1], [6, 4, 2], [6, 5, 3], ...",eJzsvTuuNUuSpUeBA7lIqiGEh4fHo0dCIFFCo1ENFNDIKo...,"{""func_name"": ""matrixSum""}"
2,11,maximum-or,You are given a 0-indexed integer array nums o...,leetcode,2730,biweekly-contest-104,2023-05-13T00:00:00,"class Solution:\n def maximumOr(self, nums:...",medium,"[{""input"": ""[12, 9]\n1"", ""output"": ""30"", ""test...",eJyUvUuqbduSZaeECvLw9E3M8R9DJRGElBAiBAHCI0DuiU...,"{""func_name"": ""maximumOr""}"
3,12,maximum-strength-of-a-group,You are given a 0-indexed integer array nums r...,leetcode,2754,biweekly-contest-105,2023-05-27T00:00:00,"class Solution:\n def maxStrength(self, num...",medium,"[{""input"": ""[3, -1, -5, 2, 5, -9]"", ""output"": ...",eJylks0KwjAMxwV9kNBzK+u2fkxfRBg7iEwYyDawPYgIPo...,"{""func_name"": ""maxStrength""}"
4,13,extra-characters-in-a-string,You are given a 0-indexed string s and a dicti...,leetcode,2755,biweekly-contest-105,2023-05-27T00:00:00,"class Solution:\n def minExtraChar(self, s:...",medium,"[{""input"": ""\""leetscode\""\n[\""leet\"", \""code\""...",eJylWEmupMtVZsAiGFo1thCIRoKVILk8iL7v+0BILAIWxY...,"{""func_name"": ""minExtraChar""}"
...,...,...,...,...,...,...,...,...,...,...,...,...
230,506,maximum-sum-of-subsequence-with-non-adjacent-e...,You are given an array nums consisting of inte...,leetcode,3423,weekly-contest-399,2024-05-25T00:00:00,class Solution:\n def maximumSumSubsequence...,hard,"[{""input"": ""[3, 5, 9]\n[[1, -2], [0, -3]]"", ""o...",eJxMvUuqbduSZaeECvLw9D0wx38MFUJpgSsSQoQgQHgEhE...,"{""func_name"": ""maximumSumSubsequence""}"
231,507,minimum-substring-partition-of-equal-character...,"Given a string s, you need to partition it int...",leetcode,3403,biweekly-contest-130,2024-05-11T00:00:00,class Solution:\n def minimumSubstringsInPa...,medium,"[{""input"": ""\""fabccddg\"""", ""output"": ""3"", ""tes...",eJztW8muJLtx9cIfIvRaMGRNlu2f8M4GLC84z/NMw4A/wv...,"{""func_name"": ""minimumSubstringsInPartition""}"
232,508,find-products-of-elements-of-big-array,A powerful array for an integer x is the short...,leetcode,3411,biweekly-contest-130,2024-05-11T00:00:00,class Solution:\n def findProductsOfElement...,hard,"[{""input"": ""[[1, 3, 7]]"", ""output"": ""[4]"", ""te...",eJyVWM2qrMcNzMLPkPVw1k3oH7Wk9pMEhrMIwQFDuDbk3I...,"{""func_name"": ""findProductsOfElements""}"
233,509,find-the-xor-of-numbers-which-appear-twice,"You are given an array nums, where each number...",leetcode,3428,biweekly-contest-131,2024-05-25T00:00:00,class Solution:\n def duplicateNumbersXOR(s...,easy,"[{""input"": ""[1, 2, 1, 3]"", ""output"": ""1"", ""tes...",eJylV1uKHEcQ9IcO4CMM812Yej90EoPQhxGyEZiVQLMfxg...,"{""func_name"": ""duplicateNumbersXOR""}"


In [2]:
class CodeGenerationProblem:
    def __init__(self, question_content, starter_code=None, difficulty=None, public_test_cases=None, private_test_cases=None):
        self.question_content = question_content
        self.starter_code = starter_code
        self.difficulty = difficulty
        self.public_test_cases = public_test_cases
        self.private_test_cases = private_test_cases

In [3]:
# Formatting Starter Code
FORMATTING_MESSAGE_WITH_STARTER_CODE = "You will use the following starter code to write the solution to the problem and enclose your code within delimiters."
FORMATTING_WITHOUT_STARTER_CODE = "Read the inputs from stdin solve the problem and write the answer to stdout (do not directly test on the sample inputs). Enclose your code within delimiters as follows. Ensure that when the python program runs, it reads the inputs, runs the algorithm and writes output to STDOUT."

In [4]:
def get_generic_question_template_answer(question: CodeGenerationProblem):
    prompt = f"### Question:\n{question.question_content}\n\n"
    if question.starter_code:
        prompt += (
            f"### Format: {FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
        )
        prompt += f"```python\n{question.starter_code}\n```\n\n"
    else:
        prompt += f"### Format: {FORMATTING_WITHOUT_STARTER_CODE}\n"
        prompt += "```python\n# YOUR CODE HERE\n```\n\n"
    prompt += f"### Answer: (use the provided format with backticks)\n\n"
    return prompt

In [29]:
idx = 80

row = df.iloc[idx]
problem = CodeGenerationProblem(
    question_content=row["question_content"],
    starter_code=row["starter_code"] if row["starter_code"] else None,
    difficulty=row['difficulty'],
    public_test_cases=row['public_test_cases'],
    private_test_cases=row['private_test_cases'],
)

In [30]:
print(problem.difficulty)
print(problem.public_test_cases)

hard
[{"input": "10\n20\n3", "output": "2", "testtype": "functional"}, {"input": "1\n10\n1", "output": "1", "testtype": "functional"}, {"input": "5\n5\n2", "output": "0", "testtype": "functional"}]


In [31]:
def get_generic_question_template_answer(question: CodeGenerationProblem):
    prompt = f"### Question:\n{question.question_content}\n\n"
    if question.starter_code:
        prompt += (
            f"### Format: {FORMATTING_MESSAGE_WITH_STARTER_CODE}\n"
        )
        prompt += f"```python\n{question.starter_code}\n```\n\n"
    else:
        prompt += f"### Format: {FORMATTING_WITHOUT_STARTER_CODE}\n"
        prompt += "```python\n# YOUR CODE HERE\n```\n\n"
    prompt += f"### Answer: (use the provided format with backticks)\n\n"
    return prompt

In [32]:
prompt = get_generic_question_template_answer(problem)
print(prompt)

### Question:
You are given positive integers low, high, and k.
A number is beautiful if it meets both of the following conditions:

The count of even digits in the number is equal to the count of odd digits.
The number is divisible by k.

Return the number of beautiful integers in the range [low, high].
 
Example 1:

Input: low = 10, high = 20, k = 3
Output: 2
Explanation: There are 2 beautiful integers in the given range: [12,18]. 
- 12 is beautiful because it contains 1 odd digit and 1 even digit, and is divisible by k = 3.
- 18 is beautiful because it contains 1 odd digit and 1 even digit, and is divisible by k = 3.
Additionally we can see that:
- 16 is not beautiful because it is not divisible by k = 3.
- 15 is not beautiful because it does not contain equal counts even and odd digits.
It can be shown that there are only 2 beautiful integers in the given range.

Example 2:

Input: low = 1, high = 10, k = 1
Output: 1
Explanation: There is 1 beautiful integer in the given range: [10

## Generate Responseimport torch

In [33]:
import torch
torch.cuda.empty_cache()
torch.cuda.ipc_collect()

In [34]:
# Generic System Prompt
SYSTEM_MESSAGE_GENERIC = f"You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests."

messages = [
    {"role": "system", "content": SYSTEM_MESSAGE_GENERIC},
    {"role": "user", "content": prompt},
]

response = pipe(messages)
output = response[0]['generated_text'][2]['content']
output

'```python\nclass Solution:\n    def numberOfBeautifulIntegers(self, low: int, high: int, k: int) -> int:\n        def count_beautiful_numbers(n):\n            if n < 10:\n                return 0\n            digits = []\n            while n > 0:\n                digits.append(n % 10)\n                n //= 10\n            digits.reverse()\n            n = len(digits)\n            dp = [[[[[0] * 21 for _ in range(2)] for _ in range(n + 1)] for _ in range(n + 1)] for _ in range(2)]\n            dp[0][0][0][0][0] = 1\n            \n            for i in range(n):\n                for j in range(i + 1):\n                    for l in range(i + 1):\n                        for m in range(21):\n                            for s in range(2):\n                                if s == 0:\n                                    for d in range(10):\n                                        if d % 2 == 0:\n                                            dp[1][j + 1][l][m][s] += dp[0][j][l][m][s]\n         

In [36]:
print(output)

```python
class Solution:
    def numberOfBeautifulIntegers(self, low: int, high: int, k: int) -> int:
        def count_beautiful_numbers(n):
            if n < 10:
                return 0
            digits = []
            while n > 0:
                digits.append(n % 10)
                n //= 10
            digits.reverse()
            n = len(digits)
            dp = [[[[[0] * 21 for _ in range(2)] for _ in range(n + 1)] for _ in range(n + 1)] for _ in range(2)]
            dp[0][0][0][0][0] = 1
            
            for i in range(n):
                for j in range(i + 1):
                    for l in range(i + 1):
                        for m in range(21):
                            for s in range(2):
                                if s == 0:
                                    for d in range(10):
                                        if d % 2 == 0:
                                            dp[1][j + 1][l][m][s] += dp[0][j][l][m][s]
                                  

In [35]:
import re

output = response[0]['generated_text'][2]['content']

# Try extracting from ```python ... ``` first
match = re.search(r"```python\s*(.*?)```", output, re.DOTALL)

# Fallback to plain ``` ... ``` if no language specified
if not match:
    match = re.search(r"```\s*(.*?)```", output, re.DOTALL)

# Extract and clean
solution_code = match.group(1).strip() if match else None

print("✅ Extracted code:\n", solution_code)

✅ Extracted code:
 None


In [37]:
if solution_code == None:
    print("Hi")

Hi


### Eval

In [15]:
import json
from lcb_runner.evaluation.testing_util import run_test

def evaluate_code_with_row(row, generated_code: str, timeout: int = 30, debug: bool = False):
    """
    Evaluate a solution against public test cases from a LiveCodeBench row.
    Automatically uses call-based or standard_input mode depending on metadata.

    Args:
        row: A row from your benchmark DataFrame.
        generated_code: Python solution (either class-based or standard I/O).
        timeout: Max seconds allowed for execution.
        debug: Print debug info.

    Returns:
        result: Evaluation scores per test.
        metadata: Execution trace including inputs, outputs, errors, etc.
    """
    if not row.get("public_test_cases"):
        print("⚠️ No public test cases found in this row.")
        return None, None

    # Load test cases
    public_tests = json.loads(row["public_test_cases"])
    input_lines = [tc["input"].strip() for tc in public_tests]
    output_lines = [tc["output"].strip() for tc in public_tests]

    fn_name = None
    try:
        metadata = json.loads(row.get("metadata", "{}"))
        fn_name = metadata.get("func_name")
    except Exception as e:
        print(f"⚠️ Failed to parse metadata: {e}")

    sample = {
        "input_output": json.dumps({
            "inputs": input_lines,
            "outputs": output_lines,
            **({"fn_name": fn_name} if fn_name else {})
        }),
        "code": ""
    }

    return run_test(sample, test=generated_code, timeout=timeout, debug=debug)

In [16]:
row

index                                                               219
question_title                       find-the-distinct-difference-array
question_content      You are given a 0-indexed array nums of length...
platform                                                       leetcode
question_id                                                        2777
contest_id                                           weekly-contest-344
contest_date                                        2023-05-07 00:00:00
starter_code          class Solution:\n    def distinctDifferenceArr...
difficulty                                                         easy
public_test_cases     [{"input": "[1, 2, 3, 4, 5]", "output": "[-3, ...
private_test_cases    [{"input": "[3]", "output": "[1]", "testtype":...
metadata                       {"func_name": "distinctDifferenceArray"}
Name: 0, dtype: object

In [17]:
### Patch to bypass reliability_guard restrictions in Jupyter
from lcb_runner import evaluation

def dummy_guard(*args, **kwargs):
    # print("⚠️ reliability_guard skipped for notebook safety.")
    pass

evaluation.testing_util.reliability_guard = dummy_guard

In [18]:
result, metadata_output = evaluate_code_with_row(row, solution_code, timeout=30, debug=True)

print("✅ Result:", result)
print("📝 Metadata:", metadata_output)

start = 19:50:42.620002
loaded input_output = 19:50:42.620043
loading test code = 19:50:42.620052
✅ Result: [True, True]
📝 Metadata: {'execution time': 1.2159347534179688e-05}


In [22]:
public_tests = json.loads(row["public_test_cases"])
    
# Concatenate inputs and outputs into one big stdin/out test case
combined_input = "".join(tc["input"] for tc in public_tests)
combined_output = "".join(tc["output"] for tc in public_tests)

metadata = json.loads(row.get("metadata", "{}"))
fn_name = metadata.get("func_name")
print(f'combined_input: {combined_input}')
print(f'fn_name: {fn_name}')
print("*"*10)
print(f'expected output: {combined_output}')

combined_input: [1, 2, 3, 4, 5][3, 2, 3, 4, 2]
fn_name: distinctDifferenceArray
**********
expected output: [-3, -1, 1, 3, 5][-2, -1, 0, 2, 3]


In [20]:
print("📝 Output from model:", metadata_output.get("output"))
print("✅ Expected output:", metadata_output.get("expected"))

📝 Output from model: None
✅ Expected output: None
