In [7]:
!pip install accelerate

Defaulting to user installation because normal site-packages is not writeable
Collecting accelerate
  Downloading accelerate-1.6.0-py3-none-any.whl.metadata (19 kB)
Downloading accelerate-1.6.0-py3-none-any.whl (354 kB)
Installing collected packages: accelerate
Successfully installed accelerate-1.6.0


In [54]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset, concatenate_datasets
import torch
import evaluate
import re
import traceback
import json

In [2]:
# Load the MBPP dataset from Hugging Face
mbpp = load_dataset("mbpp")
train = mbpp["train"]
test = mbpp["test"]
validation = mbpp["validation"]
prompt = mbpp["prompt"]

# Combine into one dataset
full_mbpp = concatenate_datasets([train, test, validation, prompt])

In [3]:
full_mbpp

Dataset({
    features: ['task_id', 'text', 'code', 'test_list', 'test_setup_code', 'challenge_test_list'],
    num_rows: 974
})

In [48]:
# Load model and tokenizer
model_name = "Qwen/Qwen2.5-Coder-0.5B-Instruct"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = model.to(device)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [56]:
def generate_code(prompt):

    system_prompt = "You are a coding assistant. When given a prompt that includes a function signature and docstring, generate only the function body (the indented code that implements the function). Do not include the function definition (i.e. do not output the 'def' line), imports, or docstring. Do not include any extra text, comments, or explanations. Your output should consist solely of the indented code that can directly follow a function signature."

    prompt = system_prompt + prompt
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
    attention_mask = inputs["attention_mask"]
    inputs = inputs.to(device)
    outputs = model.generate(
        inputs.input_ids,
        max_new_tokens=512,
        pad_token_id=tokenizer.eos_token_id,
        attention_mask = attention_mask
    )
    
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    assistant_response = full_response.split("assistant\n")[-1].strip()
    
    return full_response[len(system_prompt):]

In [39]:
def extract_code_block(text: str) -> str:
    """
    Extract the top code block from a model's response.
    Starts from first 'import', 'def', or comment line, and stops before '# Example usage'.
    """
    # Split into lines
    lines = text.splitlines()
    
    code_lines = []
    capturing = False

    for line in lines:
        # Start capturing if we see something that looks like code
        if not capturing and (line.strip().startswith("def") or 
                              line.strip().startswith("import") or 
                              line.strip().startswith("from") or
                              line.strip().startswith("#")):
            capturing = True

        # Stop capturing if we hit example usage
        if capturing:
            if re.match(r"#\s*Example usage", line, re.IGNORECASE):
                break
            code_lines.append(line)

    return "\n".join(code_lines).strip() if code_lines else None

In [8]:
def evaluate_code_with_tests(code: str, test_cases: list[str]) -> float:
    """
    Executes a given code string and runs test cases (as strings).
    Returns the fraction of tests that pass.
    """
    passed = 0
    total = len(test_cases)

    # Run the code in an isolated environment
    local_env = {}
    try:
        exec(code, local_env)
    except Exception as e:
        print("Code failed to compile:", e)
        return 0.0  # Bail out if the function is invalid

    for test in test_cases:
        try:
            exec(test, local_env)
            passed += 1
        except Exception:
            print(f"Test failed: {test}")
            traceback.print_exc()
            continue

    return round(passed / total, 2) if total > 0 else 0.0

In [52]:
def save_data(data, file_path):
    with open(file_path, "a") as f:
        for item in data:
            json.dump(item, f)
            f.write("\n")

In [10]:
full_mbpp

Dataset({
    features: ['task_id', 'text', 'code', 'test_list', 'test_setup_code', 'challenge_test_list'],
    num_rows: 974
})

In [11]:
full_mbpp[0]

{'task_id': 601,
 'text': 'Write a function to find the longest chain which can be formed from the given set of pairs.',
 'code': 'class Pair(object): \r\n\tdef __init__(self, a, b): \r\n\t\tself.a = a \r\n\t\tself.b = b \r\ndef max_chain_length(arr, n): \r\n\tmax = 0\r\n\tmcl = [1 for i in range(n)] \r\n\tfor i in range(1, n): \r\n\t\tfor j in range(0, i): \r\n\t\t\tif (arr[i].a > arr[j].b and\r\n\t\t\t\tmcl[i] < mcl[j] + 1): \r\n\t\t\t\tmcl[i] = mcl[j] + 1\r\n\tfor i in range(n): \r\n\t\tif (max < mcl[i]): \r\n\t\t\tmax = mcl[i] \r\n\treturn max',
 'test_list': ['assert max_chain_length([Pair(5, 24), Pair(15, 25),Pair(27, 40), Pair(50, 60)], 4) == 3',
  'assert max_chain_length([Pair(1, 2), Pair(3, 4),Pair(5, 6), Pair(7, 8)], 4) == 4',
  'assert max_chain_length([Pair(19, 10), Pair(11, 12),Pair(13, 14), Pair(15, 16), Pair(31, 54)], 5) == 5'],
 'test_setup_code': '',
 'challenge_test_list': []}

In [None]:
prompt = '''#Write a python function to remove first and last occurrence of a given character from the string.
def remove_Occ(s,ch):'''
response = generate_code(prompt)

In [13]:
len(full_mbpp)

974

In [42]:
full_mbpp[1]

{'task_id': 602,
 'text': 'Write a python function to find the first repeated character in a given string.',
 'code': 'def first_repeated_char(str1):\r\n  for index,c in enumerate(str1):\r\n    if str1[:index+1].count(c) > 1:\r\n      return c \r\n  return "None"',
 'test_list': ['assert first_repeated_char("abcabc") == "a"',
  'assert first_repeated_char("abc") == "None"',
  'assert first_repeated_char("123123") == "1"'],
 'test_setup_code': '',
 'challenge_test_list': []}

In [55]:
data = []
for i in range(10):
    d1 = {}
    d2 = {}
    
    d2['prompt'] = full_mbpp[i]['text']
    d2['code'] = full_mbpp[i]['code']
    d2['score'] = 1
    
    index = full_mbpp[i]['code'].index(':')
    prompt = '#' + full_mbpp[i]['text'] + '\n' + full_mbpp[i]['code'][:index+1]
    response = generate_code(prompt)
    code = extract_code_block(response)
    
    if code != None:
        test_cases = full_mbpp[i]['test_list']
        score = evaluate_code_with_tests(code, test_cases)
        
        d1['prompt'] = prompt
        d1['code'] = code
        d1['score'] = score
    if len(d1) != 0:
        data.append(d1)
    data.append(d2)
    
    if len(data) > 10:
        file_path = 'generator_rewards.jsonl'
        save_data(data, file_path)
        data = []
    

Test failed: assert max_chain_length([Pair(5, 24), Pair(15, 25),Pair(27, 40), Pair(50, 60)], 4) == 3
Test failed: assert max_chain_length([Pair(1, 2), Pair(3, 4),Pair(5, 6), Pair(7, 8)], 4) == 4
Test failed: assert max_chain_length([Pair(19, 10), Pair(11, 12),Pair(13, 14), Pair(15, 16), Pair(31, 54)], 5) == 5


Traceback (most recent call last):
  File "/tmp/ipykernel_15096/2250951485.py", line 19, in evaluate_code_with_tests
    exec(test, local_env)
  File "<string>", line 1, in <module>
NameError: name 'max_chain_length' is not defined
Traceback (most recent call last):
  File "/tmp/ipykernel_15096/2250951485.py", line 19, in evaluate_code_with_tests
    exec(test, local_env)
  File "<string>", line 1, in <module>
NameError: name 'max_chain_length' is not defined
Traceback (most recent call last):
  File "/tmp/ipykernel_15096/2250951485.py", line 19, in evaluate_code_with_tests
    exec(test, local_env)
  File "<string>", line 1, in <module>
NameError: name 'max_chain_length' is not defined


Code failed to compile: invalid syntax (<string>, line 21)
Test failed: assert get_ludic(10) == [1, 2, 3, 5, 7]
Test failed: assert get_ludic(25) == [1, 2, 3, 5, 7, 11, 13, 17, 23, 25]
Test failed: assert get_ludic(45) == [1, 2, 3, 5, 7, 11, 13, 17, 23, 25, 29, 37, 41, 43]


Traceback (most recent call last):
  File "/tmp/ipykernel_15096/2250951485.py", line 19, in evaluate_code_with_tests
    exec(test, local_env)
  File "<string>", line 1, in <module>
AssertionError
Traceback (most recent call last):
  File "/tmp/ipykernel_15096/2250951485.py", line 19, in evaluate_code_with_tests
    exec(test, local_env)
  File "<string>", line 1, in <module>
AssertionError
Traceback (most recent call last):
  File "/tmp/ipykernel_15096/2250951485.py", line 19, in evaluate_code_with_tests
    exec(test, local_env)
  File "<string>", line 1, in <module>
AssertionError


Code failed to compile: invalid syntax (<string>, line 9)
Code failed to compile: invalid syntax (<string>, line 11)
Code failed to compile: invalid syntax (<string>, line 6)
Test failed: assert find_literals('The quick brown fox jumps over the lazy dog.', 'fox') == ('fox', 16, 19)
Test failed: assert find_literals('Its been a very crazy procedure right', 'crazy') == ('crazy', 16, 21)
Test failed: assert find_literals('Hardest choices required strongest will', 'will') == ('will', 35, 39)


Traceback (most recent call last):
  File "/tmp/ipykernel_15096/2250951485.py", line 19, in evaluate_code_with_tests
    exec(test, local_env)
  File "<string>", line 1, in <module>
AssertionError
Traceback (most recent call last):
  File "/tmp/ipykernel_15096/2250951485.py", line 19, in evaluate_code_with_tests
    exec(test, local_env)
  File "<string>", line 1, in <module>
AssertionError
Traceback (most recent call last):
  File "/tmp/ipykernel_15096/2250951485.py", line 19, in evaluate_code_with_tests
    exec(test, local_env)
  File "<string>", line 1, in <module>
AssertionError


Test failed: assert bell_Number(3) == 5
Test failed: assert bell_Number(4) == 15


Traceback (most recent call last):
  File "/tmp/ipykernel_15096/2250951485.py", line 19, in evaluate_code_with_tests
    exec(test, local_env)
  File "<string>", line 1, in <module>
AssertionError
Traceback (most recent call last):
  File "/tmp/ipykernel_15096/2250951485.py", line 19, in evaluate_code_with_tests
    exec(test, local_env)
  File "<string>", line 1, in <module>
AssertionError


Code failed to compile: EOL while scanning string literal (<string>, line 6)
Test failed: assert remove_kth_element([1,1,2,3,4,4,5,1],3)==[1, 1, 3, 4, 4, 5, 1]
Test failed: assert remove_kth_element([0, 0, 1, 2, 3, 4, 4, 5, 6, 6, 6, 7, 8, 9, 4, 4],4)==[0, 0, 1, 3, 4, 4, 5, 6, 6, 6, 7, 8, 9, 4, 4]


Traceback (most recent call last):
  File "/tmp/ipykernel_15096/2250951485.py", line 19, in evaluate_code_with_tests
    exec(test, local_env)
  File "<string>", line 1, in <module>
AssertionError
Traceback (most recent call last):
  File "/tmp/ipykernel_15096/2250951485.py", line 19, in evaluate_code_with_tests
    exec(test, local_env)
  File "<string>", line 1, in <module>
AssertionError
