In [9]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from gpt2_python_benchmark import CodeBLEU

In [2]:
# Baseline checkpoint: the stock "gpt2-medium" weights and matching tokenizer.
model_name = "gpt2-medium"

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token for padding when no pad token is defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(model_name)
# Kept as the cell's final expression so the module summary is displayed.
model.to(device)

2025-04-20 11:25:00.059232: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745128500.071322   18661 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745128500.074892   18661 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1745128500.086685   18661 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1745128500.086703   18661 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1745128500.086705   18661 computation_placer.cc:177] computation placer alr

[2025-04-20 11:25:05,383] [INFO] [real_accelerator.py:239:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/usr/bin/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
/usr/bin/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 1024)
    (wpe): Embedding(1024, 1024)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-23): 24 x GPT2Block(
        (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=3072, nx=1024)
          (c_proj): Conv1D(nf=1024, nx=1024)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=4096, nx=1024)
          (c_proj): Conv1D(nf=1024, nx=4096)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1024, out_features=50257, bias=False)
)

In [10]:
# ------------------ Alpaca-style Fine-tuning ------------------ #
def prepare_alpaca_dataset():
    """
    Fetch the CodeAlpaca train split and keep the Python-looking examples.

    An example is kept when its 'completion' field contains at least one of
    the substrings 'def ', 'import ' or 'lambda '.

    Returns:
        The filtered dataset, as returned by the dataset's ``filter`` method
    """
    markers = ('def ', 'import ', 'lambda ')

    def _looks_like_python(example):
        # Plain substring test against the completion text.
        completion = example['completion']
        for marker in markers:
            if marker in completion:
                return True
        return False

    full_split = load_dataset("HuggingFaceH4/CodeAlpaca_20K", split="train")
    return full_split.filter(_looks_like_python)

In [11]:
def generate_code(prompt, max_length=500, temperature=0.5):
    """
    Generate Python code given a natural language prompt.

    The prompt is wrapped in a comment header followed by an opening ``def``
    so that the model continues with a function definition. Uses the
    notebook-level ``tokenizer``, ``model`` and ``device`` globals, so the
    result depends on whichever checkpoint is currently loaded.

    Args:
        prompt: Natural language description of the code to generate
        max_length: Forwarded to ``model.generate`` as ``max_length``; in
            transformers this bound counts the prompt tokens as well as the
            newly generated ones
        temperature: Sampling temperature

    Returns:
        Generated code as a string, starting with ``def``
    """
    # Format the prompt for code generation
    formatted_prompt = f"# Python program to {prompt}\n\ndef"

    # Calling the tokenizer (instead of ``encode``) also returns the attention
    # mask. It is passed explicitly because the pad token may be the EOS
    # token, in which case ``generate`` cannot infer the mask from the ids.
    encoded = tokenizer(formatted_prompt, return_tensors="pt").to(device)
    input_ids = encoded["input_ids"]
    prompt_token_count = input_ids.shape[-1]

    # Generate code (nucleus sampling)
    output = model.generate(
        input_ids,
        attention_mask=encoded["attention_mask"],
        max_length=max_length,
        temperature=temperature,
        top_p=0.95,
        do_sample=True,
        pad_token_id=tokenizer.pad_token_id,
    )

    # Drop the prompt by token count rather than by character count: decoding
    # may normalise spacing, so the decoded prompt is not guaranteed to have
    # the same length as ``formatted_prompt`` and a character slice could cut
    # into (or leave prompt residue in front of) the generated code.
    continuation = tokenizer.decode(
        output[0][prompt_token_count:], skip_special_tokens=True
    )

    # Re-attach the ``def`` that ended the prompt so the result reads as code.
    return f"def{continuation}"

In [12]:
# Import the one name this notebook uses explicitly; a wildcard import hides
# where names come from and can silently shadow notebook globals.
from data_loader import BenchmarkDataManager

# Read the benchmark items from the JSON file.
# NOTE(review): the variable reuses the module's name `data_loader`; it refers
# to the manager instance from here on.
data_loader = BenchmarkDataManager("benchmark_data.json")
data_loader.load_data()
data = data_loader.get_all_data()

Loaded 20 benchmark questions from benchmark_data.json
Loaded 20 benchmark questions from benchmark_data.json


In [13]:
# Truncate `data` to its first five entries (rebinds the same global name, so
# the full set is only recoverable by re-running the loading cell).
data = data[:5]

In [8]:
import tqdm
# Score the currently loaded model on every benchmark item and print a
# four-line report (question, reference, generation, scores) per item.
evaluator = CodeBLEU()
for item in data:
    question, reference = item["question"], item["reference"]
    generated_code = generate_code(question)
    scores = evaluator.calculate_codebleu(reference, generated_code)
    # One print call with a newline separator emits the same four lines.
    print(
        "Question: {}".format(question),
        "reference: {}".format(reference),
        "generated_code: {}".format(generated_code),
        "scores: {}".format(scores),
        sep="\n",
    )

Question: find the maximum element in a list
reference: def find_max(lst):
    if not lst:
        return None
    max_val = lst[0]
    for val in lst:
        if val > max_val:
            max_val = val
    return max_val
generated_code: def max_element ( list ):

"""

Returns the maximum element in a list.

Args:

list - list to search for

Returns:

The maximum element in the list

"""

if list.startswith( ' ' ):

return list.index( ' ' )

elif list.startswith( ' ' ):

return list.index( ' ' )

elif list.startswith( ' ' ):

return list.index( ' ' )

return max_element(list)

def max_list ( list ):

"""

Returns the maximum list element in a list.

Args:

list - list to search for

Returns:

The maximum list element in the list

"""

if list.startswith( ' ' ):

return list.index( ' ' )

elif list.startswith( ' ' ):

return list.index( ' ' )

elif list.startswith( ' ' ):

return list.index( ' ' )

return max_list(list)

def max_list_index ( list ):

"""

Returns the maximum list index

In [14]:
# Swap in the fine-tuned checkpoint: this rebinds the `tokenizer`, `model` and
# `device` globals that `generate_code` reads.
model_path = "outputs/gpt2-medium_3_eps"  # Path to your fine-tuned model

# Prefer the GPU when torch reports one, otherwise use the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_path)
# Fall back to the end-of-sequence token when the tokenizer has no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(model_path)
# Final expression of the cell, so the module summary is shown as output.
model.to(device)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 1024)
    (wpe): Embedding(1024, 1024)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-23): 24 x GPT2Block(
        (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=3072, nx=1024)
          (c_proj): Conv1D(nf=1024, nx=1024)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=4096, nx=1024)
          (c_proj): Conv1D(nf=1024, nx=4096)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1024, out_features=50257, bias=False)
)

In [None]:
import tqdm
# Repeat the benchmark with whichever model is loaded now, printing the
# question, reference, generated code and scores for each item.
evaluator = CodeBLEU()
for item in data:
    question, reference = item["question"], item["reference"]
    generated_code = generate_code(question)
    scores = evaluator.calculate_codebleu(reference, generated_code)
    # Assemble the four report lines first, then emit them in one write.
    report_lines = [
        "Question: {}".format(question),
        "reference: {}".format(reference),
        "generated_code: {}".format(generated_code),
        "scores: {}".format(scores),
    ]
    print("\n".join(report_lines))

Question: find the maximum element in a list
reference: def find_max(lst):
    if not lst:
        return None
    max_val = lst[0]
    for val in lst:
        if val > max_val:
            max_val = val
    return max_val
generated_code: def find_max(lst):
    max = lst[0]
    for i in range(len(lst)):
        if lst[i] > max:
            max = lst[i]
    return max

if __name__ == '__main__':
    lst = [1, 2, 3, 4]
    print(find_max(lst))
    # Output: [3, 6, 8, 10]
    # Output: [7, 11, 15]
    print(find_max(lst))
    # Output: [9, 14, 17]
    print(find_max(lst))
    # Output: [15, 19, 23]
           print(find_max(lst))
                                                                                                                                                                                                                                                                         
scores: {'bleu': np.float64(0.08789687984635984), 'syntax': 0, 'dataflow': 0.0, 'keyword': 0.714285

: 