In [1]:
# Mount Google Drive inside the Colab VM so the project directory stored
# there is reachable through the local filesystem.
from google.colab import drive

drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
cd /content/drive/MyDrive/Colab Notebooks/Github/AIPlus99/HW6

/content/drive/MyDrive/Colab Notebooks/Github/AIPlus99/HW6


In [3]:
!pip install bitsandbytes
!pip install flash-attn --no-build-isolation



In [4]:
import nltk
# Download the 'punkt' and 'punkt_tab' NLTK packages; they are presumably the
# tokenizer data needed by `word_tokenize`, which the BLEU helper below uses.
nltk.download('punkt')
# This is the last expression in the cell, so its return value is what the
# notebook displays as the cell result.
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [5]:
from code_generator_1_3b import CodeGenerator
from requirement_to_code_dataset import get_train_dataloader
from code_discriminator import CodeDiscriminator
from concurrent.futures import ThreadPoolExecutor
import torch
from tqdm import tqdm
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from nltk.tokenize import word_tokenize

# Evaluation data read from the test JSONL file. batch_size and limit are both
# 90 (limit presumably caps the number of examples -- confirm in
# requirement_to_code_dataset), which matches the single batch seen in the
# progress bar of the recorded run.
test_loader = get_train_dataloader(
    "unreal_code_dataset_test.jsonl",
    batch_size=90,
    shuffle=True,
    limit=90,
)

# Three generators to compare:
#   raw_generator - constructed without a checkpoint path
#   SFT_generator - loaded from checkpoint_1_3b/generator_SFT
#   PPO_generator - loaded from checkpoint_1_3b/generator
raw_generator = CodeGenerator()
SFT_generator = CodeGenerator(load_path="checkpoint_1_3b/generator_SFT")
PPO_generator = CodeGenerator(load_path="checkpoint_1_3b/generator")

# Use the GPU when one is available, otherwise fall back to the CPU, and move
# every generator onto that device.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
for generator in (raw_generator, SFT_generator, PPO_generator):
    generator.to(device)


def compute_bleu_scores(reference_texts: list[str], generated_texts: list[str]) -> float:
    """Return the corpus-level BLEU score of generated texts against references.

    Both inputs are tokenized with NLTK's ``word_tokenize``. Each generated
    text is scored against exactly one reference (the entry at the same
    index), using equal weights of 0.25 for 1- to 4-gram precision and
    NLTK's ``SmoothingFunction().method1`` smoothing.

    Args:
        reference_texts: Ground-truth texts, one per sample.
        generated_texts: Model outputs, aligned index-by-index with
            ``reference_texts``.

    Returns:
        The value returned by ``corpus_bleu`` for the whole corpus.
    """
    smoothing = SmoothingFunction().method1

    # corpus_bleu expects a *list of references* per hypothesis, so every
    # tokenized reference is wrapped in its own single-element list.
    tokenized_references = []
    for reference in reference_texts:
        tokenized_references.append([word_tokenize(reference)])

    tokenized_candidates = []
    for candidate in generated_texts:
        tokenized_candidates.append(word_tokenize(candidate))

    return corpus_bleu(
        tokenized_references,
        tokenized_candidates,
        weights=(0.25, 0.25, 0.25, 0.25),
        smoothing_function=smoothing,
    )

def evaluate_bleu_score_for_generators(raw_gen, sft_gen, ppo_gen, dataloader, device):
    """Generate code with three generators and score each with corpus BLEU.

    For every batch the reference text is the batch's ``header_code`` and
    ``cpp_code`` joined by a newline. Each generator produces text from the
    batch's ``requirement`` prompts; predictions and references are collected
    over the whole dataloader and scored once at the end with
    ``compute_bleu_scores``.

    Args:
        raw_gen: First generator; its score is stored under ``"bleu_raw"``.
        sft_gen: Second generator; its score is stored under ``"bleu_sft"``.
        ppo_gen: Third generator; its score is stored under ``"bleu_ppo"``.
        dataloader: Iterable of batches exposing the keys ``"requirement"``,
            ``"header_code"`` and ``"cpp_code"``.
        device: Not used inside this function; kept so existing callers keep
            working.

    Returns:
        dict with keys ``"bleu_raw"``, ``"bleu_sft"``, ``"bleu_ppo"`` and
        ``"bleu_ref"``. The last one scores the references against themselves.
    """
    # Keyed by the result name each generator reports under; insertion order
    # fixes both the generation order and the order of the returned keys.
    generators = {
        "bleu_raw": raw_gen,
        "bleu_sft": sft_gen,
        "bleu_ppo": ppo_gen,
    }
    for model in generators.values():
        model.eval()

    predictions = {score_key: [] for score_key in generators}
    references = []

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating BLEU"):
            prompts = batch["requirement"]
            headers = batch["header_code"]
            sources = batch["cpp_code"]
            batch_references = [
                header + "\n" + source for header, source in zip(headers, sources)
            ]

            # Run every generator on the same prompts before accumulating.
            batch_outputs = {
                score_key: model.generate(prompts, return_text=True)
                for score_key, model in generators.items()
            }

            references.extend(batch_references)
            for score_key, generated in batch_outputs.items():
                predictions[score_key].extend(generated)

    # Score each generator against the shared reference list.
    scores = {
        score_key: compute_bleu_scores(references, generated)
        for score_key, generated in predictions.items()
    }
    # References scored against themselves.
    scores["bleu_ref"] = compute_bleu_scores(references, references)
    return scores

# Run the comparison over the test loader for all three generators.
bleu_results = evaluate_bleu_score_for_generators(
    raw_gen=raw_generator,
    sft_gen=SFT_generator,
    ppo_gen=PPO_generator,
    dataloader=test_loader,
    device=device,
)

# One report line per score, formatted to four decimal places.
report_lines = [
    f"BLEU Raw Generator  : {bleu_results['bleu_raw']:.4f}",
    f"BLEU SFT Generator  : {bleu_results['bleu_sft']:.4f}",
    f"BLEU PPO Generator  : {bleu_results['bleu_ppo']:.4f}",
    f"BLEU Ref : {bleu_results['bleu_ref']:.4f}",
]
for report_line in report_lines:
    print(report_line)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Initializing new model from base: Qwen/Qwen1.5-0.5B
Applying LoRA adaptation...
Freezing all parameters except LoRA blocks...
Trainable params: 1,572,864 / 465,560,576 (0.34%)
No value head to load
Loading model + LoRA from checkpoint_1_3b/generator_SFT
Freezing all parameters except LoRA blocks...
Trainable params: 1,572,864 / 465,560,576 (0.34%)
Loaded value head.
Loading model + LoRA from checkpoint_1_3b/generator
Freezing all parameters except LoRA blocks...
Trainable params: 1,572,864 / 465,560,576 (0.34%)
Loaded value head.


Evaluating BLEU:   0%|          | 0/1 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Evaluating BLEU: 100%|██████████| 1/1 [10:23<00:00, 623.61s/it]


BLEU Raw Generator  : 0.0284
BLEU SFT Generator  : 0.2816
BLEU PPO Generator  : 0.4506
BLEU Ref : 1.0000
