In [None]:
# Mount Google Drive into the Colab VM at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
cd /content/drive/MyDrive/Colab Notebooks/Github/AIPlus99/HW6

/content/drive/MyDrive/Colab Notebooks/Github/AIPlus99/HW6


In [None]:
!pip install bitsandbytes
!pip install flash-attn --no-build-isolation

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-

In [None]:
from code_generator_1_3b import CodeGenerator
from requirement_to_code_dataset import get_train_dataloader
from code_discriminator import CodeDiscriminator
from concurrent.futures import ThreadPoolExecutor
import torch
from tqdm import tqdm

# === Format score computation ===
# Global multipliers applied to the raw (non-positive) penalty totals.
FORMAT_PENALTY_WEIGHT_H = 1.0
FORMAT_PENALTY_WEIGHT_CPP = 1.0


def compute_format_penalties_h(header_texts):
    """Return one format penalty per generated header text.

    Each text starts at 0.0 and loses points for every missing formatting
    marker (code fence, reflection macro, required includes, ...).

    Args:
        header_texts: iterable of strings, one header per sample.

    Returns:
        1-D float32 tensor of penalties (<= 0), scaled by
        FORMAT_PENALTY_WEIGHT_H. Empty input yields an empty tensor.
    """
    per_sample = []
    for text in header_texts:
        trimmed = text.strip()
        exports_type = "UCLASS" in text or "UINTERFACE" in text
        has_reflection_macro = exports_type or "USTRUCT" in text

        penalty = 0.0
        # Code fence must open with ```cpp and close with ```.
        if not trimmed.startswith("```cpp"):
            penalty -= 1.0
        if not trimmed.endswith("```"):
            penalty -= 2.0
        # At least one of UCLASS / USTRUCT / UINTERFACE must be present.
        if not has_reflection_macro:
            penalty -= 1.0
        if "#pragma once" not in text:
            penalty -= 1.0
        if '#include "CoreMinimal.h"' not in text:
            penalty -= 1.0
        if '.generated.h' not in text:
            penalty -= 1.0
        if 'GENERATED_BODY()' not in text:
            penalty -= 1.0
        # The *_API export macro is only demanded for UCLASS / UINTERFACE headers.
        if exports_type and '_API' not in text:
            penalty -= 1.0
        per_sample.append(penalty)

    penalties = torch.tensor(per_sample, dtype=torch.float32)
    return FORMAT_PENALTY_WEIGHT_H * penalties


def compute_format_penalties_cpp(cpp_texts):
    """Return one format penalty per generated .cpp text.

    Args:
        cpp_texts: iterable of strings, one source file per sample.

    Returns:
        1-D float32 tensor of penalties (<= 0), scaled by
        FORMAT_PENALTY_WEIGHT_CPP. Empty input yields an empty tensor.
    """
    per_sample = []
    for text in cpp_texts:
        trimmed = text.strip()

        penalty = 0.0
        if not trimmed.startswith("```cpp"):
            penalty -= 1.0
        if not trimmed.endswith("```"):
            penalty -= 2.0
        # A text without any scope-resolution operator gets the heaviest penalty.
        if "::" not in text:
            penalty -= 6.0
        per_sample.append(penalty)

    penalties = torch.tensor(per_sample, dtype=torch.float32)
    return FORMAT_PENALTY_WEIGHT_CPP * penalties

# === Models and data ===
# NOTE(review): shuffle=True on an evaluation loader with no seed set in this
# notebook — confirm whether the 90-sample subset is meant to be reproducible.
test_loader = get_train_dataloader(
    "unreal_code_dataset_test.jsonl",
    batch_size=90,
    shuffle=True,
    limit=90,
)

# Three generator variants: no checkpoint, the SFT checkpoint, and the
# checkpoint saved under "generator" (bound to PPO_generator).
raw_generator = CodeGenerator()
SFT_generator = CodeGenerator(load_path="checkpoint_1_3b/generator_SFT")
PPO_generator = CodeGenerator(load_path="checkpoint_1_3b/generator")

discriminator = CodeDiscriminator(load_path="./checkpoint_1_3b/discriminator")

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for _model in (discriminator, raw_generator, SFT_generator, PPO_generator):
    _model.to(device)

# === Evaluation function ===
def _discriminator_scores(disc, h_list, c_list, batch_size=30):
    """Score (header, cpp) pairs with the discriminator, rescaled to [-1, 1].

    Args:
        disc: callable taking a list of strings and returning logits
            (assumed shape (B, 1) or (B,) — TODO confirm against CodeDiscriminator).
        h_list: header texts, one per sample.
        c_list: cpp texts, one per sample (paired with h_list via zip).
        batch_size: number of texts sent to the discriminator per call.

    Returns:
        1-D CPU tensor with one score per sample (empty if there are no samples).
    """
    texts = [h + "\n" + c for h, c in zip(h_list, c_list)]
    chunks = []
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            logits = disc(texts[start:start + batch_size])
            probs = torch.sigmoid(logits)
            adjusted = 2 * probs - 1.0  # [-1.0, 1.0]
            # reshape(-1) instead of squeeze(): squeeze() collapses a
            # single-sample chunk to a 0-d tensor, which torch.cat rejects.
            chunks.append(adjusted.reshape(-1).cpu())
    if not chunks:
        return torch.empty(0, dtype=torch.float32)
    return torch.cat(chunks, dim=0)


def evaluate_all_generators_threaded(raw_gen, sft_gen, ppo_gen, disc, dataloader, device):
    """Compare three generators on a 0-100 scale anchored at raw=0 and reference=100.

    For each batch, every generator produces header/cpp texts for the prompts.
    Each sample's score is its discriminator score plus the batch-mean format
    penalty (header + cpp). SFT and PPO scores are then expressed as a
    percentage of the per-sample gap between the raw generator and the
    reference code.

    Despite the name, no threads are used; generation runs sequentially.

    Args:
        raw_gen, sft_gen, ppo_gen: generators exposing ``eval()`` and
            ``generate(prompts, return_text=False)`` returning a dict with
            ``"header_texts"`` and ``"cpp_texts"``.
        disc: discriminator exposing ``eval()`` and callable on a list of strings.
        dataloader: iterable of batches with keys ``"requirement"``,
            ``"header_code"`` and ``"cpp_code"``.
        device: unused here; kept so existing callers keep working.

    Returns:
        dict with float values under ``"raw"`` (always 0.0), ``"sft"``,
        ``"ppo"`` and ``"ref"`` (always 100.0); sft/ppo are means over all
        evaluated samples.

    Raises:
        RuntimeError: if the dataloader yields no batches.
    """
    for module in (disc, raw_gen, sft_gen, ppo_gen):
        module.eval()

    def total_scores(h_list, c_list):
        # Per-sample discriminator score shifted by the batch-mean format penalty.
        penalty = compute_format_penalties_h(h_list).mean() + compute_format_penalties_cpp(c_list).mean()
        return _discriminator_scores(disc, h_list, c_list) + penalty

    sft_scores, ppo_scores = [], []

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            prompts = batch["requirement"]
            h_ref, cpp_ref = batch["header_code"], batch["cpp_code"]

            raw_code = raw_gen.generate(prompts, return_text=False)
            sft_code = sft_gen.generate(prompts, return_text=False)
            ppo_code = ppo_gen.generate(prompts, return_text=False)

            score_raw = total_scores(raw_code["header_texts"], raw_code["cpp_texts"])
            score_sft = total_scores(sft_code["header_texts"], sft_code["cpp_texts"])
            score_ppo = total_scores(ppo_code["header_texts"], ppo_code["cpp_texts"])
            score_ref = total_scores(h_ref, cpp_ref)

            # Clamp the raw->reference gap so the normalisation never divides by
            # zero or a negative number.
            score_range = (score_ref - score_raw).clamp(min=1e-6)
            sft_scores.append((score_sft - score_raw) / score_range * 100)
            ppo_scores.append((score_ppo - score_raw) / score_range * 100)

    if not sft_scores:
        raise RuntimeError("dataloader yielded no batches; nothing to evaluate")

    # torch.cat (not torch.stack) so batches of different sizes can be pooled.
    return {
        "raw": 0.0,
        "sft": torch.cat(sft_scores).mean().item(),
        "ppo": torch.cat(ppo_scores).mean().item(),
        "ref": 100.0,
    }

# === Run the evaluation ===
results = evaluate_all_generators_threaded(
    raw_gen=raw_generator,
    sft_gen=SFT_generator,
    ppo_gen=PPO_generator,
    disc=discriminator,
    dataloader=test_loader,
    device=device,
)

# === Print the results ===
# Labels are left-aligned to 19 characters so the colons line up.
for _label, _key in (
    ("Raw Generator Score", "raw"),
    ("SFT Generator Score", "sft"),
    ("PPO Generator Score", "ppo"),
    ("Reference Score", "ref"),
):
    print(f"{_label:<19} : {results[_key]:.2f}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Initializing new model from base: Qwen/Qwen1.5-0.5B


config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.24G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

Applying LoRA adaptation...
Freezing all parameters except LoRA blocks...
Trainable params: 1,572,864 / 465,560,576 (0.34%)


config.json:   0%|          | 0.00/498 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

No value head to load
Loading model + LoRA from checkpoint_1_3b/generator_SFT
Freezing all parameters except LoRA blocks...
Trainable params: 1,572,864 / 465,560,576 (0.34%)
Loaded value head.
Loading model + LoRA from checkpoint_1_3b/generator
Freezing all parameters except LoRA blocks...
Trainable params: 1,572,864 / 465,560,576 (0.34%)
Loaded value head.
Loading discriminator from ./checkpoint_1_3b/discriminator


Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Loaded classifier head from ./checkpoint_1_3b/discriminator/classifier.pt


Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Evaluating: 100%|██████████| 1/1 [10:15<00:00, 615.63s/it]

Raw Generator Score : 0.00
SFT Generator Score : 86.11
PPO Generator Score : 97.59
Reference Score     : 100.00



