In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
cd /content/drive/MyDrive/Colab Notebooks/Github/AIPlus99/HW6

/content/drive/MyDrive/Colab Notebooks/Github/AIPlus99/HW6


In [3]:
!pip install bitsandbytes
!pip install flash-attn --no-build-isolation



In [4]:
# PPO-based training loop for CodeGenerator
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
import torch
from torch import nn
from copy import deepcopy
from tqdm import tqdm
from code_generator_1_3b import CodeGenerator
from code_discriminator import CodeDiscriminator
from requirement_to_code_dataset import get_train_dataloader
from value_head import ValueHead
from bitsandbytes.optim import AdamW8bit
import gc

def batched_discriminator(discriminator, texts, batch_size=1):
    preds = []
    for i in range(0, len(texts), batch_size):
        chunk = texts[i:i+batch_size]
        logits = discriminator(chunk)
        preds.append(logits.cpu())
    return torch.cat(preds, dim=0).to(discriminator.classifier[0].weight.device)

FORMAT_PENALTY_WEIGHT_H = 1.0 / 1.0
FORMAT_PENALTY_WEIGHT_CPP = 1.0 / 1.0

def compute_format_penalties_h(header_texts):
    raw_penalties = torch.tensor([
        sum([
            -1.0 if not text.strip().startswith("```cpp") else 0.0,
            -2.0 if not text.strip().endswith("```") else 0.0,
            -1.0 if ("UCLASS" not in text and "USTRUCT" not in text and "UINTERFACE" not in text) else 0.0,
            -1.0 if "#pragma once" not in text else 0.0,
            -1.0 if '#include "CoreMinimal.h"' not in text else 0.0,
            -1.0 if '.generated.h' not in text else 0.0,
            -1.0 if 'GENERATED_BODY()' not in text else 0.0,
            -1.0 if '_API' not in text and ("UCLASS" in text or "UINTERFACE" in text) else 0.0,
        ])
        for text in header_texts
    ], dtype=torch.float32)
    return FORMAT_PENALTY_WEIGHT_H * raw_penalties

def compute_format_penalties_cpp(cpp_texts):
    raw_penalties = torch.tensor([
        sum([
            -1.0 if not text.strip().startswith("```cpp") else 0.0,
            -2.0 if not text.strip().endswith("```") else 0.0,
            -6.0 if "::" not in text else 0.0,
        ])
        for text in cpp_texts
    ], dtype=torch.float32)
    return FORMAT_PENALTY_WEIGHT_CPP * raw_penalties

def compute_log_ratios(log_probs, ref_log_probs, clip=10):
    log_ratio = (log_probs - ref_log_probs).clamp(min=-clip, max=clip)
    return torch.exp(log_ratio)

def compute_advantages(rewards, values, normalize=False):
    adv = rewards - values.detach()
    if normalize:
        return (adv - adv.mean()) / (adv.std() + 1e-6)
    else:
        return adv

def compute_ppo_loss(ratios, advantages, clip_eps):
    ratios = ratios.squeeze()
    advantages = advantages.squeeze()
    clipped = torch.clamp(ratios, 1 - clip_eps, 1 + clip_eps)

    surrogate1 = ratios * advantages
    surrogate2 = clipped * advantages

    surrogate = torch.where(
        advantages >= 0,
        torch.min(surrogate1, surrogate2),
        torch.max(surrogate1, surrogate2)
    )

    return -torch.mean(surrogate)

def compute_value_loss(values_disc, reward_disc, values_format, reward_format):
    loss_disc = nn.functional.mse_loss(values_disc, reward_disc.to(values_disc.dtype))
    loss_format = nn.functional.mse_loss(values_format, reward_format.to(values_format.dtype))
    return loss_disc + loss_format

def compute_sft_loss(generator, tokenizer, prompts, targets):
    joined_inputs = [p + t for p, t in zip(prompts, targets)]
    inputs = tokenizer(joined_inputs, return_tensors="pt", padding=True, truncation=True,
                       max_length=generator.max_length).to(generator.device)
    labels = inputs["input_ids"].clone()

    prompt_inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True,
                              max_length=generator.max_length)
    prompt_lens = [len(p[p != tokenizer.pad_token_id]) for p in prompt_inputs["input_ids"]]
    for i, l in enumerate(prompt_lens):
        labels[i, :l] = -100

    output = generator(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"], labels=labels)
    return output.loss


def update_generator(total_loss, generator, g_optimizer):
    total_loss.backward()
    torch.nn.utils.clip_grad_norm_(generator.parameters(), 1.0)
    g_optimizer.step()
    g_optimizer.zero_grad()

def update_discriminator(discriminator, d_optimizer, reference_texts, generated_texts, device):
    all_texts = reference_texts + generated_texts
    labels = torch.tensor([1] * len(reference_texts) + [0] * len(generated_texts), dtype=torch.float32).to(device)
    preds = batched_discriminator(discriminator, all_texts)
    loss = nn.BCEWithLogitsLoss()(preds, labels)

    d_optimizer.zero_grad()
    loss.backward()
    d_optimizer.step()


def train(generator, train_loader, epochs=3, lr=2e-5, clip_eps=0.2, do_PPO=True):
    generator.train()

    discriminator = CodeDiscriminator(load_path="./checkpoint_1_3b/discriminator").to(device)

    discriminator.train()

    ref_generator = CodeGenerator()

    ref_generator = deepcopy(generator)
    ref_generator.eval()
    for param in ref_generator.parameters():
        param.requires_grad = False

    g_optimizer = AdamW8bit(generator.parameters(), lr=lr)

    d_optimizer = AdamW8bit(discriminator.parameters(), lr=1e-4)

    os.makedirs("./checkpoint_1_3b", exist_ok=True)

    sft_coef = 0.5
    value_coef = 4.0

    do_SFT_h = False
    do_SFT_cpp = False

    if do_PPO == False:
        do_SFT_h = True
        do_SFT_cpp = True

    for epoch in range(epochs):
        loop = tqdm(train_loader)

        for step, batch in enumerate(loop):
            torch.cuda.empty_cache()

            requirements = batch["requirement"]
            reference_headers = batch["header_code"]
            reference_cpps = batch["cpp_code"]

            # === [Phase 1] Header PPO Sampling ===

            header_prompts = [generator.create_short_prompt_header(r) for r in requirements]

            # === [Phase 1-A] SFT Loss & Step ===
            if do_SFT_h:
                sft_loss = compute_sft_loss(generator, generator.tokenizer, header_prompts, reference_headers)
                print(f"\nsft_loss : {sft_loss}")
                sft_loss = sft_loss * sft_coef
                g_optimizer.zero_grad()
                sft_loss.backward()
                g_optimizer.step()

                # Cleanup SFT memory
                sft_loss = sft_loss.detach()
                del sft_loss
                torch.cuda.empty_cache()
                gc.collect()

            if do_PPO == True:
                # === [Phase 1-B] PPO Loss ===
                header_output = generator.sample_header_with_partial_grad(requirements, max_track_tokens=512)
                print("\n===== .h =====")
                print(header_output["header_texts"][0])
                print(header_output["header_texts"][1])

                header_texts = header_output["header_texts"]

                with torch.no_grad():
                    ref_header_output = ref_generator.sample_header_with_partial_grad(requirements, max_track_tokens=512)

                with torch.no_grad():
                    d_scores = torch.sigmoid(discriminator(header_texts)).squeeze()
                    reward_disc = 2 * (d_scores - 0.5)
                    format_penalties = compute_format_penalties_h(header_texts).to(generator.device)
                    if format_penalties.mean().item() < -0.01:
                        print("format_penalties < -0.01")
                        do_SFT_h = True
                    else:
                        do_SFT_h = False
                    header_rewards = reward_disc + format_penalties

                log_probs     = header_output["header_log_probs"].mean(dim=1)
                ref_log_probs = ref_header_output["header_log_probs"].mean(dim=1)
                ratios        = compute_log_ratios(log_probs, ref_log_probs)

                values_total, values_disc, values_format = generator.compute_value(header_prompts, header_texts, mode="h")
                advantages = compute_advantages(header_rewards, values_total)

                print(f"reward_disc : {reward_disc}")
                print(f"values_disc : {values_disc}")

                print(f"format_penalties : {format_penalties}")
                print(f"values_format : {values_format}")

                print(f"ratios : {ratios}")
                print(f"advantages : {advantages}")
                ppo_loss   = compute_ppo_loss(ratios, advantages, clip_eps)
                value_loss = compute_value_loss(values_disc, reward_disc, values_format, format_penalties)
                total_loss = ppo_loss + value_coef * value_loss

                # PPO update
                g_optimizer.zero_grad()
                total_loss.backward()
                g_optimizer.step()

                # === Discriminator training ===
                if reward_disc.mean().item() > -0.4 or step % 10 == 0:
                    print(f"\n[Step {step}] Training header discriminator...")
                    update_discriminator(discriminator, d_optimizer, reference_headers, header_texts, generator.device)

                # === Logging ===
                loop.set_description(f"[Epoch {epoch+1}] Header")
                loop.set_postfix({
                    "ppo":    ppo_loss.item(),
                    "value":  value_loss.item(),
                    "reward": header_rewards.mean().item()
                })

                # === Cleanup ===
                for var_name in [
                    "log_probs", "ref_log_probs", "ratios",
                    "values_total", "values_disc", "values_format",
                    "d_scores", "reward_disc", "format_penalties", "header_rewards",
                    "advantages", "ppo_loss", "value_loss", "total_loss"
                ]:
                    var = locals().get(var_name)
                    if isinstance(var, torch.Tensor):
                        locals()[var_name] = var.detach()
                    del var

                del header_output, ref_header_output, header_texts, header_prompts
                torch.cuda.empty_cache()


            # === [Phase 2] CPP PPO 학습 ===

            cpp_prompts = [generator.create_short_prompt_cpp(r, h) for r, h in zip(requirements, reference_headers)]

            # === [Phase 2-A] SFT Loss & Step ===
            if do_SFT_cpp:
                sft_loss = compute_sft_loss(generator, generator.tokenizer, cpp_prompts, reference_cpps)
                print(f"\nsft_loss : {sft_loss}")
                sft_loss = sft_loss * sft_coef
                g_optimizer.zero_grad()
                sft_loss.backward()
                g_optimizer.step()

                # Clean up SFT memory
                sft_loss = sft_loss.detach()
                del sft_loss
                torch.cuda.empty_cache()
                gc.collect()


            if do_PPO == True:
                # === [Phase 2-B] PPO + Value Loss ===

                cpp_output = generator.sample_cpp_with_partial_grad(requirements, reference_headers, max_track_tokens=320)
                print("\n===== .cpp =====")
                print(cpp_output["cpp_texts"][0])
                print(cpp_output["cpp_texts"][1])

                cpp_texts = cpp_output["cpp_texts"]

                with torch.no_grad():
                    ref_cpp_output = ref_generator.sample_cpp_with_partial_grad(requirements, reference_headers, max_track_tokens=320)

                with torch.no_grad():
                    d_scores = torch.sigmoid(discriminator(cpp_texts)).squeeze()
                    reward_disc = 2 * (d_scores - 0.5)
                    format_penalties = compute_format_penalties_cpp(cpp_texts).to(generator.device)
                    if format_penalties.mean().item() < -0.01:
                        print("format_penalties < -0.01")
                        do_SFT_cpp = True
                    else:
                        do_SFT_cpp = False
                    cpp_rewards = reward_disc + format_penalties

                log_probs     = cpp_output["cpp_log_probs"].mean(dim=1)
                ref_log_probs = ref_cpp_output["cpp_log_probs"].mean(dim=1)
                ratios        = compute_log_ratios(log_probs, ref_log_probs)

                values_total, values_disc, values_format = generator.compute_value(cpp_prompts, cpp_texts, mode="cpp")
                advantages = compute_advantages(cpp_rewards, values_total)

                print(f"reward_disc : {reward_disc}")
                print(f"values_disc : {values_disc}")

                print(f"format_penalties : {format_penalties}")
                print(f"values_format : {values_format}")

                print(f"ratios : {ratios}")
                print(f"advantages : {advantages}")
                ppo_loss   = compute_ppo_loss(ratios, advantages, clip_eps)
                value_loss = compute_value_loss(values_disc, reward_disc, values_format, format_penalties)
                total_loss = ppo_loss + value_coef * value_loss

                # PPO step
                g_optimizer.zero_grad()
                total_loss.backward()
                g_optimizer.step()

                # === Train Discriminator on CPP ===
                if reward_disc.mean().item() > -0.4 or step % 10 == 0:
                    print(f"\n[Step {step}] Training cpp discriminator...")
                    update_discriminator(discriminator, d_optimizer, reference_cpps, cpp_texts, generator.device)

                # === Logging ===
                loop.set_description(f"[Epoch {epoch+1}] CPP")
                loop.set_postfix({
                    "ppo":    ppo_loss.item(),
                    "value":  value_loss.item(),
                    "reward": cpp_rewards.mean().item()
                })

                # === Cleanup ===
                for var_name in [
                    "log_probs", "ref_log_probs", "ratios",
                    "values_total", "values_disc", "values_format",
                    "d_scores", "reward_disc", "format_penalties", "cpp_rewards",
                    "advantages", "ppo_loss", "value_loss", "total_loss"
                ]:
                    var = locals().get(var_name)
                    if isinstance(var, torch.Tensor):
                        locals()[var_name] = var.detach()
                    del var

                del cpp_output, ref_cpp_output, cpp_texts, cpp_prompts

                torch.cuda.empty_cache()


            # === 주기적 ref_generator 동기화 ===
            if step % 10 == 0:
                print(f"\n[Step {step}] Ref generator updated.")
                del ref_generator

                ref_generator = deepcopy(generator)
                ref_generator.eval()
                ref_generator = ref_generator.to(generator.device)
                for p in ref_generator.parameters():
                    p.requires_grad = False

                """
                with torch.no_grad():
                    h_generated_responses = generator.sample_header_with_partial_grad(["Create a character class with health and mana properties"],  max_track_tokens=0)
                    cpp_generated_responses = generator.sample_cpp_with_partial_grad(["Create a character class with health and mana properties"], h_generated_responses["header_texts"], max_track_tokens=0)

                print("\n===== .h =====")
                print(h_generated_responses["header_texts"][0])
                print("\n===== .cpp =====")
                print(cpp_generated_responses["cpp_texts"][0])
                """
                # Save checkpoint
                generator.save("./checkpoint_1_3b/generator_SFT")
                discriminator.save("./checkpoint_1_3b/discriminator_SFT")

        with torch.no_grad():
            h_generated_responses = generator.sample_header_with_partial_grad(["Create a character class with health and mana properties"],max_track_tokens=0)
            cpp_generated_responses = generator.sample_cpp_with_partial_grad(["Create a character class with health and mana properties"], h_generated_responses["header_texts"],max_track_tokens=0)
        print("\n===== .h =====")
        print(h_generated_responses["header_texts"][0])
        print("\n===== .cpp =====")
        print(cpp_generated_responses["cpp_texts"][0])


if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    generator = CodeGenerator()

    with torch.no_grad():
        h_generated_responses = generator.sample_header_with_partial_grad(["Create a character class with health and mana properties"],  max_track_tokens=0)
        cpp_generated_responses = generator.sample_cpp_with_partial_grad(["Create a character class with health and mana properties"], h_generated_responses["header_texts"], max_track_tokens=0)
    print("\n===== .h =====")
    print(h_generated_responses["header_texts"][0])
    print("\n===== .cpp =====")
    print(cpp_generated_responses["cpp_texts"][0])

    train_loader = get_train_dataloader("unreal_code_dataset.jsonl", batch_size=8, shuffle=True, limit= 1160)

    train(generator, train_loader, epochs=3, do_PPO=False)

    with torch.no_grad():
        h_generated_responses = generator.sample_header_with_partial_grad(["Create a character class with health and mana properties"],  max_track_tokens=0)
        cpp_generated_responses = generator.sample_cpp_with_partial_grad(["Create a character class with health and mana properties"], h_generated_responses["header_texts"], max_track_tokens=0)
    print("\n===== .h =====")
    print(h_generated_responses["header_texts"][0])
    print("\n===== .cpp =====")
    print(cpp_generated_responses["cpp_texts"][0])



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Initializing new model from base: Qwen/Qwen1.5-0.5B
Applying LoRA adaptation...
Freezing all parameters except LoRA blocks...
Trainable params: 1,572,864 / 465,560,576 (0.34%)
No value head to load

===== .h =====
```cpp
#include <UCLASS>
#include <iostream>
using namespace std;

class Character
{
private:
    int health;
    int mana;
public:
    Character();
    Character(int h, int m);
    ~Character();
    void setHealth(int h);
    void setMana(int m);
    int getHealth();
    int getMana();
};

int main()
{
    Character c1(10, 10);
    Character c2(20, 20);
    c1.setHealth(15);
    c2.setMana(25);
    c1.setHealth(10);
    c2.setMana(25);
    cout << "Health: " << c1.getHealth() << endl;
    cout << "Mana: " << c2.getMana() << endl;
    return 0;
}
```


===== .cpp =====
```cpp
#include <UCLASS>
#include <iostream>
using namespace std;

class Character
{
private:
    int health;
    int mana;
public:
    Character();
    Character(int h, int m);
    ~Character();
    void setHe

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Loading discriminator from ./checkpoint_1_3b/discriminator
Loaded classifier head from ./checkpoint_1_3b/discriminator/classifier.pt
Initializing new model from base: Qwen/Qwen1.5-0.5B
Applying LoRA adaptation...
Freezing all parameters except LoRA blocks...
Trainable params: 1,572,864 / 465,560,576 (0.34%)
No value head to load


  0%|          | 0/145 [00:00<?, ?it/s]


sft_loss : 2.1048312187194824

sft_loss : 4.7335076332092285

[Step 0] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


  1%|          | 1/145 [00:06<15:18,  6.38s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 2.003296375274658

sft_loss : 4.861289024353027


  1%|▏         | 2/145 [00:07<08:26,  3.54s/it]


sft_loss : 1.9376194477081299

sft_loss : 1.1324185132980347


  2%|▏         | 3/145 [00:09<06:12,  2.62s/it]


sft_loss : 1.8167529106140137

sft_loss : 1.5925097465515137


  3%|▎         | 4/145 [00:10<05:09,  2.19s/it]


sft_loss : 2.182502269744873

sft_loss : 2.026303768157959


  3%|▎         | 5/145 [00:12<04:32,  1.95s/it]


sft_loss : 2.296091079711914

sft_loss : 2.568288803100586


  4%|▍         | 6/145 [00:14<04:12,  1.81s/it]


sft_loss : 1.997699499130249

sft_loss : 1.9452553987503052


  5%|▍         | 7/145 [00:15<03:57,  1.72s/it]


sft_loss : 1.8616420030593872

sft_loss : 5.60545539855957


  6%|▌         | 8/145 [00:17<03:49,  1.67s/it]


sft_loss : 1.8569637537002563

sft_loss : 2.325833797454834


  6%|▌         | 9/145 [00:18<03:40,  1.62s/it]


sft_loss : 1.694636344909668

sft_loss : 1.1847901344299316


  7%|▋         | 10/145 [00:20<03:33,  1.58s/it]


sft_loss : 3.20481276512146

sft_loss : 3.0915708541870117

[Step 10] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


  8%|▊         | 11/145 [00:26<06:45,  3.02s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 2.4139864444732666

sft_loss : 3.4004383087158203


  8%|▊         | 12/145 [00:28<05:42,  2.58s/it]


sft_loss : 1.8054897785186768

sft_loss : 2.80125093460083


  9%|▉         | 13/145 [00:29<04:59,  2.27s/it]


sft_loss : 2.08313250541687

sft_loss : 2.70007061958313


 10%|▉         | 14/145 [00:31<04:27,  2.04s/it]


sft_loss : 1.6086289882659912

sft_loss : 6.110308647155762


 10%|█         | 15/145 [00:32<04:08,  1.91s/it]


sft_loss : 1.5772790908813477

sft_loss : 2.229525089263916


 11%|█         | 16/145 [00:34<03:51,  1.80s/it]


sft_loss : 1.9110239744186401

sft_loss : 1.8266507387161255


 12%|█▏        | 17/145 [00:35<03:38,  1.71s/it]


sft_loss : 1.6492823362350464

sft_loss : 3.598097562789917


 12%|█▏        | 18/145 [00:37<03:29,  1.65s/it]


sft_loss : 1.868765115737915

sft_loss : 2.7661802768707275


 13%|█▎        | 19/145 [00:38<03:23,  1.62s/it]


sft_loss : 1.779442548751831

sft_loss : 5.886510372161865


 14%|█▍        | 20/145 [00:40<03:19,  1.60s/it]


sft_loss : 1.9000256061553955

sft_loss : 2.016019344329834

[Step 20] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 14%|█▍        | 21/145 [00:46<06:11,  3.00s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 1.901344895362854

sft_loss : 3.1328024864196777


 15%|█▌        | 22/145 [00:48<05:15,  2.56s/it]


sft_loss : 1.664332628250122

sft_loss : 1.151943564414978


 16%|█▌        | 23/145 [00:49<04:35,  2.26s/it]


sft_loss : 1.6523598432540894

sft_loss : 6.28377628326416


 17%|█▋        | 24/145 [00:51<04:10,  2.07s/it]


sft_loss : 1.8327831029891968

sft_loss : 4.0981669425964355


 17%|█▋        | 25/145 [00:52<03:50,  1.92s/it]


sft_loss : 1.7435271739959717

sft_loss : 2.022040367126465


 18%|█▊        | 26/145 [00:54<03:34,  1.80s/it]


sft_loss : 1.6588811874389648

sft_loss : 2.1855762004852295


 19%|█▊        | 27/145 [00:55<03:23,  1.72s/it]


sft_loss : 3.683454751968384

sft_loss : 7.944808006286621


 19%|█▉        | 28/145 [00:57<03:17,  1.69s/it]


sft_loss : 1.3506731986999512

sft_loss : 1.3767975568771362


 20%|██        | 29/145 [00:59<03:11,  1.65s/it]


sft_loss : 1.3799687623977661

sft_loss : 4.212881088256836


 21%|██        | 30/145 [01:00<03:07,  1.63s/it]


sft_loss : 2.133293628692627

sft_loss : 3.134890079498291

[Step 30] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 21%|██▏       | 31/145 [01:07<05:50,  3.07s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 1.5999237298965454

sft_loss : 3.299463987350464


 22%|██▏       | 32/145 [01:08<04:54,  2.61s/it]


sft_loss : 1.422866702079773

sft_loss : 1.5121848583221436


 23%|██▎       | 33/145 [01:10<04:15,  2.28s/it]


sft_loss : 1.7327178716659546

sft_loss : 7.299235820770264


 23%|██▎       | 34/145 [01:11<03:51,  2.09s/it]


sft_loss : 1.6460351943969727

sft_loss : 5.057699203491211


 24%|██▍       | 35/145 [01:13<03:32,  1.93s/it]


sft_loss : 1.478013515472412

sft_loss : 1.3828274011611938


 25%|██▍       | 36/145 [01:14<03:18,  1.82s/it]


sft_loss : 1.4310047626495361

sft_loss : 3.5387520790100098


 26%|██▌       | 37/145 [01:16<03:06,  1.72s/it]


sft_loss : 1.2448582649230957

sft_loss : 1.446786880493164


 26%|██▌       | 38/145 [01:17<02:57,  1.66s/it]


sft_loss : 2.442587375640869

sft_loss : 4.58160400390625


 27%|██▋       | 39/145 [01:19<02:53,  1.64s/it]


sft_loss : 2.1084511280059814

sft_loss : 4.715292930603027


 28%|██▊       | 40/145 [01:21<02:51,  1.63s/it]


sft_loss : 2.315321207046509

sft_loss : 2.881634473800659

[Step 40] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 28%|██▊       | 41/145 [01:27<05:16,  3.05s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 1.320035457611084

sft_loss : 1.1095526218414307


 29%|██▉       | 42/145 [01:28<04:25,  2.58s/it]


sft_loss : 1.2575528621673584

sft_loss : 3.7434804439544678


 30%|██▉       | 43/145 [01:30<03:50,  2.26s/it]


sft_loss : 1.2601518630981445

sft_loss : 1.9339181184768677


 30%|███       | 44/145 [01:32<03:26,  2.05s/it]


sft_loss : 1.113319754600525

sft_loss : 4.387770175933838


 31%|███       | 45/145 [01:33<03:10,  1.90s/it]


sft_loss : 1.3698972463607788

sft_loss : 4.5724196434021


 32%|███▏      | 46/145 [01:35<02:57,  1.79s/it]


sft_loss : 1.3160061836242676

sft_loss : 1.106419563293457


 32%|███▏      | 47/145 [01:36<02:46,  1.70s/it]


sft_loss : 3.1148245334625244

sft_loss : 1.2713061571121216


 33%|███▎      | 48/145 [01:38<02:40,  1.65s/it]


sft_loss : 1.3004131317138672

sft_loss : 6.559048652648926


 34%|███▍      | 49/145 [01:39<02:36,  1.63s/it]


sft_loss : 1.2497152090072632

sft_loss : 1.5527466535568237


 34%|███▍      | 50/145 [01:41<02:31,  1.59s/it]


sft_loss : 1.2024997472763062

sft_loss : 3.30110239982605

[Step 50] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 35%|███▌      | 51/145 [01:47<04:37,  2.95s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 2.1743929386138916

sft_loss : 4.039920806884766


 36%|███▌      | 52/145 [01:48<03:56,  2.54s/it]


sft_loss : 1.2693195343017578

sft_loss : 2.5800867080688477


 37%|███▋      | 53/145 [01:50<03:26,  2.25s/it]


sft_loss : 1.190361738204956

sft_loss : 3.107017755508423


 37%|███▋      | 54/145 [01:52<03:05,  2.04s/it]


sft_loss : 1.5083431005477905

sft_loss : 6.289116382598877


 38%|███▊      | 55/145 [01:53<02:51,  1.91s/it]


sft_loss : 1.155421257019043

sft_loss : 0.9830209016799927


 39%|███▊      | 56/145 [01:55<02:39,  1.79s/it]


sft_loss : 1.0544407367706299

sft_loss : 1.5525966882705688


 39%|███▉      | 57/145 [01:56<02:30,  1.71s/it]


sft_loss : 1.0285142660140991

sft_loss : 3.0261900424957275


 40%|████      | 58/145 [01:58<02:24,  1.66s/it]


sft_loss : 0.9641481041908264

sft_loss : 3.2470829486846924


 41%|████      | 59/145 [01:59<02:19,  1.62s/it]


sft_loss : 1.3190580606460571

sft_loss : 3.255471706390381


 41%|████▏     | 60/145 [02:01<02:16,  1.60s/it]


sft_loss : 1.1328332424163818

sft_loss : 5.094148635864258

[Step 60] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 42%|████▏     | 61/145 [02:07<04:12,  3.00s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 1.0855355262756348

sft_loss : 2.604285955429077


 43%|████▎     | 62/145 [02:09<03:32,  2.57s/it]


sft_loss : 1.0020252466201782

sft_loss : 1.864938735961914


 43%|████▎     | 63/145 [02:10<03:05,  2.26s/it]


sft_loss : 0.9638136625289917

sft_loss : 3.2486417293548584


 44%|████▍     | 64/145 [02:12<02:45,  2.04s/it]


sft_loss : 0.8220266699790955

sft_loss : 3.454942464828491


 45%|████▍     | 65/145 [02:13<02:31,  1.90s/it]


sft_loss : 0.9114456176757812

sft_loss : 7.609445095062256


 46%|████▌     | 66/145 [02:15<02:23,  1.81s/it]


sft_loss : 1.0106040239334106

sft_loss : 4.484696865081787


 46%|████▌     | 67/145 [02:16<02:14,  1.73s/it]


sft_loss : 0.9762048125267029

sft_loss : 1.7732226848602295


 47%|████▋     | 68/145 [02:18<02:08,  1.67s/it]


sft_loss : 0.8593692779541016

sft_loss : 3.4696991443634033


 48%|████▊     | 69/145 [02:20<02:04,  1.63s/it]


sft_loss : 1.0026077032089233

sft_loss : 2.753765821456909


 48%|████▊     | 70/145 [02:21<02:01,  1.62s/it]


sft_loss : 0.9123222231864929

sft_loss : 1.2721885442733765

[Step 70] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 49%|████▉     | 71/145 [02:27<03:44,  3.03s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 1.6669715642929077

sft_loss : 0.8253369927406311


 50%|████▉     | 72/145 [02:29<03:08,  2.58s/it]


sft_loss : 0.7158918976783752

sft_loss : 2.502129554748535


 50%|█████     | 73/145 [02:30<02:43,  2.27s/it]


sft_loss : 0.9598549008369446

sft_loss : 1.5457872152328491


 51%|█████     | 74/145 [02:32<02:24,  2.04s/it]


sft_loss : 0.8532312512397766

sft_loss : 3.525435209274292


 52%|█████▏    | 75/145 [02:34<02:13,  1.90s/it]


sft_loss : 0.778245210647583

sft_loss : 4.218320369720459


 52%|█████▏    | 76/145 [02:35<02:04,  1.80s/it]


sft_loss : 0.8240225315093994

sft_loss : 2.029778480529785


 53%|█████▎    | 77/145 [02:37<01:56,  1.72s/it]


sft_loss : 0.8261615037918091

sft_loss : 2.2144060134887695


 54%|█████▍    | 78/145 [02:38<01:51,  1.66s/it]


sft_loss : 0.8154776692390442

sft_loss : 2.0757908821105957


 54%|█████▍    | 79/145 [02:40<01:46,  1.61s/it]


sft_loss : 0.7664183974266052

sft_loss : 0.9382392168045044


 55%|█████▌    | 80/145 [02:41<01:43,  1.59s/it]


sft_loss : 1.2667007446289062

sft_loss : 1.7692240476608276

[Step 80] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 56%|█████▌    | 81/145 [02:48<03:13,  3.03s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.719144344329834

sft_loss : 2.3393471240997314


 57%|█████▋    | 82/145 [02:49<02:42,  2.59s/it]


sft_loss : 0.8751301169395447

sft_loss : 1.819657325744629


 57%|█████▋    | 83/145 [02:51<02:20,  2.27s/it]


sft_loss : 0.6840619444847107

sft_loss : 0.8206005096435547


 58%|█████▊    | 84/145 [02:52<02:04,  2.04s/it]


sft_loss : 0.8709169030189514

sft_loss : 0.7470889687538147


 59%|█████▊    | 85/145 [02:54<01:52,  1.88s/it]


sft_loss : 0.6335981488227844

sft_loss : 0.7470217347145081


 59%|█████▉    | 86/145 [02:55<01:44,  1.77s/it]


sft_loss : 0.7858021855354309

sft_loss : 4.707981586456299


 60%|██████    | 87/145 [02:57<01:39,  1.72s/it]


sft_loss : 0.7208454012870789

sft_loss : 0.8750532269477844


 61%|██████    | 88/145 [02:58<01:34,  1.66s/it]


sft_loss : 1.6427280902862549

sft_loss : 1.5169939994812012


 61%|██████▏   | 89/145 [03:00<01:30,  1.62s/it]


sft_loss : 0.6299163699150085

sft_loss : 4.2977471351623535


 62%|██████▏   | 90/145 [03:01<01:28,  1.60s/it]


sft_loss : 0.7068589329719543

sft_loss : 0.8302950859069824

[Step 90] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 63%|██████▎   | 91/145 [03:08<02:41,  3.00s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.8269153237342834

sft_loss : 1.9813929796218872


 63%|██████▎   | 92/145 [03:09<02:16,  2.57s/it]


sft_loss : 1.4096791744232178

sft_loss : 1.5320326089859009


 64%|██████▍   | 93/145 [03:11<01:57,  2.26s/it]


sft_loss : 0.9406397342681885

sft_loss : 2.117842197418213


 65%|██████▍   | 94/145 [03:12<01:44,  2.04s/it]


sft_loss : 0.8246340155601501

sft_loss : 1.7488398551940918


 66%|██████▌   | 95/145 [03:14<01:34,  1.89s/it]


sft_loss : 0.6897677183151245

sft_loss : 0.9466442465782166


 66%|██████▌   | 96/145 [03:15<01:27,  1.79s/it]


sft_loss : 0.7376590967178345

sft_loss : 1.6997792720794678


 67%|██████▋   | 97/145 [03:17<01:21,  1.70s/it]


sft_loss : 0.9270493984222412

sft_loss : 2.5640220642089844


 68%|██████▊   | 98/145 [03:18<01:17,  1.65s/it]


sft_loss : 0.6610335111618042

sft_loss : 0.7652133107185364


 68%|██████▊   | 99/145 [03:20<01:13,  1.60s/it]


sft_loss : 0.6842288970947266

sft_loss : 1.4124009609222412


 69%|██████▉   | 100/145 [03:21<01:10,  1.57s/it]


sft_loss : 1.5467220544815063

sft_loss : 3.638413906097412

[Step 100] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 70%|██████▉   | 101/145 [03:28<02:11,  2.99s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.6806958317756653

sft_loss : 1.0814433097839355


 70%|███████   | 102/145 [03:29<01:49,  2.55s/it]


sft_loss : 0.6820902824401855

sft_loss : 1.7418904304504395


 71%|███████   | 103/145 [03:31<01:34,  2.24s/it]


sft_loss : 0.759156346321106

sft_loss : 2.7676949501037598


 72%|███████▏  | 104/145 [03:32<01:23,  2.03s/it]


sft_loss : 0.895805299282074

sft_loss : 2.555806875228882


 72%|███████▏  | 105/145 [03:34<01:15,  1.89s/it]


sft_loss : 0.8760727643966675

sft_loss : 0.9273889660835266


 73%|███████▎  | 106/145 [03:35<01:09,  1.79s/it]


sft_loss : 0.6672253012657166

sft_loss : 1.5785645246505737


 74%|███████▍  | 107/145 [03:37<01:04,  1.70s/it]


sft_loss : 0.6302940845489502

sft_loss : 0.7538571953773499


 74%|███████▍  | 108/145 [03:38<01:01,  1.65s/it]


sft_loss : 0.682700514793396

sft_loss : 3.2110671997070312


 75%|███████▌  | 109/145 [03:40<00:57,  1.61s/it]


sft_loss : 0.9432966113090515

sft_loss : 1.4825443029403687


 76%|███████▌  | 110/145 [03:41<00:55,  1.59s/it]


sft_loss : 0.6438703536987305

sft_loss : 0.6748359799385071

[Step 110] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 77%|███████▋  | 111/145 [03:48<01:43,  3.04s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.7065472602844238

sft_loss : 2.2898006439208984


 77%|███████▋  | 112/145 [03:49<01:25,  2.58s/it]


sft_loss : 0.7621370553970337

sft_loss : 2.586369276046753


 78%|███████▊  | 113/145 [03:51<01:12,  2.27s/it]


sft_loss : 0.7512739896774292

sft_loss : 1.590317964553833


 79%|███████▊  | 114/145 [03:53<01:03,  2.05s/it]


sft_loss : 0.7507856488227844

sft_loss : 3.1533215045928955


 79%|███████▉  | 115/145 [03:54<00:56,  1.89s/it]


sft_loss : 1.3225125074386597

sft_loss : 5.400423049926758


 80%|████████  | 116/145 [03:56<00:52,  1.80s/it]


sft_loss : 0.6619682312011719

sft_loss : 0.6576573848724365


 81%|████████  | 117/145 [03:57<00:48,  1.72s/it]


sft_loss : 0.6423757672309875

sft_loss : 3.34002685546875


 81%|████████▏ | 118/145 [03:59<00:44,  1.65s/it]


sft_loss : 0.746131956577301

sft_loss : 1.4554294347763062


 82%|████████▏ | 119/145 [04:00<00:42,  1.62s/it]


sft_loss : 1.046738624572754

sft_loss : 3.1433444023132324


 83%|████████▎ | 120/145 [04:02<00:40,  1.60s/it]


sft_loss : 0.7206485271453857

sft_loss : 1.1730011701583862

[Step 120] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 83%|████████▎ | 121/145 [04:08<01:13,  3.05s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.7238277792930603

sft_loss : 1.620513916015625


 84%|████████▍ | 122/145 [04:10<00:59,  2.59s/it]


sft_loss : 0.760241687297821

sft_loss : 6.385657787322998


 85%|████████▍ | 123/145 [04:11<00:50,  2.31s/it]


sft_loss : 0.5651670098304749

sft_loss : 1.7257071733474731


 86%|████████▌ | 124/145 [04:13<00:43,  2.08s/it]


sft_loss : 0.8245099186897278

sft_loss : 1.5473796129226685


 86%|████████▌ | 125/145 [04:14<00:38,  1.90s/it]


sft_loss : 0.5666486620903015

sft_loss : 1.7818244695663452


 87%|████████▋ | 126/145 [04:16<00:33,  1.78s/it]


sft_loss : 0.7270264625549316

sft_loss : 1.3404289484024048


 88%|████████▊ | 127/145 [04:17<00:30,  1.71s/it]


sft_loss : 0.6789961457252502

sft_loss : 2.3875865936279297


 88%|████████▊ | 128/145 [04:19<00:28,  1.66s/it]


sft_loss : 0.7253471612930298

sft_loss : 1.9814170598983765


 89%|████████▉ | 129/145 [04:21<00:25,  1.62s/it]


sft_loss : 0.6057642102241516

sft_loss : 1.2072514295578003


 90%|████████▉ | 130/145 [04:22<00:23,  1.59s/it]


sft_loss : 0.7403973937034607

sft_loss : 0.9623823761940002

[Step 130] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 90%|█████████ | 131/145 [04:29<00:45,  3.21s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.9389328360557556

sft_loss : 2.5754120349884033


 91%|█████████ | 132/145 [04:31<00:35,  2.71s/it]


sft_loss : 0.6374000906944275

sft_loss : 4.441141605377197


 92%|█████████▏| 133/145 [04:32<00:28,  2.36s/it]


sft_loss : 0.6713425517082214

sft_loss : 1.7761796712875366


 92%|█████████▏| 134/145 [04:34<00:23,  2.11s/it]


sft_loss : 0.6113185286521912

sft_loss : 2.8904738426208496


 93%|█████████▎| 135/145 [04:35<00:19,  1.94s/it]


sft_loss : 0.588311493396759

sft_loss : 2.329740285873413


 94%|█████████▍| 136/145 [04:37<00:16,  1.82s/it]


sft_loss : 0.6133102774620056

sft_loss : 3.296578884124756


 94%|█████████▍| 137/145 [04:38<00:13,  1.74s/it]


sft_loss : 0.6783388257026672

sft_loss : 1.7214304208755493


 95%|█████████▌| 138/145 [04:40<00:11,  1.67s/it]


sft_loss : 1.9423385858535767

sft_loss : 4.432968616485596


 96%|█████████▌| 139/145 [04:41<00:09,  1.64s/it]


sft_loss : 0.6564850807189941

sft_loss : 2.6804041862487793


 97%|█████████▋| 140/145 [04:43<00:08,  1.62s/it]


sft_loss : 0.682193398475647

sft_loss : 0.6485667824745178

[Step 140] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 97%|█████████▋| 141/145 [04:49<00:11,  2.96s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.8182990550994873

sft_loss : 3.665487289428711


 98%|█████████▊| 142/145 [04:51<00:07,  2.54s/it]


sft_loss : 0.6825276613235474

sft_loss : 3.013423442840576


 99%|█████████▊| 143/145 [04:52<00:04,  2.24s/it]


sft_loss : 0.5782428979873657

sft_loss : 3.969665765762329


 99%|█████████▉| 144/145 [04:54<00:02,  2.04s/it]


sft_loss : 3.7740955352783203

sft_loss : 2.0768983364105225


100%|██████████| 145/145 [04:55<00:00,  2.04s/it]



===== .h =====
```cpp
#pragma once

#include "CoreMinimal.h"
#include "GameFramework/Actor.h"
#include "HealthManaCharacter.generated.h"

UCLASS()
class YOURPROJECT_API AHealthManaCharacter : public AActor
{
	GENERATED_BODY()

public:
	// Sets default values for this actor's properties
	AHealthManaCharacter();

protected:
	// Called when the game starts or when spawned
	virtual void BeginPlay() override;

public:
	// Sets the health and mana properties
	UPROPERTY(VisibleAnywhere, Category="Health")
	int32 Health;
	UPROPERTY(VisibleAnywhere, Category="Mana")
	int32 Mana;

	// Called when the player presses a key
	virtual void KeyPress(UPassEvent* pEvent) override;

	// Called when the player moves
	virtual void MovementTick(float TickTime) override;

	// Called when the player dies
	virtual void OnDeath() override;

private:
	// Called when the player's health drops below 0
	virtual void OnHealthDied(int32 HealthDied) override;

	// Called when the player's mana drops below 0
	virtual 

  0%|          | 0/145 [00:00<?, ?it/s]


sft_loss : 0.5538173317909241

sft_loss : 4.57965612411499

[Step 0] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


  1%|          | 1/145 [00:06<15:04,  6.28s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.9597225785255432

sft_loss : 4.612833023071289


  1%|▏         | 2/145 [00:07<08:19,  3.50s/it]


sft_loss : 0.6439634561538696

sft_loss : 1.986607551574707


  2%|▏         | 3/145 [00:09<06:08,  2.60s/it]


sft_loss : 0.5561613440513611

sft_loss : 3.8154921531677246


  3%|▎         | 4/145 [00:10<05:06,  2.17s/it]


sft_loss : 0.8802759051322937

sft_loss : 1.7470134496688843


  3%|▎         | 5/145 [00:12<04:33,  1.95s/it]


sft_loss : 0.6463631987571716

sft_loss : 1.2994779348373413


  4%|▍         | 6/145 [00:13<04:10,  1.80s/it]


sft_loss : 0.761961042881012

sft_loss : 1.2789069414138794


  5%|▍         | 7/145 [00:15<03:54,  1.70s/it]


sft_loss : 0.6488782167434692

sft_loss : 2.1563150882720947


  6%|▌         | 8/145 [00:16<03:45,  1.65s/it]


sft_loss : 0.9833230376243591

sft_loss : 3.4292209148406982


  6%|▌         | 9/145 [00:18<03:40,  1.62s/it]


sft_loss : 0.6045082211494446

sft_loss : 6.497182846069336


  7%|▋         | 10/145 [00:20<03:35,  1.60s/it]


sft_loss : 0.7748503684997559

sft_loss : 2.742375373840332

[Step 10] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


  8%|▊         | 11/145 [00:26<06:51,  3.07s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.694803774356842

sft_loss : 0.7090094685554504


  8%|▊         | 12/145 [00:28<05:46,  2.60s/it]


sft_loss : 0.5635454058647156

sft_loss : 2.673550844192505


  9%|▉         | 13/145 [00:29<05:01,  2.29s/it]


sft_loss : 0.5638166666030884

sft_loss : 2.197342872619629


 10%|▉         | 14/145 [00:31<04:29,  2.06s/it]


sft_loss : 0.6124984622001648

sft_loss : 0.9154421091079712


 10%|█         | 15/145 [00:32<04:07,  1.90s/it]


sft_loss : 0.6072466373443604

sft_loss : 1.1269418001174927


 11%|█         | 16/145 [00:34<03:50,  1.79s/it]


sft_loss : 0.629358172416687

sft_loss : 1.1433789730072021


 12%|█▏        | 17/145 [00:35<03:38,  1.71s/it]


sft_loss : 0.764534056186676

sft_loss : 2.926811933517456


 12%|█▏        | 18/145 [00:37<03:29,  1.65s/it]


sft_loss : 0.6703581213951111

sft_loss : 2.6475822925567627


 13%|█▎        | 19/145 [00:38<03:24,  1.62s/it]


sft_loss : 0.6317698955535889

sft_loss : 2.168058395385742


 14%|█▍        | 20/145 [00:40<03:18,  1.59s/it]


sft_loss : 1.8557829856872559

sft_loss : 5.796503067016602

[Step 20] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 14%|█▍        | 21/145 [00:46<05:51,  2.84s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.6645188927650452

sft_loss : 2.83210825920105


 15%|█▌        | 22/145 [00:47<05:01,  2.45s/it]


sft_loss : 0.7189242839813232

sft_loss : 0.6741805672645569


 16%|█▌        | 23/145 [00:49<04:24,  2.17s/it]


sft_loss : 0.5750202536582947

sft_loss : 2.4490294456481934


 17%|█▋        | 24/145 [00:50<04:00,  1.99s/it]


sft_loss : 0.553665816783905

sft_loss : 2.1231939792633057


 17%|█▋        | 25/145 [00:52<03:43,  1.86s/it]


sft_loss : 0.9045392870903015

sft_loss : 1.3980860710144043


 18%|█▊        | 26/145 [00:53<03:29,  1.76s/it]


sft_loss : 0.5708305239677429

sft_loss : 3.0042309761047363


 19%|█▊        | 27/145 [00:55<03:18,  1.68s/it]


sft_loss : 0.7184673547744751

sft_loss : 2.788670301437378


 19%|█▉        | 28/145 [00:56<03:12,  1.64s/it]


sft_loss : 0.6638219952583313

sft_loss : 0.823015570640564


 20%|██        | 29/145 [00:58<03:05,  1.60s/it]


sft_loss : 0.7054901719093323

sft_loss : 1.9363385438919067


 21%|██        | 30/145 [00:59<03:00,  1.57s/it]


sft_loss : 0.5324205160140991

sft_loss : 1.6298854351043701

[Step 30] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 21%|██▏       | 31/145 [01:06<05:45,  3.03s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.648556113243103

sft_loss : 1.4199674129486084


 22%|██▏       | 32/145 [01:07<04:51,  2.58s/it]


sft_loss : 0.5537680983543396

sft_loss : 1.8117905855178833


 23%|██▎       | 33/145 [01:09<04:14,  2.27s/it]


sft_loss : 0.6210193037986755

sft_loss : 1.0077674388885498


 23%|██▎       | 34/145 [01:10<03:47,  2.05s/it]


sft_loss : 0.815578281879425

sft_loss : 3.5618271827697754


 24%|██▍       | 35/145 [01:12<03:28,  1.90s/it]


sft_loss : 0.6875499486923218

sft_loss : 1.9315322637557983


 25%|██▍       | 36/145 [01:13<03:16,  1.80s/it]


sft_loss : 0.6818170547485352

sft_loss : 1.7166814804077148


 26%|██▌       | 37/145 [01:15<03:06,  1.72s/it]


sft_loss : 0.7770956754684448

sft_loss : 3.4785830974578857


 26%|██▌       | 38/145 [01:17<02:58,  1.67s/it]


sft_loss : 0.43426012992858887

sft_loss : 0.7320151925086975


 27%|██▋       | 39/145 [01:18<02:52,  1.62s/it]


sft_loss : 0.7075913548469543

sft_loss : 2.018198251724243


 28%|██▊       | 40/145 [01:20<02:47,  1.60s/it]


sft_loss : 0.7553194165229797

sft_loss : 0.5704517364501953

[Step 40] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 28%|██▊       | 41/145 [01:26<05:18,  3.06s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.5338940620422363

sft_loss : 1.1530429124832153


 29%|██▉       | 42/145 [01:28<04:27,  2.60s/it]


sft_loss : 0.710551381111145

sft_loss : 2.26790189743042


 30%|██▉       | 43/145 [01:29<03:52,  2.28s/it]


sft_loss : 0.5488821268081665

sft_loss : 0.6097336411476135


 30%|███       | 44/145 [01:31<03:26,  2.05s/it]


sft_loss : 0.6141440272331238

sft_loss : 2.744563341140747


 31%|███       | 45/145 [01:32<03:09,  1.89s/it]


sft_loss : 0.5340821743011475

sft_loss : 0.8080744743347168


 32%|███▏      | 46/145 [01:34<02:55,  1.77s/it]


sft_loss : 0.5697867274284363

sft_loss : 0.8447109460830688


 32%|███▏      | 47/145 [01:35<02:46,  1.70s/it]


sft_loss : 0.6980170011520386

sft_loss : 3.5232837200164795


 33%|███▎      | 48/145 [01:37<02:40,  1.65s/it]


sft_loss : 0.862949788570404

sft_loss : 2.6439099311828613


 34%|███▍      | 49/145 [01:38<02:37,  1.64s/it]


sft_loss : 0.508901059627533

sft_loss : 3.4535603523254395


 34%|███▍      | 50/145 [01:40<02:32,  1.60s/it]


sft_loss : 0.5013473033905029

sft_loss : 0.8425979614257812

[Step 50] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 35%|███▌      | 51/145 [01:46<04:44,  3.03s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.4409928619861603

sft_loss : 3.480219841003418


 36%|███▌      | 52/145 [01:48<04:01,  2.59s/it]


sft_loss : 0.761559247970581

sft_loss : 1.5613737106323242


 37%|███▋      | 53/145 [01:49<03:29,  2.27s/it]


sft_loss : 0.48561206459999084

sft_loss : 6.042029857635498


 37%|███▋      | 54/145 [01:51<03:08,  2.07s/it]


sft_loss : 0.5361128449440002

sft_loss : 2.7277491092681885


 38%|███▊      | 55/145 [01:52<02:51,  1.90s/it]


sft_loss : 0.5188259482383728

sft_loss : 1.6919114589691162


 39%|███▊      | 56/145 [01:54<02:39,  1.79s/it]


sft_loss : 0.6433987617492676

sft_loss : 0.6500017642974854


 39%|███▉      | 57/145 [01:55<02:30,  1.71s/it]


sft_loss : 1.3515665531158447

sft_loss : 3.096433162689209


 40%|████      | 58/145 [01:57<02:24,  1.66s/it]


sft_loss : 0.6193497776985168

sft_loss : 3.334787368774414


 41%|████      | 59/145 [01:59<02:19,  1.62s/it]


sft_loss : 0.6065115332603455

sft_loss : 2.288451671600342


 41%|████▏     | 60/145 [02:00<02:15,  1.59s/it]


sft_loss : 0.9552468657493591

sft_loss : 0.6362959742546082

[Step 60] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 42%|████▏     | 61/145 [02:06<04:00,  2.87s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.7850084900856018

sft_loss : 1.3806569576263428


 43%|████▎     | 62/145 [02:07<03:25,  2.47s/it]


sft_loss : 0.7621108293533325

sft_loss : 2.3890206813812256


 43%|████▎     | 63/145 [02:09<02:59,  2.19s/it]


sft_loss : 0.8401412963867188

sft_loss : 4.329152584075928


 44%|████▍     | 64/145 [02:11<02:43,  2.02s/it]


sft_loss : 0.6290178894996643

sft_loss : 1.2588329315185547


 45%|████▍     | 65/145 [02:12<02:29,  1.87s/it]


sft_loss : 0.733269989490509

sft_loss : 2.2065510749816895


 46%|████▌     | 66/145 [02:14<02:20,  1.78s/it]


sft_loss : 0.5586215257644653

sft_loss : 1.5277959108352661


 46%|████▌     | 67/145 [02:15<02:12,  1.70s/it]


sft_loss : 0.6407685279846191

sft_loss : 2.052774429321289


 47%|████▋     | 68/145 [02:17<02:06,  1.65s/it]


sft_loss : 0.5090819597244263

sft_loss : 3.4805185794830322


 48%|████▊     | 69/145 [02:18<02:02,  1.62s/it]


sft_loss : 0.5400848388671875

sft_loss : 2.893364906311035


 48%|████▊     | 70/145 [02:20<01:59,  1.59s/it]


sft_loss : 0.665164589881897

sft_loss : 1.0912295579910278

[Step 70] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 49%|████▉     | 71/145 [02:26<03:41,  2.99s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.8933637738227844

sft_loss : 3.9745395183563232


 50%|████▉     | 72/145 [02:28<03:08,  2.58s/it]


sft_loss : 0.8152967691421509

sft_loss : 2.3513286113739014


 50%|█████     | 73/145 [02:29<02:43,  2.28s/it]


sft_loss : 0.571435809135437

sft_loss : 1.6586863994598389


 51%|█████     | 74/145 [02:31<02:25,  2.05s/it]


sft_loss : 0.9736805558204651

sft_loss : 6.320635795593262


 52%|█████▏    | 75/145 [02:32<02:15,  1.93s/it]


sft_loss : 0.5387884974479675

sft_loss : 0.6173902153968811


 52%|█████▏    | 76/145 [02:34<02:04,  1.80s/it]


sft_loss : 0.6539904475212097

sft_loss : 1.3424739837646484


 53%|█████▎    | 77/145 [02:35<01:57,  1.72s/it]


sft_loss : 0.515358567237854

sft_loss : 0.517318606376648


 54%|█████▍    | 78/145 [02:37<01:51,  1.66s/it]


sft_loss : 0.6539286971092224

sft_loss : 3.844021797180176


 54%|█████▍    | 79/145 [02:39<01:48,  1.64s/it]


sft_loss : 0.5424833297729492

sft_loss : 1.3041157722473145


 55%|█████▌    | 80/145 [02:40<01:44,  1.60s/it]


sft_loss : 1.4762487411499023

sft_loss : 2.96513295173645

[Step 80] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 56%|█████▌    | 81/145 [02:46<03:05,  2.90s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.5718615651130676

sft_loss : 0.6196781396865845


 57%|█████▋    | 82/145 [02:48<02:36,  2.49s/it]


sft_loss : 0.3986942768096924

sft_loss : 6.866457939147949


 57%|█████▋    | 83/145 [02:49<02:17,  2.21s/it]


sft_loss : 0.4762178063392639

sft_loss : 1.4380590915679932


 58%|█████▊    | 84/145 [02:51<02:02,  2.01s/it]


sft_loss : 0.5891888737678528

sft_loss : 0.6265043616294861


 59%|█████▊    | 85/145 [02:52<01:51,  1.86s/it]


sft_loss : 0.5540335774421692

sft_loss : 0.7140220999717712


 59%|█████▉    | 86/145 [02:54<01:43,  1.76s/it]


sft_loss : 0.6196131110191345

sft_loss : 5.009119510650635


 60%|██████    | 87/145 [02:55<01:39,  1.71s/it]


sft_loss : 0.6237568259239197

sft_loss : 0.6957798600196838


 61%|██████    | 88/145 [02:57<01:33,  1.65s/it]


sft_loss : 0.47414353489875793

sft_loss : 1.862622618675232


 61%|██████▏   | 89/145 [02:58<01:30,  1.61s/it]


sft_loss : 0.6915391683578491

sft_loss : 1.9664291143417358


 62%|██████▏   | 90/145 [03:00<01:26,  1.58s/it]


sft_loss : 0.6118322014808655

sft_loss : 2.4711668491363525

[Step 90] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 63%|██████▎   | 91/145 [03:06<02:44,  3.05s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.7346832752227783

sft_loss : 1.8404359817504883


 63%|██████▎   | 92/145 [03:08<02:17,  2.59s/it]


sft_loss : 0.8727829456329346

sft_loss : 5.802872657775879


 64%|██████▍   | 93/145 [03:09<01:59,  2.29s/it]


sft_loss : 0.7031731009483337

sft_loss : 0.5892534852027893


 65%|██████▍   | 94/145 [03:11<01:45,  2.06s/it]


sft_loss : 0.5791723132133484

sft_loss : 4.741178035736084


 66%|██████▌   | 95/145 [03:12<01:35,  1.91s/it]


sft_loss : 0.6452426910400391

sft_loss : 2.352141857147217


 66%|██████▌   | 96/145 [03:14<01:28,  1.81s/it]


sft_loss : 0.5629545450210571

sft_loss : 1.6611130237579346


 67%|██████▋   | 97/145 [03:16<01:22,  1.73s/it]


sft_loss : 0.5600804686546326

sft_loss : 2.5296103954315186


 68%|██████▊   | 98/145 [03:17<01:18,  1.67s/it]


sft_loss : 1.9641366004943848

sft_loss : 0.9656941890716553


 68%|██████▊   | 99/145 [03:19<01:15,  1.63s/it]


sft_loss : 0.8703615069389343

sft_loss : 2.7711801528930664


 69%|██████▉   | 100/145 [03:20<01:12,  1.61s/it]


sft_loss : 1.5582449436187744

sft_loss : 2.653103828430176

[Step 100] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 70%|██████▉   | 101/145 [03:25<01:55,  2.62s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.5029062628746033

sft_loss : 1.763092041015625


 70%|███████   | 102/145 [03:27<01:38,  2.29s/it]


sft_loss : 0.7284780144691467

sft_loss : 1.3959801197052002


 71%|███████   | 103/145 [03:28<01:26,  2.06s/it]


sft_loss : 0.6180775165557861

sft_loss : 1.3699727058410645


 72%|███████▏  | 104/145 [03:30<01:17,  1.90s/it]


sft_loss : 0.49114173650741577

sft_loss : 0.6730020642280579


 72%|███████▏  | 105/145 [03:31<01:11,  1.79s/it]


sft_loss : 0.4830079972743988

sft_loss : 6.666780948638916


 73%|███████▎  | 106/145 [03:33<01:08,  1.75s/it]


sft_loss : 1.0052136182785034

sft_loss : 3.3055810928344727


 74%|███████▍  | 107/145 [03:35<01:04,  1.69s/it]


sft_loss : 0.556829035282135

sft_loss : 6.597460746765137


 74%|███████▍  | 108/145 [03:36<01:01,  1.67s/it]


sft_loss : 0.5598716139793396

sft_loss : 1.4915882349014282


 75%|███████▌  | 109/145 [03:38<00:58,  1.63s/it]


sft_loss : 0.7629357576370239

sft_loss : 1.1731247901916504


 76%|███████▌  | 110/145 [03:39<00:56,  1.61s/it]


sft_loss : 0.8367260694503784

sft_loss : 4.535559177398682

[Step 110] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 77%|███████▋  | 111/145 [03:45<01:36,  2.84s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.5626264214515686

sft_loss : 0.4477676451206207


 77%|███████▋  | 112/145 [03:46<01:20,  2.44s/it]


sft_loss : 0.7241962552070618

sft_loss : 4.347534656524658


 78%|███████▊  | 113/145 [03:48<01:09,  2.18s/it]


sft_loss : 1.0358710289001465

sft_loss : 1.5570135116577148


 79%|███████▊  | 114/145 [03:50<01:01,  2.00s/it]


sft_loss : 0.5958101153373718

sft_loss : 1.194120168685913


 79%|███████▉  | 115/145 [03:51<00:55,  1.86s/it]


sft_loss : 0.6850647926330566

sft_loss : 1.893667459487915


 80%|████████  | 116/145 [03:53<00:50,  1.76s/it]


sft_loss : 0.6204142570495605

sft_loss : 0.5622128248214722


 81%|████████  | 117/145 [03:54<00:46,  1.68s/it]


sft_loss : 0.6341899633407593

sft_loss : 0.5717595815658569


 81%|████████▏ | 118/145 [03:56<00:43,  1.62s/it]


sft_loss : 0.5638184547424316

sft_loss : 4.376733779907227


 82%|████████▏ | 119/145 [03:57<00:41,  1.60s/it]


sft_loss : 0.5718650221824646

sft_loss : 2.9350547790527344


 83%|████████▎ | 120/145 [03:59<00:39,  1.57s/it]


sft_loss : 0.7488874793052673

sft_loss : 1.3854343891143799

[Step 120] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 83%|████████▎ | 121/145 [04:04<01:07,  2.83s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.4567755460739136

sft_loss : 1.0301439762115479


 84%|████████▍ | 122/145 [04:06<00:56,  2.44s/it]


sft_loss : 0.4427262842655182

sft_loss : 2.3508541584014893


 85%|████████▍ | 123/145 [04:08<00:47,  2.17s/it]


sft_loss : 0.7078313231468201

sft_loss : 4.609437465667725


 86%|████████▌ | 124/145 [04:09<00:42,  2.00s/it]


sft_loss : 0.4851774573326111

sft_loss : 0.517598032951355


 86%|████████▌ | 125/145 [04:11<00:36,  1.85s/it]


sft_loss : 0.5033730268478394

sft_loss : 2.6172802448272705


 87%|████████▋ | 126/145 [04:12<00:33,  1.76s/it]


sft_loss : 0.6750736832618713

sft_loss : 0.8672330379486084


 88%|████████▊ | 127/145 [04:14<00:30,  1.69s/it]


sft_loss : 0.45663321018218994

sft_loss : 2.3278706073760986


 88%|████████▊ | 128/145 [04:15<00:27,  1.64s/it]


sft_loss : 0.6468752026557922

sft_loss : 4.128251075744629


 89%|████████▉ | 129/145 [04:17<00:25,  1.61s/it]


sft_loss : 0.5867926478385925

sft_loss : 2.0782666206359863


 90%|████████▉ | 130/145 [04:18<00:23,  1.59s/it]


sft_loss : 0.7899599671363831

sft_loss : 2.604856014251709

[Step 130] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 90%|█████████ | 131/145 [04:25<00:43,  3.13s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.6649365425109863

sft_loss : 1.2882872819900513


 91%|█████████ | 132/145 [04:27<00:34,  2.65s/it]


sft_loss : 1.4981945753097534

sft_loss : 1.920037031173706


 92%|█████████▏| 133/145 [04:28<00:27,  2.33s/it]


sft_loss : 0.566878616809845

sft_loss : 3.0782055854797363


 92%|█████████▏| 134/145 [04:30<00:23,  2.09s/it]


sft_loss : 0.4833344519138336

sft_loss : 1.8651424646377563


 93%|█████████▎| 135/145 [04:31<00:19,  1.92s/it]


sft_loss : 1.4658918380737305

sft_loss : 5.3272905349731445


 94%|█████████▍| 136/145 [04:33<00:16,  1.82s/it]


sft_loss : 0.4401077330112457

sft_loss : 1.9121966361999512


 94%|█████████▍| 137/145 [04:34<00:13,  1.73s/it]


sft_loss : 0.5473564267158508

sft_loss : 2.128317356109619


 95%|█████████▌| 138/145 [04:36<00:11,  1.68s/it]


sft_loss : 0.5729819536209106

sft_loss : 1.1968518495559692


 96%|█████████▌| 139/145 [04:37<00:09,  1.64s/it]


sft_loss : 0.57430499792099

sft_loss : 4.107205867767334


 97%|█████████▋| 140/145 [04:39<00:08,  1.62s/it]


sft_loss : 4.2051591873168945

sft_loss : 5.398074150085449

[Step 140] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 97%|█████████▋| 141/145 [04:44<00:10,  2.72s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.442344605922699

sft_loss : 0.7343506217002869


 98%|█████████▊| 142/145 [04:46<00:07,  2.37s/it]


sft_loss : 0.5555158257484436

sft_loss : 2.360795497894287


 99%|█████████▊| 143/145 [04:47<00:04,  2.12s/it]


sft_loss : 0.6629119515419006

sft_loss : 0.7780769467353821


 99%|█████████▉| 144/145 [04:49<00:01,  1.94s/it]


sft_loss : 0.4139104187488556

sft_loss : 4.891012668609619


100%|██████████| 145/145 [04:50<00:00,  2.01s/it]



===== .h =====
```cpp
#pragma once

#include "CoreMinimal.h"
#include "GameFramework/Actor.h"
#include "HealthManaActor.generated.h"

UCLASS()
class YOURPROJECT_API AHealthManaActor : public AActor
{
	GENERATED_BODY()

public:
	// Sets default values for this actor's properties
	AAHealthManaActor();

protected:
	// Called when the game starts or when spawned
	virtual void BeginPlay() override;

public:
	// Called every frame
	virtual void Tick(float DeltaTime) override;

private:
	// Health and mana properties
	UPROPERTY(VisibleAnywhere)
	UHealthComponent* HealthComponent;
	UPROPERTY(VisibleAnywhere)
	UManaComponent* ManaComponent;

	// Health and mana values
	float HealthValue;
	float ManaValue;
};
```

===== .cpp =====
```cpp
#include "HealthManaActor.h"
#include "Components/HealthComponent.h"
#include "Components/ManaComponent.h"

// Sets default values
AAHealthManaActor::AAHealthManaActor()
{
	// Set this actor to call Tick() every frame.  You can turn this off to improve performa

  0%|          | 0/145 [00:00<?, ?it/s]


sft_loss : 0.6042742133140564

sft_loss : 0.8726729154586792

[Step 0] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


  1%|          | 1/145 [00:06<14:57,  6.23s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.5799397826194763

sft_loss : 3.6892752647399902


  1%|▏         | 2/145 [00:07<08:18,  3.48s/it]


sft_loss : 0.5158507227897644

sft_loss : 2.896439790725708


  2%|▏         | 3/145 [00:09<06:10,  2.61s/it]


sft_loss : 0.8049920201301575

sft_loss : 2.06036639213562


  3%|▎         | 4/145 [00:10<05:09,  2.19s/it]


sft_loss : 0.46935632824897766

sft_loss : 1.137063980102539


  3%|▎         | 5/145 [00:12<04:32,  1.95s/it]


sft_loss : 0.7050656080245972

sft_loss : 1.534252643585205


  4%|▍         | 6/145 [00:13<04:12,  1.82s/it]


sft_loss : 0.8853334784507751

sft_loss : 0.5882955193519592


  5%|▍         | 7/145 [00:15<03:58,  1.73s/it]


sft_loss : 0.5184187293052673

sft_loss : 3.807014226913452


  6%|▌         | 8/145 [00:17<03:49,  1.67s/it]


sft_loss : 0.49832820892333984

sft_loss : 4.281787395477295


  6%|▌         | 9/145 [00:18<03:41,  1.63s/it]


sft_loss : 0.4704013466835022

sft_loss : 1.118285059928894


  7%|▋         | 10/145 [00:20<03:35,  1.60s/it]


sft_loss : 0.6057973504066467

sft_loss : 3.8336148262023926

[Step 10] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


  8%|▊         | 11/145 [00:25<06:26,  2.88s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.6217861175537109

sft_loss : 1.0707907676696777


  8%|▊         | 12/145 [00:27<05:28,  2.47s/it]


sft_loss : 0.6198932528495789

sft_loss : 3.268322229385376


  9%|▉         | 13/145 [00:28<04:46,  2.17s/it]


sft_loss : 0.4236537516117096

sft_loss : 1.6745781898498535


 10%|▉         | 14/145 [00:30<04:19,  1.98s/it]


sft_loss : 0.4800664186477661

sft_loss : 2.151217222213745


 10%|█         | 15/145 [00:32<03:59,  1.84s/it]


sft_loss : 1.4084028005599976

sft_loss : 2.749749183654785


 11%|█         | 16/145 [00:33<03:46,  1.75s/it]


sft_loss : 0.4693219065666199

sft_loss : 0.6603116393089294


 12%|█▏        | 17/145 [00:35<03:35,  1.68s/it]


sft_loss : 0.5617431998252869

sft_loss : 1.0686092376708984


 12%|█▏        | 18/145 [00:36<03:27,  1.63s/it]


sft_loss : 0.6015220880508423

sft_loss : 5.861677169799805


 13%|█▎        | 19/145 [00:38<03:23,  1.62s/it]


sft_loss : 0.48781758546829224

sft_loss : 2.273322105407715


 14%|█▍        | 20/145 [00:39<03:19,  1.59s/it]


sft_loss : 0.6289102435112

sft_loss : 2.021918773651123

[Step 20] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 14%|█▍        | 21/145 [00:45<05:57,  2.88s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.5280370712280273

sft_loss : 1.385507345199585


 15%|█▌        | 22/145 [00:47<05:04,  2.48s/it]


sft_loss : 0.664604902267456

sft_loss : 3.900043249130249


 16%|█▌        | 23/145 [00:48<04:29,  2.21s/it]


sft_loss : 0.5345051884651184

sft_loss : 1.1442492008209229


 17%|█▋        | 24/145 [00:50<04:04,  2.02s/it]


sft_loss : 0.6215155124664307

sft_loss : 0.595985472202301


 17%|█▋        | 25/145 [00:51<03:44,  1.87s/it]


sft_loss : 0.49005749821662903

sft_loss : 1.710246205329895


 18%|█▊        | 26/145 [00:53<03:31,  1.78s/it]


sft_loss : 0.7331787943840027

sft_loss : 6.749712944030762


 19%|█▊        | 27/145 [00:54<03:23,  1.73s/it]


sft_loss : 0.5121043920516968

sft_loss : 1.4790644645690918


 19%|█▉        | 28/145 [00:56<03:15,  1.67s/it]


sft_loss : 0.4098884165287018

sft_loss : 4.405396938323975


 20%|██        | 29/145 [00:58<03:10,  1.64s/it]


sft_loss : 2.463813543319702

sft_loss : 0.9913555383682251


 21%|██        | 30/145 [00:59<03:05,  1.62s/it]


sft_loss : 0.6692589521408081

sft_loss : 4.134130001068115

[Step 30] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 21%|██▏       | 31/145 [01:06<05:47,  3.05s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.38101741671562195

sft_loss : 2.3380532264709473


 22%|██▏       | 32/145 [01:07<04:53,  2.60s/it]


sft_loss : 0.46930044889450073

sft_loss : 2.792599678039551


 23%|██▎       | 33/145 [01:09<04:15,  2.28s/it]


sft_loss : 2.077252149581909

sft_loss : 5.778203964233398


 23%|██▎       | 34/145 [01:10<03:52,  2.09s/it]


sft_loss : 0.568300187587738

sft_loss : 2.1706104278564453


 24%|██▍       | 35/145 [01:12<03:31,  1.92s/it]


sft_loss : 0.9106346368789673

sft_loss : 3.154587507247925


 25%|██▍       | 36/145 [01:13<03:17,  1.82s/it]


sft_loss : 0.4512772560119629

sft_loss : 2.3787925243377686


 26%|██▌       | 37/145 [01:15<03:06,  1.73s/it]


sft_loss : 0.520519495010376

sft_loss : 0.8623546361923218


 26%|██▌       | 38/145 [01:16<02:59,  1.67s/it]


sft_loss : 0.5301849246025085

sft_loss : 1.460994005203247


 27%|██▋       | 39/145 [01:18<02:53,  1.63s/it]


sft_loss : 0.3507061302661896

sft_loss : 0.46215957403182983


 28%|██▊       | 40/145 [01:19<02:47,  1.59s/it]


sft_loss : 1.3893235921859741

sft_loss : 2.8748629093170166

[Step 40] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 28%|██▊       | 41/145 [01:25<04:53,  2.82s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.6127660870552063

sft_loss : 4.5324225425720215


 29%|██▉       | 42/145 [01:27<04:14,  2.47s/it]


sft_loss : 0.4872444272041321

sft_loss : 1.6356335878372192


 30%|██▉       | 43/145 [01:28<03:42,  2.18s/it]


sft_loss : 0.6775954961776733

sft_loss : 1.6806553602218628


 30%|███       | 44/145 [01:30<03:20,  1.99s/it]


sft_loss : 0.5399125218391418

sft_loss : 2.283512830734253


 31%|███       | 45/145 [01:31<03:04,  1.85s/it]


sft_loss : 0.5410013198852539

sft_loss : 2.2499642372131348


 32%|███▏      | 46/145 [01:33<02:53,  1.76s/it]


sft_loss : 0.5504840016365051

sft_loss : 8.246084213256836


 32%|███▏      | 47/145 [01:35<02:48,  1.72s/it]


sft_loss : 0.5448347926139832

sft_loss : 0.9430792927742004


 33%|███▎      | 48/145 [01:36<02:41,  1.66s/it]


sft_loss : 0.5300870537757874

sft_loss : 2.2106964588165283


 34%|███▍      | 49/145 [01:38<02:36,  1.63s/it]


sft_loss : 0.44940856099128723

sft_loss : 1.4349803924560547


 34%|███▍      | 50/145 [01:39<02:31,  1.60s/it]


sft_loss : 0.555819034576416

sft_loss : 4.227428436279297

[Step 50] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 35%|███▌      | 51/145 [01:44<04:05,  2.61s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.5438241958618164

sft_loss : 0.5510082840919495


 36%|███▌      | 52/145 [01:46<03:32,  2.29s/it]


sft_loss : 1.3599295616149902

sft_loss : 5.284185886383057


 37%|███▋      | 53/145 [01:47<03:12,  2.09s/it]


sft_loss : 0.4758237302303314

sft_loss : 1.0685027837753296


 37%|███▋      | 54/145 [01:49<02:55,  1.93s/it]


sft_loss : 0.5380455851554871

sft_loss : 0.6310511827468872


 38%|███▊      | 55/145 [01:50<02:42,  1.80s/it]


sft_loss : 1.153076410293579

sft_loss : 2.9327499866485596


 39%|███▊      | 56/145 [01:52<02:33,  1.73s/it]


sft_loss : 1.225630521774292

sft_loss : 2.8826963901519775


 39%|███▉      | 57/145 [01:53<02:27,  1.68s/it]


sft_loss : 0.8610087633132935

sft_loss : 1.9267456531524658


 40%|████      | 58/145 [01:55<02:22,  1.64s/it]


sft_loss : 0.45561888813972473

sft_loss : 2.761070728302002


 41%|████      | 59/145 [01:57<02:17,  1.60s/it]


sft_loss : 0.48070240020751953

sft_loss : 1.1005719900131226


 41%|████▏     | 60/145 [01:58<02:14,  1.58s/it]


sft_loss : 0.5487913489341736

sft_loss : 3.482835531234741

[Step 60] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 42%|████▏     | 61/145 [02:03<03:42,  2.65s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.5921378135681152

sft_loss : 1.091269850730896


 43%|████▎     | 62/145 [02:05<03:12,  2.32s/it]


sft_loss : 0.5651102066040039

sft_loss : 0.9418802261352539


 43%|████▎     | 63/145 [02:06<02:50,  2.08s/it]


sft_loss : 0.6837935447692871

sft_loss : 0.6697665452957153


 44%|████▍     | 64/145 [02:08<02:34,  1.91s/it]


sft_loss : 0.5809841752052307

sft_loss : 0.48459675908088684


 45%|████▍     | 65/145 [02:09<02:23,  1.79s/it]


sft_loss : 0.6183410882949829

sft_loss : 6.1258320808410645


 46%|████▌     | 66/145 [02:11<02:18,  1.75s/it]


sft_loss : 0.5398498177528381

sft_loss : 2.458676338195801


 46%|████▌     | 67/145 [02:13<02:12,  1.69s/it]


sft_loss : 0.5760840773582458

sft_loss : 1.9424262046813965


 47%|████▋     | 68/145 [02:14<02:06,  1.65s/it]


sft_loss : 0.989105224609375

sft_loss : 1.2227227687835693


 48%|████▊     | 69/145 [02:16<02:02,  1.61s/it]


sft_loss : 0.5565074682235718

sft_loss : 2.885974645614624


 48%|████▊     | 70/145 [02:17<01:58,  1.58s/it]


sft_loss : 0.6030009984970093

sft_loss : 3.07348370552063

[Step 70] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 49%|████▉     | 71/145 [02:23<03:31,  2.85s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.5085622072219849

sft_loss : 2.8939459323883057


 50%|████▉     | 72/145 [02:25<03:00,  2.47s/it]


sft_loss : 0.5641981363296509

sft_loss : 1.517469882965088


 50%|█████     | 73/145 [02:26<02:37,  2.19s/it]


sft_loss : 0.536063015460968

sft_loss : 3.889618396759033


 51%|█████     | 74/145 [02:28<02:22,  2.00s/it]


sft_loss : 0.655368447303772

sft_loss : 3.5513975620269775


 52%|█████▏    | 75/145 [02:29<02:09,  1.85s/it]


sft_loss : 0.6604238152503967

sft_loss : 1.4923239946365356


 52%|█████▏    | 76/145 [02:31<02:00,  1.75s/it]


sft_loss : 0.5058935880661011

sft_loss : 0.5545116662979126


 53%|█████▎    | 77/145 [02:32<01:54,  1.68s/it]


sft_loss : 0.5160127282142639

sft_loss : 7.205901145935059


 54%|█████▍    | 78/145 [02:34<01:52,  1.68s/it]


sft_loss : 0.4087733328342438

sft_loss : 3.1719086170196533


 54%|█████▍    | 79/145 [02:35<01:48,  1.64s/it]


sft_loss : 0.40712711215019226

sft_loss : 2.7174015045166016


 55%|█████▌    | 80/145 [02:37<01:44,  1.61s/it]


sft_loss : 0.4258338212966919

sft_loss : 4.144455432891846

[Step 80] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 56%|█████▌    | 81/145 [02:43<03:14,  3.05s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.5709173679351807

sft_loss : 1.7823368310928345


 57%|█████▋    | 82/145 [02:45<02:43,  2.60s/it]


sft_loss : 0.5118489861488342

sft_loss : 2.396653890609741


 57%|█████▋    | 83/145 [02:46<02:21,  2.29s/it]


sft_loss : 0.4977557957172394

sft_loss : 1.2831460237503052


 58%|█████▊    | 84/145 [02:48<02:05,  2.06s/it]


sft_loss : 0.48392125964164734

sft_loss : 2.904310703277588


 59%|█████▊    | 85/145 [02:50<01:54,  1.91s/it]


sft_loss : 0.4012974500656128

sft_loss : 1.1218349933624268


 59%|█████▉    | 86/145 [02:51<01:45,  1.79s/it]


sft_loss : 0.4042198061943054

sft_loss : 1.0797010660171509


 60%|██████    | 87/145 [02:53<01:39,  1.72s/it]


sft_loss : 0.43325263261795044

sft_loss : 2.2761638164520264


 61%|██████    | 88/145 [02:54<01:34,  1.66s/it]


sft_loss : 1.1431565284729004

sft_loss : 3.1869165897369385


 61%|██████▏   | 89/145 [02:56<01:31,  1.64s/it]


sft_loss : 0.5476067066192627

sft_loss : 1.8122749328613281


 62%|██████▏   | 90/145 [02:57<01:28,  1.61s/it]


sft_loss : 0.5257592797279358

sft_loss : 1.0945606231689453

[Step 90] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 63%|██████▎   | 91/145 [03:04<02:45,  3.06s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.4368707537651062

sft_loss : 5.920717716217041


 63%|██████▎   | 92/145 [03:05<02:18,  2.61s/it]


sft_loss : 0.5627446174621582

sft_loss : 0.8150585889816284


 64%|██████▍   | 93/145 [03:07<01:58,  2.28s/it]


sft_loss : 0.574283242225647

sft_loss : 0.5951713919639587


 65%|██████▍   | 94/145 [03:08<01:44,  2.05s/it]


sft_loss : 0.5442046523094177

sft_loss : 0.880704402923584


 66%|██████▌   | 95/145 [03:10<01:34,  1.89s/it]


sft_loss : 0.5866042971611023

sft_loss : 1.2034574747085571


 66%|██████▌   | 96/145 [03:11<01:27,  1.79s/it]


sft_loss : 1.7023564577102661

sft_loss : 2.6912598609924316


 67%|██████▋   | 97/145 [03:13<01:22,  1.71s/it]


sft_loss : 1.189361333847046

sft_loss : 1.805237054824829


 68%|██████▊   | 98/145 [03:14<01:17,  1.66s/it]


sft_loss : 0.5823369026184082

sft_loss : 1.8927395343780518


 68%|██████▊   | 99/145 [03:16<01:14,  1.61s/it]


sft_loss : 0.3914101719856262

sft_loss : 1.311530351638794


 69%|██████▉   | 100/145 [03:17<01:11,  1.59s/it]


sft_loss : 0.4699037969112396

sft_loss : 4.0053606033325195

[Step 100] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 70%|██████▉   | 101/145 [03:24<02:13,  3.05s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.5192766189575195

sft_loss : 2.761563301086426


 70%|███████   | 102/145 [03:25<01:51,  2.59s/it]


sft_loss : 0.4622003436088562

sft_loss : 4.02002477645874


 71%|███████   | 103/145 [03:27<01:35,  2.28s/it]


sft_loss : 0.43076327443122864

sft_loss : 2.630587577819824


 72%|███████▏  | 104/145 [03:29<01:24,  2.06s/it]


sft_loss : 0.44373711943626404

sft_loss : 1.490363359451294


 72%|███████▏  | 105/145 [03:30<01:16,  1.90s/it]


sft_loss : 0.5985623002052307

sft_loss : 1.234965205192566


 73%|███████▎  | 106/145 [03:32<01:09,  1.79s/it]


sft_loss : 0.8547162413597107

sft_loss : 3.222241163253784


 74%|███████▍  | 107/145 [03:33<01:05,  1.72s/it]


sft_loss : 0.4713281989097595

sft_loss : 3.579742670059204


 74%|███████▍  | 108/145 [03:35<01:01,  1.67s/it]


sft_loss : 0.4122050106525421

sft_loss : 2.993835210800171


 75%|███████▌  | 109/145 [03:36<00:58,  1.64s/it]


sft_loss : 0.4585685133934021

sft_loss : 0.43689364194869995


 76%|███████▌  | 110/145 [03:38<00:56,  1.60s/it]


sft_loss : 0.41429534554481506

sft_loss : 3.2533020973205566

[Step 110] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 77%|███████▋  | 111/145 [03:44<01:44,  3.09s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.43841752409935

sft_loss : 1.8815958499908447


 77%|███████▋  | 112/145 [03:46<01:26,  2.63s/it]


sft_loss : 0.49418631196022034

sft_loss : 2.9904801845550537


 78%|███████▊  | 113/145 [03:47<01:13,  2.30s/it]


sft_loss : 0.41182854771614075

sft_loss : 1.1194102764129639


 79%|███████▊  | 114/145 [03:49<01:04,  2.08s/it]


sft_loss : 0.537503182888031

sft_loss : 3.142713785171509


 79%|███████▉  | 115/145 [03:51<00:57,  1.92s/it]


sft_loss : 4.751204490661621

sft_loss : 4.477811813354492


 80%|████████  | 116/145 [03:52<00:52,  1.82s/it]


sft_loss : 0.7068221569061279

sft_loss : 0.7922099828720093


 81%|████████  | 117/145 [03:54<00:48,  1.74s/it]


sft_loss : 0.44239985942840576

sft_loss : 2.356860637664795


 81%|████████▏ | 118/145 [03:55<00:45,  1.69s/it]


sft_loss : 0.4140112102031708

sft_loss : 4.62811279296875


 82%|████████▏ | 119/145 [03:57<00:42,  1.65s/it]


sft_loss : 0.7148157358169556

sft_loss : 1.1042842864990234


 83%|████████▎ | 120/145 [03:58<00:40,  1.63s/it]


sft_loss : 0.4452880024909973

sft_loss : 3.752631187438965

[Step 120] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 83%|████████▎ | 121/145 [04:05<01:14,  3.09s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.43335193395614624

sft_loss : 2.1062889099121094


 84%|████████▍ | 122/145 [04:06<01:00,  2.62s/it]


sft_loss : 0.4937903583049774

sft_loss : 0.42032983899116516


 85%|████████▍ | 123/145 [04:08<00:50,  2.28s/it]


sft_loss : 0.373820424079895

sft_loss : 0.8949210047721863


 86%|████████▌ | 124/145 [04:09<00:43,  2.05s/it]


sft_loss : 0.5506321787834167

sft_loss : 0.8133677244186401


 86%|████████▌ | 125/145 [04:11<00:37,  1.90s/it]


sft_loss : 0.45178085565567017

sft_loss : 0.4687863290309906


 87%|████████▋ | 126/145 [04:13<00:34,  1.80s/it]


sft_loss : 0.5848938226699829

sft_loss : 1.4297999143600464


 88%|████████▊ | 127/145 [04:14<00:31,  1.73s/it]


sft_loss : 0.47602254152297974

sft_loss : 3.9808688163757324


 88%|████████▊ | 128/145 [04:16<00:28,  1.67s/it]


sft_loss : 0.44192859530448914

sft_loss : 1.7017139196395874


 89%|████████▉ | 129/145 [04:17<00:26,  1.64s/it]


sft_loss : 0.5230631232261658

sft_loss : 1.6038070917129517


 90%|████████▉ | 130/145 [04:19<00:24,  1.61s/it]


sft_loss : 0.3940177261829376

sft_loss : 2.0716710090637207

[Step 130] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 90%|█████████ | 131/145 [04:25<00:42,  3.07s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.45868581533432007

sft_loss : 2.5424277782440186


 91%|█████████ | 132/145 [04:27<00:34,  2.62s/it]


sft_loss : 0.5180809497833252

sft_loss : 2.6455812454223633


 92%|█████████▏| 133/145 [04:28<00:27,  2.30s/it]


sft_loss : 0.6646388173103333

sft_loss : 0.9415645599365234


 92%|█████████▏| 134/145 [04:30<00:22,  2.06s/it]


sft_loss : 0.6694374084472656

sft_loss : 1.3120306730270386


 93%|█████████▎| 135/145 [04:31<00:18,  1.90s/it]


sft_loss : 0.4493888318538666

sft_loss : 1.1355723142623901


 94%|█████████▍| 136/145 [04:33<00:16,  1.79s/it]


sft_loss : 0.46498680114746094

sft_loss : 2.01450777053833


 94%|█████████▍| 137/145 [04:34<00:13,  1.71s/it]


sft_loss : 1.129323959350586

sft_loss : 4.482363700866699


 95%|█████████▌| 138/145 [04:36<00:11,  1.68s/it]


sft_loss : 0.6671041250228882

sft_loss : 2.40092134475708


 96%|█████████▌| 139/145 [04:38<00:09,  1.64s/it]


sft_loss : 0.39106935262680054

sft_loss : 7.746513366699219


 97%|█████████▋| 140/145 [04:39<00:08,  1.64s/it]


sft_loss : 0.5671624541282654

sft_loss : 2.059047222137451

[Step 140] Ref generator updated.
Saving LoRA adapter...
Saved value head to ./checkpoint_1_3b/generator_SFT/value_head.pt
Saved tokenizer to ./checkpoint_1_3b/generator_SFT/tokenizer
Saving LoRA adapter...
Saved classifier head to ./checkpoint_1_3b/discriminator_SFT/classifier.pt


 97%|█████████▋| 141/145 [04:46<00:12,  3.05s/it]

Saved tokenizer to ./checkpoint_1_3b/discriminator_SFT/tokenizer

sft_loss : 0.5647940635681152

sft_loss : 1.669549584388733


 98%|█████████▊| 142/145 [04:47<00:07,  2.59s/it]


sft_loss : 0.5561890602111816

sft_loss : 3.003769636154175


 99%|█████████▊| 143/145 [04:49<00:04,  2.28s/it]


sft_loss : 0.3938320279121399

sft_loss : 2.630018949508667


 99%|█████████▉| 144/145 [04:50<00:02,  2.06s/it]


sft_loss : 0.38271763920783997

sft_loss : 2.6792778968811035


100%|██████████| 145/145 [04:52<00:00,  2.02s/it]



===== .h =====
```cpp
#pragma once

#include "CoreMinimal.h"
#include "GameFramework/Character.h"
#include "HealthManaCharacter.generated.h"

UCLASS()
class YOURPROJECT_API AHealthManaCharacter : public ACharacter
{
	GENERATED_BODY()

public:
	// Sets default values for this character's properties
	AHealthManaCharacter();

protected:
	// Called when the game starts or when spawned
	virtual void BeginPlay() override;

public:
	// Called every frame
	virtual void Tick(float DeltaTime) override;

	// Health
	UPROPERTY(VisibleAnywhere, Category="Health")
	float Health;

	// Mana
	UPROPERTY(VisibleAnywhere, Category="Mana")
	float Mana;

	// Constructor
	// Implement when creating a new character
	AHealthManaCharacter();

private:
	// Called when the player starts or ends
	virtual void OnStartGame(const FGameContext::FGameStateStartInfo& GameStateStartInfo) override;

	// Called when the player enters or leaves a room
	virtual void OnRoomEntry(const FGameContext::FGameStateEntryInfo& GameS