In [None]:
from _init import *

import os
# Set CUDA_VISIBLE_DEVICES to '1' for this kernel process.
# NOTE(review): this runs after `from _init import *`; the variable presumably
# has to be set before CUDA is initialized to take effect — confirm that
# `_init` does not initialize CUDA (or set this variable itself).
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [None]:
import gc
import random, torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from ranger.utils import json_utils
from ranger.reward.reward_calculator import RewardCalculator
from ranger.train.ranger_trainer import RangerTrainer

# Device string used when moving tensors: "cuda" when PyTorch reports that
# CUDA is available, otherwise "cpu".
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

In [None]:
def set_seed(seed: int) -> None:
    """Seed the Python and PyTorch random number generators.

    Args:
        seed: Value passed to ``random.seed``, ``torch.manual_seed`` and
            ``torch.cuda.manual_seed_all``.
    """
    random.seed(seed)
    torch.manual_seed(seed)
    # Seed every CUDA device rather than only the current one, so results do
    # not depend on which device happens to be selected.
    torch.cuda.manual_seed_all(seed)
    print(f'set_seed() seed : {seed}\n')


# Fixed seed for the whole notebook.
seed = 42
set_seed(seed)

In [None]:
# Project root, plus the data and output directories derived from it.
# NOTE(review): this is an absolute, machine-specific path; adjust it when
# running elsewhere.
work_dir = '/home/nlpshlee/dev_env/git/repos/ranger'
data_dir = work_dir + '/data'
out_dir = work_dir + '/outputs/test'

In [None]:
def get_response(model: AutoModelForCausalLM, tokenizer: AutoTokenizer, text: str, max_new_tokens: int = 20):
    """Greedy-decode a continuation for ``text`` and return it as a string.

    Args:
        model: Model exposing ``parameters()`` and ``generate(...)``.
        tokenizer: Tokenizer that is callable on a string (with
            ``return_tensors="pt"``) and exposes ``decode(...)``.
        text: Prompt to tokenize and feed to ``model.generate``.
        max_new_tokens: Upper bound on newly generated tokens. Defaults to
            20, the value that was previously hard-coded.

    Returns:
        The first sequence returned by ``model.generate``, decoded with
        special tokens skipped.
    """
    # Move the inputs to the device the model's weights actually live on;
    # using the global DEVICE breaks when the model sits somewhere else.
    # Fall back to DEVICE only for a model without parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else DEVICE

    inputs = tokenizer(text, return_tensors="pt").to(target_device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,  # [Important] remove randomness (greedy decoding)
            temperature=None,
            top_p=None,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
# Build the reward calculator from the 'reward_option' entry of REWARD_CONFIG.
# NOTE(review): REWARD_CONFIG is not defined in this notebook; presumably it
# is provided by `from _init import *` — confirm.
reward_calculator = RewardCalculator(REWARD_CONFIG['reward_option'])

In [None]:
# 8. After restarting the kernel, run only up to this cell (uncommented) and check that the output is identical
# MODEL_CONFIG['resume_run_time'] = '2025-12-18-03-35-16'

# ranger_trainer3 = RangerTrainer(
#     MODEL_CONFIG,
#     reward_calculator,
#     out_dir
# )

# gen4 = get_response(ranger_trainer3._model, ranger_trainer3._tokenizer, "Hello, tell me a story.")
# print(f'[Resume reboot] gen4 : {gen4}')

In [None]:
# Create the trainer under test from the model config, the reward calculator
# and the output directory.
# NOTE(review): MODEL_CONFIG is not defined in this notebook; presumably it
# is provided by `from _init import *` — confirm.
ranger_trainer = RangerTrainer(
    MODEL_CONFIG,
    reward_calculator,
    out_dir
)

In [None]:
# 1. Generate with the model before any modification
gen1 = get_response(
    model=ranger_trainer._model,
    tokenizer=ranger_trainer._tokenizer,
    text="Hello, tell me a story.",
)
print(f'[Org] gen1 : {gen1}')

In [None]:
# 2. Force-change the weights
# No training has happened, so overwrite the values by hand to make the
# change in the weights demonstrable.
with torch.no_grad():
    modified_count = 0

    # Every parameter whose name contains "lora_", in named_parameters() order.
    lora_tensors = (
        tensor
        for param_name, tensor in ranger_trainer._model.named_parameters()
        if "lora_" in param_name
    )
    for modified_count, tensor in enumerate(lora_tensors, start=1):
        # Replace the parameter in place with random-normal values.
        tensor.copy_(torch.randn_like(tensor))

print(f"{modified_count}개의 LoRA 파라미터를 망가뜨렸습니다.")

In [None]:
# 3. Generate with the modified model
gen2 = get_response(
    model=ranger_trainer._model,
    tokenizer=ranger_trainer._tokenizer,
    text="Hello, tell me a story.",
)
print(f'[Mod] gen2 : {gen2}')

In [None]:
# 4. Save the model
# NOTE(review): `_save` is a private trainer method and the meaning of the two
# '1' arguments is not visible here — confirm against RangerTrainer.
ranger_trainer._save('1', '1')

In [None]:
# 5. Free memory (to make the test conclusive)
del ranger_trainer._model
# Run the garbage collector first: objects that are only kept alive by
# reference cycles are otherwise not freed yet, and empty_cache() can only
# release memory that no live tensor still occupies.
# NOTE(review): other references to the model or its parameters (e.g. held
# elsewhere inside the trainer) would still keep that memory alive — confirm.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# 6. Initialize a new model based on the saved path
# Stores the first trainer's run time under MODEL_CONFIG['resume_run_time']
# before building the second trainer. This mutates MODEL_CONFIG in place, so
# trainers created afterwards see the same setting.
# NOTE(review): presumably RangerTrainer uses 'resume_run_time' to locate the
# saved checkpoint — confirm against RangerTrainer.
MODEL_CONFIG['resume_run_time'] = ranger_trainer._run_time

ranger_trainer2 = RangerTrainer(
    MODEL_CONFIG,
    reward_calculator,
    out_dir
)

In [None]:
# 7. Generate with the saved (resumed) model
gen3 = get_response(
    model=ranger_trainer2._model,
    tokenizer=ranger_trainer2._tokenizer,
    text="Hello, tell me a story.",
)
print(f'[Resume] gen3 : {gen3}')