In [1]:
import warnings
warnings.filterwarnings("ignore")

import yaml
from rewardmodel import RandomRewardModel
from ppotrainer import CustomPPOTrainer
from trl import PPOConfig, AutoModelForCausalLMWithValueHead
from datasetbuilder import IMDBDatasetBuilder, DatasetCombiner
# Load config
config_path = "/cephfs/users/bashir/interactive-learning-ma/config/ppo.yaml"
with open(config_path, "r") as f:
    cfg = yaml.safe_load(f)

# Create PPO config
ppo_config = PPOConfig(
    model_name=cfg["model_name"],
    learning_rate=cfg.get("learning_rate", 1.41e-5),
    log_with=cfg.get("log_with", None),
    mini_batch_size=16,#cfg.get("batch_size"),
    batch_size=16,#cfg.get("batch_size"),
)

# Dataset token limit
token_limit = cfg.get("token_limit")
data_path = "/cephfs/users/bashir/interactive-learning-ma/data/ppo/"

query_min_length = 64# cfg.get("query_min_length")
query_max_length =  128 #cfg.get("query_max_length")
# Dataset builders
builder1 = IMDBDatasetBuilder(ppo_config, cache_dir=data_path, min_len=query_min_length, max_len=query_max_length, use_cache=True)
#builder2 = TinyStoriesDatasetBuilder(ppo_config, cache_dir=data_path)

# Combine datasets
combined_dataset = DatasetCombiner([builder1])
combined_dataset.set_token_limit(token_limit=50000)
combined_dataset = combined_dataset.load()

# Reward model
reward_model = RandomRewardModel()

# Models

[2025-07-20 01:46:36,214] [INFO] [real_accelerator.py:254:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/cephfs/users/bashir/miniconda3/envs/myenv/compiler_compat/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
/cephfs/users/bashir/miniconda3/envs/myenv/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::runtime_error::~runtime_error()@GLIBCXX_3.4'
/cephfs/users/bashir/miniconda3/envs/myenv/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__gxx_personality_v0@CXXABI_1.3'
/cephfs/users/bashir/miniconda3/envs/myenv/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::tellp()@GLIBCXX_3.4'
/cephfs/users/bashir/miniconda3/envs/myenv/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::chrono::_V2::steady_clock::now()@GLIBCXX_3.4.19'
/cephfs/users/bashir/miniconda3/envs/myenv/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_M_replace_aux(unsigned long, unsigned long, unsi

[2025-07-20 01:46:37,619] [INFO] [logging.py:107:log_dist] [Rank -1] [TorchCheckpointEngine] Initialized with serialization = False


2025-07-20 01:46:38,298 - ppo_trainer - INFO - Distributing token limit 50000 across 1 builders
2025-07-20 01:46:38,299 - ppo_trainer - INFO - Token limit set to: 50000
2025-07-20 01:46:38,300 - ppo_trainer - INFO - Loading datasets from builders...
2025-07-20 01:46:38,300 - ppo_trainer - INFO - Loading dataset: stanfordnlp/imdb
2025-07-20 01:46:38,300 - ppo_trainer - INFO - Loading dataset from cache
2025-07-20 01:46:38,308 - ppo_trainer - INFO - Truncating dataset to token limit
2025-07-20 01:46:38,365 - ppo_trainer - INFO - Truncated dataset to 49948 tokens across 538 samples
2025-07-20 01:46:38,386 - ppo_trainer - INFO - Loaded and processed dataset: stanfordnlp/imdb with 538 samples
2025-07-20 01:46:38,386 - ppo_trainer - INFO - Concatenating datasets


In [2]:
model = AutoModelForCausalLMWithValueHead.from_pretrained(cfg["model_name"])
ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(cfg["model_name"])
tokenizer = builder1.tokenizer

# Trainer
trainer = CustomPPOTrainer(
    config=ppo_config,
    model=model,
    ref_model=ref_model,
    tokenizer=tokenizer,
    dataset=combined_dataset,
    reward_fn=reward_model,
    save_base_dir=cfg.get("save_base_dir", "saved_models")
)

# Generation kwargs from config
trainer.set_generation_kwargs(**cfg.get("generation_kwargs", {}))

# Run training loop
trainer.run_training_loop(
    num_epochs=cfg.get("num_epochs", 1),
    
) 


[34m[1mwandb[0m: Currently logged in as: [33mbizalihamza[0m ([33mbizalihamza-fraunhofer-iais[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


2025-07-20 01:46:44,583 - ppo_trainer - INFO - Initialized CustomPPOTrainer
2025-07-20 01:46:44,585 - ppo_trainer - INFO - Setting generation kwargs: {'top_k': 20, 'top_p': 0.9, 'do_sample': True}
2025-07-20 01:46:44,585 - ppo_trainer - INFO - Starting training loop for 2 batches
  0%|          | 0/33 [00:00<?, ?it/s]You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
2025-07-20 01:46:45,660 - ppo_trainer - INFO - Generating reward scores for 16 samples
2025-07-20 01:46:46,892 - ppo_trainer - INFO - Batch Stats — Reward Avg: 4.2500, Std: 2.8166 | KL: 0.0000, Entropy: 0.0000, Policy Loss: 0.0000, Value Loss: 0.0000
  3%|▎         | 1/33 [00:02<01:13,  2.30s/it]2025-07-20 01:46:47,489 - ppo_trainer - INFO - Generating reward scores for 16 samples
2025-07-20 01:46:48,287 - ppo_trainer - INFO - Batch Stats — Reward Avg

In [3]:
combined_dataset[1]

{'review': '"I Am Curious: Yellow" is a risible and pretentious steaming pile. It doesn\'t matter what one\'s political views are because this film can hardly be taken seriously on any level. As for the claim that frontal male nudity is an automatic NC-17, that isn\'t true. I\'ve seen R-rated films with male nudity. Granted, they only offer some fleeting views, but where are the R-rated films with gaping vulvas and flapping labia? Nowhere, because they don\'t exist. The same goes for those crappy cable shows: schlongs swinging in the breeze but not a clitoris in sight. And those pretentious indie movies like The Brown Bunny, in which we\'re treated to the site of Vincent Gallo\'s throbbing johnson, but not a trace of pink visible on Chloe Sevigny. Before crying (or implying) "double-standard" in matters of nudity, the mentally obtuse should take into account one unavoidably obvious anatomical difference between men and women: there are no genitals on display when actresses appears nude

In [4]:
a = [1,2,4,5,6]

In [5]:
a[:100]

[1, 2, 4, 5, 6]

In [6]:
tokenizer = AutoTokenizer.from_pretrained("gpt2")

NameError: name 'AutoTokenizer' is not defined

In [None]:
text = "The quick brown fox jumps over the lazy dog. "
input_str = text * 12000  # repeat as many times as needed
tokens = tokenizer.encode(input_str)
print(len(tokens))


Token indices sequence length is longer than the specified maximum sequence length for this model (120001 > 1024). Running this sequence through the model will result in indexing errors


120001


In [None]:
len(tokens[:128])

128

In [None]:
tokenizer.model_max_length

1024

In [None]:
tokenizer.encode("a"*2)

[7252]

In [None]:
"a"*2

'aa'