### Dataset Loading and Preparation

In [7]:
from datasets import load_dataset, load_metric
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer
)

### Reward Model

In [3]:
from src.reward.train_reward import load_trained_warp_reward_model

Load the trained reward model from Hugging Face. It was trained earlier using the `create_train_reward_model` function in `src/reward/train_reward.py`.

In [4]:
# Load the previously trained reward model; the helper returns a pair that is
# unpacked as (tokenizer, model), in that order.
reward_tokenizer, reward_model = load_trained_warp_reward_model()

### SFT Model

#### Train and Eval Dataset Preparation

#### Model Training

In [8]:
# Checkpoint identifier for the SFT policy; the tokenizer and the causal-LM
# weights are both loaded from this same name so they stay in sync.
sft_model_name = "lvwerra/gpt2-imdb"

sft_tokenizer = AutoTokenizer.from_pretrained(sft_model_name)
sft_model = AutoModelForCausalLM.from_pretrained(sft_model_name)

In [9]:
# Reuse the end-of-sequence token as the padding token.
# NOTE(review): presumably needed because the GPT-2 tokenizer defines no pad
# token of its own — confirm against the checkpoint's tokenizer config.
sft_tokenizer.pad_token = sft_tokenizer.eos_token

### WARP Training

In [10]:
from src.trainer.warp_config import WARPConfig
from src.trainer.warp_trainer import WARPTrainer

ModuleNotFoundError: No module named 'utils'

In [None]:
# NOTE(review): the saved output of the cell that imports WARPConfig/WARPTrainer
# is `ModuleNotFoundError: No module named 'utils'`; that import must succeed
# before this cell can run.

# WARPConfig is instantiated with no arguments, so the class's own defaults apply.
warp_config = WARPConfig()

# Objects handed to the trainer alongside the config: the SFT model with its
# tokenizer, and the reward model with its tokenizer.
warp_trainer_components = dict(
    model=sft_model,
    tokenizer=sft_tokenizer,
    reward_model=reward_model,
    reward_tokenizer=reward_tokenizer,
)

warp_trainer = WARPTrainer(config=warp_config, **warp_trainer_components)

In [None]:
# Kick off the WARP training run on the trainer constructed in the previous cell.
warp_trainer.train()