In [None]:
# Overview on training models

For each of SFT, KTO, and DPO you have to set up a YAML file with the hyperparameters of the TRL trainer.
The SFT/KTO/DPO classes are only thin wrappers around the corresponding TRL classes, invoking them with the correct hyperparameters.

You can check the templates defined in the `config` directory.

Examples:

### SFT

In [None]:
from src.util import PROJECT_DIR
import os
import yaml
import tempfile

# Hyperparameters for the SFT example, kept as YAML text.
sft_yaml = """
max_seq_length: 12
language: kotlin
dataset_size: 10
epochs: 1
per_device_train_batch_size: 1
gradient_accumulation_steps: 1
learning_rate: 1.41e-5
weight_decay: 0.1
no_lora: True
lora_r: 64
lora_alpha: 16
lora_dropout: 0.05
dataset_name: stojchet/deepseek_bs1_kotlin-empty
base_model: deepseek-ai/deepseek-coder-1.3b-base
dataset_ref_field: whole_func_string
"""

# Fail fast if the text above does not parse to a mapping of hyperparameters.
if not isinstance(yaml.safe_load(sft_yaml), dict):
    raise ValueError("sft_yaml must parse to a YAML mapping of hyperparameters")

# delete=False keeps the file on disk after the `with` block closes it, so the
# training command in the next cell can read it; that cell removes it afterwards.
with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix=".yaml", dir=PROJECT_DIR) as temp_file:
    # Write the YAML text verbatim. Passing a `str` to yaml.dump() would
    # serialise the whole text as a single quoted scalar instead of key/value pairs.
    temp_file.write(sft_yaml)
    temp_file_path = temp_file.name
    # The config name is the file name without its ".yaml" extension.
    file_name = os.path.splitext(os.path.basename(temp_file_path))[0]
    abs_path = os.path.abspath(temp_file_path)
    print("File path:", temp_file.name)  # Print the temp file path
    print(file_name)

In [None]:
# Run the SFT script with the config whose base name is stored in `file_name`.
# IPython expands `$file_name` / `$abs_path` from the Python variables of the same name.
!python3 ../src/model/sft_model.py --config_path="" --config_name="$file_name"
# Delete the temporary config file at `abs_path` once the command has returned.
! rm "$abs_path"

In [None]:
# Hyperparameters for the KTO example, kept as YAML text.
kto_yaml = """
max_seq_length: 1500
language: kotlin
dataset_size: 10000
epochs: 1
per_device_train_batch_size: 8
gradient_accumulation_steps: 16
learning_rate: 1.41e-5
lora_r: 64
lora_alpha: 16
lora_dropout: 0.05
warmup_ratio: 0.1
dataset_name: stojchet/kto-deepseek_bs1_kotlin-empty
base_model: deepseek-ai/deepseek-coder-1.3b-base
"""

# Fail fast if the text above does not parse to a mapping of hyperparameters.
if not isinstance(yaml.safe_load(kto_yaml), dict):
    raise ValueError("kto_yaml must parse to a YAML mapping of hyperparameters")

# delete=False keeps the file on disk after the `with` block closes it, so the
# training command in the next cell can read it; that cell removes it afterwards.
with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix=".yaml", dir=PROJECT_DIR) as temp_file:
    # Write the YAML text verbatim. Passing a `str` to yaml.dump() would
    # serialise the whole text as a single quoted scalar instead of key/value pairs.
    temp_file.write(kto_yaml)
    temp_file_path = temp_file.name
    # The config name is the file name without its ".yaml" extension.
    file_name = os.path.splitext(os.path.basename(temp_file_path))[0]
    abs_path = os.path.abspath(temp_file_path)
    print("File path:", temp_file.name)  # Print the temp file path
    print(file_name)

In [None]:
# Run the KTO script with the config whose base name is stored in `file_name`.
# IPython expands `$file_name` / `$abs_path` from the Python variables of the same name.
!python3 ../src/model/kto_model.py --config_path="" --config_name="$file_name"
# Delete the temporary config file at `abs_path` once the command has returned.
! rm "$abs_path"

In [None]:
# Hyperparameters for the DPO example, kept as YAML text.
dpo_yaml = """
max_seq_length: 1500
language: kotlin
dataset_size: 10000
epochs: 1
per_device_train_batch_size: 1
gradient_accumulation_steps: 64
learning_rate: 1.41e-5
lora_r: 64
lora_alpha: 16
lora_dropout: 0.05
dataset_name: stojchet/dpo-deepseek_bs1_kotlin-empty
base_model: "deepseek-ai/deepseek-coder-1.3b-base"
"""

# Fail fast if the text above does not parse to a mapping of hyperparameters.
if not isinstance(yaml.safe_load(dpo_yaml), dict):
    raise ValueError("dpo_yaml must parse to a YAML mapping of hyperparameters")

# Write the DPO config to its own temp file and (re)bind `file_name` / `abs_path`.
# Without this step the next cell would reuse the values left over from an
# earlier cell, pointing at a config file that has already been deleted.
# delete=False keeps the file on disk after the `with` block closes it, so the
# training command in the next cell can read it; that cell removes it afterwards.
with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix=".yaml", dir=PROJECT_DIR) as temp_file:
    # Write the YAML text verbatim so the file holds key/value pairs.
    temp_file.write(dpo_yaml)
    temp_file_path = temp_file.name
    # The config name is the file name without its ".yaml" extension.
    file_name = os.path.splitext(os.path.basename(temp_file_path))[0]
    abs_path = os.path.abspath(temp_file_path)
    print("File path:", temp_file.name)  # Print the temp file path
    print(file_name)

In [None]:
# Run the DPO script with the config whose base name is stored in `file_name`.
# IPython expands `$file_name` / `$abs_path` from the Python variables of the same name.
!python3 ../src/model/dpo_model.py --config_path="" --config_name="$file_name"
# Delete the temporary config file at `abs_path` once the command has returned.
! rm "$abs_path"