In [1]:
import os

os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["MLFLOW_TRACKING_URI"] = "http://localhost:5000"

## Load Grid World dataset

In [2]:
import json

import pandas as pd

from qurious.utils import display_pd_table

with open("./grid_world_1k.jsonl") as f:
    data = [json.loads(line) for line in f.readlines()]

df = pd.DataFrame(data, columns=["prompt", "response", "env", "size", "n_steps"])
df["text"] = df["prompt"] + df["response"]
df["env"] = df["env"].apply(lambda x: f"<pre>{x}</pre>")

display_pd_table(df.head(3), replace_newlines=["env"])

Unnamed: 0,prompt,response,env,size,n_steps,text
0,Grid World:\n. # . . .\n. # . . .\n. . . A G\n. . . # .\n. . # # .\n\nActions:\n,right,. # . . . . # . . . . . . A G . . . # . . . # # .,5,1,Grid World:\n. # . . .\n. # . . .\n. . . A G\n. . . # .\n. . # # .\n\nActions:\nright
1,Grid World:\n. . . . # . # . . .\n# . . . . . . . . .\n. . . . . # . . . .\n. . . . . . . . . .\n. G # . # . . . . .\n. . . . . # . . # #\n. . . . . . # . . .\n. . . . . . # . # .\n. . . . . . . . # .\n. . . . . # . . # A\n\nActions:\n,up up up left left up left up up left left left left left down,. . . . # . # . . . # . . . . . . . . . . . . . . # . . . . . . . . . . . . . . . G # . # . . . . . . . . . . # . . # # . . . . . . # . . . . . . . . . # . # . . . . . . . . . # . . . . . . # . . # A,10,15,Grid World:\n. . . . # . # . . .\n# . . . . . . . . .\n. . . . . # . . . .\n. . . . . . . . . .\n. G # . # . . . . .\n. . . . . # . . # #\n. . . . . . # . . .\n. . . . . . # . # .\n. . . . . . . . # .\n. . . . . # . . # A\n\nActions:\nup up up left left up left up up left left left left left down
2,Grid World:\n. # . A . . . .\n. . . . . # . .\n. G . # . . . .\n# . . . . . . .\n. . . # . . . .\n. . . . . . # .\n. . . . . # # .\n. . . # . . . .\n\nActions:\n,left down down left,. # . A . . . . . . . . . # . . . G . # . . . . # . . . . . . . . . . # . . . . . . . . . . # . . . . . . # # . . . . # . . . .,8,4,Grid World:\n. # . A . . . .\n. . . . . # . .\n. G . # . . . .\n# . . . . . . .\n. . . # . . . .\n. . . . . . # .\n. . . . . # # .\n. . . # . . . .\n\nActions:\nleft down down left


## Split Train and Test Dataset

In [3]:
from datasets import Dataset

dataset = Dataset.from_pandas(df)
dataset = dataset.train_test_split(test_size=0.2, seed=42)


print(f"Training dataset contains {len(dataset['train'])} examples")
print(f"Test dataset contains {len(dataset['test'])} examples")

Training dataset contains 800 examples
Test dataset contains 200 examples


## Load models and tokenizer with LoraManager

In [4]:
from qurious.config import Config
from qurious.llms.lora_manager import LoraManager

config = Config()
config.model.base_model = "gpt2"
config.training.batch_size = 4
config.training.learning_rate = 5e-5
config.training.scheduler_step_per_batch = True
config.training.log_interval = 10

lora_manager = LoraManager(config)

# Get the PEFT model
peft_model = lora_manager.get_model("default")
tokenizer = lora_manager.tokenizer

# Make sure the model is in training mode and parameters require gradients
peft_model.train()

# Verify parameters require gradients
trainable_params = 0
all_param = 0
for param in peft_model.parameters():
    all_param += param.numel()
    if param.requires_grad:
        trainable_params += param.numel()

print(
    f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param:.2f}%"
)

using device: mps
Loading base model: gpt2
Creating adapter: default
trainable params: 1179648 || all params: 125619456 || trainable%: 0.94%




## Padding the Training Dataset

As a final step of dataset preparation, we need to apply padding to the training dataset. Padding ensures that all input sequences in a batch are of the same length.

A crucial point to note is the need to add padding to the left. This approach is adopted because the model generates tokens autoregressively, meaning it continues from the last token. Adding padding to the right would cause the model to generate new tokens from these padding tokens, resulting in the output sequence including padding tokens in the middle.

Padding to right
```
Today |  is  |   a    |  cold  |  <pad>  ==generate=>  "Today is a cold <pad> day"
 How  |  to  | become |  <pad> |  <pad>  ==generate=>  "How to become a <pad> <pad> great engineer".
```

Padding to left:
```
<pad> |  Today  |  is  |  a   |  cold     ==generate=>  "<pad> Today is a cold day"
<pad> |  <pad>  |  How |  to  |  become   ==generate=>  "<pad> <pad> How to become a great engineer".
```

In [5]:
# You can use a different max length if your custom dataset has shorter/longer input sequences.
MAX_LENGTH = 256


def tokenize_and_pad_to_fixed_length(sample):
    result = tokenizer(
        sample["prompt"],
        truncation=True,
        max_length=MAX_LENGTH,
        padding="max_length",
    )
    result["labels"] = result["input_ids"].copy()
    return result


tokenized_train_dataset = dataset["train"].map(
    tokenize_and_pad_to_fixed_length, remove_columns=dataset["train"].column_names
)
tokenized_eval_dataset = dataset["test"].map(
    tokenize_and_pad_to_fixed_length, remove_columns=dataset["test"].column_names
)

assert all(len(x["input_ids"]) == MAX_LENGTH for x in tokenized_train_dataset)

display_pd_table(tokenized_train_dataset.select(range(1)))

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Unnamed: 0,input_ids,attention_mask,labels
0,"[50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, ...]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]","[50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, ...]"


## Train Model

In [6]:
from torch.nn import CrossEntropyLoss
from torch.optim import AdamW
from torch.optim.lr_scheduler import LinearLR
from torch.utils.data import DataLoader

from qurious.llms.trainer import Trainer

# make data loaders for PyTorch format
train_dataloader = DataLoader(tokenized_train_dataset.with_format("torch"), batch_size=8, shuffle=True)
eval_dataloader = DataLoader(tokenized_eval_dataset.with_format("torch"), batch_size=8, shuffle=False)

optimizer = AdamW(peft_model.parameters(), lr=config.training.learning_rate, weight_decay=0.01)
scheduler = LinearLR(optimizer, start_factor=1, end_factor=0.1, total_iters=len(dataset["train"]))
loss_fn = CrossEntropyLoss()

trainer = Trainer(
    model=peft_model,
    config=config,
    optimizer=optimizer,
    scheduler=scheduler,
    loggers=["console", "mlflow"],
    experiment_name="maze-gpt2-finetune",
    loss_fn=loss_fn,
)

trainer.train(train_dataloader=train_dataloader, eval_dataloader=eval_dataloader, num_epochs=5)

using device: mps
MLFlow experiment 'maze-gpt2-finetune' started with run name 'loud-eel-800'


2025-03-10 17:23:04,342 - qurious.llms.trainer - INFO - Starting training for 5 epochs


Epoch 1:   0%|          | 0/100 [00:00<?, ?it/s]

`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.
2025-03-10 17:23:07,182 - qurious.llms.trainer - INFO - step 10: train_loss: 6.8347, epoch: 0, lr: 4.94e-05
2025-03-10 17:23:09,650 - qurious.llms.trainer - INFO - step 20: train_loss: 5.2262, epoch: 0, lr: 4.89e-05
2025-03-10 17:23:12,159 - qurious.llms.trainer - INFO - step 30: train_loss: 2.6965, epoch: 0, lr: 4.83e-05
2025-03-10 17:23:14,684 - qurious.llms.trainer - INFO - step 40: train_loss: 1.6503, epoch: 0, lr: 4.78e-05
2025-03-10 17:23:17,151 - qurious.llms.trainer - INFO - step 50: train_loss: 1.2354, epoch: 0, lr: 4.72e-05
2025-03-10 17:23:19,622 - qurious.llms.trainer - INFO - step 60: train_loss: 1.2232, epoch: 0, lr: 4.66e-05
2025-03-10 17:23:22,099 - qurious.llms.trainer - INFO - step 70: train_loss: 1.0506, epoch: 0, lr: 4.61e-05
2025-03-10 17:23:24,568 - qurious.llms.trainer - INFO - step 80: train_loss: 1.1227, epoch: 0, lr: 4.55e-05
2025-03-10 17:23:27,035 - qurio

Evaluation:   0%|          | 0/25 [00:00<?, ?it/s]

2025-03-10 17:23:31,944 - qurious.llms.trainer - INFO - step 100: eval_loss: 0.8202


Epoch 2:   0%|          | 0/100 [00:00<?, ?it/s]

2025-03-10 17:23:34,446 - qurious.llms.trainer - INFO - step 110: train_loss: 1.1119, epoch: 1, lr: 4.38e-05
2025-03-10 17:23:36,941 - qurious.llms.trainer - INFO - step 120: train_loss: 1.0363, epoch: 1, lr: 4.33e-05
2025-03-10 17:23:39,438 - qurious.llms.trainer - INFO - step 130: train_loss: 0.9403, epoch: 1, lr: 4.27e-05
2025-03-10 17:23:41,939 - qurious.llms.trainer - INFO - step 140: train_loss: 1.1389, epoch: 1, lr: 4.21e-05
2025-03-10 17:23:44,504 - qurious.llms.trainer - INFO - step 150: train_loss: 0.8551, epoch: 1, lr: 4.16e-05
2025-03-10 17:23:47,043 - qurious.llms.trainer - INFO - step 160: train_loss: 0.9845, epoch: 1, lr: 4.10e-05
2025-03-10 17:23:49,539 - qurious.llms.trainer - INFO - step 170: train_loss: 1.0175, epoch: 1, lr: 4.04e-05
2025-03-10 17:23:52,012 - qurious.llms.trainer - INFO - step 180: train_loss: 1.1776, epoch: 1, lr: 3.99e-05
2025-03-10 17:23:54,540 - qurious.llms.trainer - INFO - step 190: train_loss: 1.0225, epoch: 1, lr: 3.93e-05
2025-03-10 17:23:57

Evaluation:   0%|          | 0/25 [00:00<?, ?it/s]

2025-03-10 17:23:59,609 - qurious.llms.trainer - INFO - step 200: eval_loss: 0.7909


Epoch 3:   0%|          | 0/100 [00:00<?, ?it/s]

2025-03-10 17:24:02,303 - qurious.llms.trainer - INFO - step 210: train_loss: 0.9580, epoch: 2, lr: 3.82e-05
2025-03-10 17:24:04,931 - qurious.llms.trainer - INFO - step 220: train_loss: 1.0205, epoch: 2, lr: 3.76e-05
2025-03-10 17:24:07,432 - qurious.llms.trainer - INFO - step 230: train_loss: 1.0382, epoch: 2, lr: 3.71e-05
2025-03-10 17:24:09,975 - qurious.llms.trainer - INFO - step 240: train_loss: 0.9844, epoch: 2, lr: 3.65e-05
2025-03-10 17:24:12,549 - qurious.llms.trainer - INFO - step 250: train_loss: 1.0037, epoch: 2, lr: 3.59e-05
2025-03-10 17:24:15,116 - qurious.llms.trainer - INFO - step 260: train_loss: 1.1032, epoch: 2, lr: 3.54e-05
2025-03-10 17:24:17,659 - qurious.llms.trainer - INFO - step 270: train_loss: 0.9423, epoch: 2, lr: 3.48e-05
2025-03-10 17:24:20,158 - qurious.llms.trainer - INFO - step 280: train_loss: 1.2137, epoch: 2, lr: 3.43e-05
2025-03-10 17:24:22,655 - qurious.llms.trainer - INFO - step 290: train_loss: 1.0775, epoch: 2, lr: 3.37e-05
2025-03-10 17:24:25

Evaluation:   0%|          | 0/25 [00:00<?, ?it/s]

2025-03-10 17:24:27,641 - qurious.llms.trainer - INFO - step 300: eval_loss: 0.9329


Epoch 4:   0%|          | 0/100 [00:00<?, ?it/s]

2025-03-10 17:24:30,200 - qurious.llms.trainer - INFO - step 310: train_loss: 0.8827, epoch: 3, lr: 3.26e-05
2025-03-10 17:24:32,706 - qurious.llms.trainer - INFO - step 320: train_loss: 1.1469, epoch: 3, lr: 3.20e-05
2025-03-10 17:24:35,200 - qurious.llms.trainer - INFO - step 330: train_loss: 0.9879, epoch: 3, lr: 3.14e-05
2025-03-10 17:24:37,718 - qurious.llms.trainer - INFO - step 340: train_loss: 1.0194, epoch: 3, lr: 3.09e-05
2025-03-10 17:24:40,211 - qurious.llms.trainer - INFO - step 350: train_loss: 1.1099, epoch: 3, lr: 3.03e-05
2025-03-10 17:24:42,755 - qurious.llms.trainer - INFO - step 360: train_loss: 0.9169, epoch: 3, lr: 2.98e-05
2025-03-10 17:24:45,274 - qurious.llms.trainer - INFO - step 370: train_loss: 1.0286, epoch: 3, lr: 2.92e-05
2025-03-10 17:24:47,780 - qurious.llms.trainer - INFO - step 380: train_loss: 0.9116, epoch: 3, lr: 2.86e-05
2025-03-10 17:24:50,296 - qurious.llms.trainer - INFO - step 390: train_loss: 1.1038, epoch: 3, lr: 2.81e-05
2025-03-10 17:24:52

Evaluation:   0%|          | 0/25 [00:00<?, ?it/s]

2025-03-10 17:24:55,408 - qurious.llms.trainer - INFO - step 400: eval_loss: 0.8381


Epoch 5:   0%|          | 0/100 [00:00<?, ?it/s]

2025-03-10 17:24:57,952 - qurious.llms.trainer - INFO - step 410: train_loss: 1.0542, epoch: 4, lr: 2.69e-05
2025-03-10 17:25:00,519 - qurious.llms.trainer - INFO - step 420: train_loss: 1.0297, epoch: 4, lr: 2.64e-05
2025-03-10 17:25:03,093 - qurious.llms.trainer - INFO - step 430: train_loss: 0.9424, epoch: 4, lr: 2.58e-05
2025-03-10 17:25:05,694 - qurious.llms.trainer - INFO - step 440: train_loss: 1.0396, epoch: 4, lr: 2.53e-05
2025-03-10 17:25:08,390 - qurious.llms.trainer - INFO - step 450: train_loss: 1.2363, epoch: 4, lr: 2.47e-05
2025-03-10 17:25:11,098 - qurious.llms.trainer - INFO - step 460: train_loss: 0.9789, epoch: 4, lr: 2.41e-05
2025-03-10 17:25:13,678 - qurious.llms.trainer - INFO - step 470: train_loss: 1.0828, epoch: 4, lr: 2.36e-05
2025-03-10 17:25:16,331 - qurious.llms.trainer - INFO - step 480: train_loss: 1.2786, epoch: 4, lr: 2.30e-05
2025-03-10 17:25:19,006 - qurious.llms.trainer - INFO - step 490: train_loss: 1.1891, epoch: 4, lr: 2.24e-05
2025-03-10 17:25:21

Evaluation:   0%|          | 0/25 [00:00<?, ?it/s]

2025-03-10 17:25:24,505 - qurious.llms.trainer - INFO - step 500: eval_loss: 0.9036
2025-03-10 17:25:24,509 - qurious.llms.trainer - INFO - Training completed. Best eval_loss was at epoch 2


🏃 View run loud-eel-800 at: http://127.0.0.1:5000/#/experiments/926729112683475320/runs/87405fdab2e041cfbe8ff836b7ebcc08
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/926729112683475320


{'train_loss': [2.622568038702011,
  1.0216279006004334,
  1.0142583006620407,
  1.04672212600708,
  1.0777782374620437],
 'eval_loss': [0.8202412390708923,
  0.7909462666511535,
  0.9328550434112549,
  0.8380931901931763,
  0.9036318516731262]}