In [1]:
import os

os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["MLFLOW_TRACKING_URI"] = "http://localhost:5000"

## Configuration

In [2]:
from qurious.config import Config

config = Config.from_yaml_file("./config.yaml")

## Load Grid World dataset

In [3]:
from pathlib import Path

from datasets import load_dataset

from qurious.utils import display_pd_table


# create new "text" column by concatenating "prompt" and "response" columns
def concat_prompt_response(example):
    return {"text": example["prompt"] + example["response"]}


dataset = load_dataset("json", data_files=str(Path(config.paths.data_dir) / "grid_world_1k.jsonl"))["train"]
dataset = dataset.map(concat_prompt_response)

display_pd_table(dataset.select(range(3)), replace_newlines=["env"])

Unnamed: 0,prompt,response,env,size,actions,numeric_actions,n_steps,start_pos,goal_pos,obstacles,text
0,Grid World:\n. # . . .\n. # . . .\n. . . A G\n. . . # .\n. . # # .\n\nActions:\n,right,. # . . . . # . . . . . . A G . . . # . . . # # .,5,right,[1],1,"[2, 3]","[2, 4]","[[3, 3], [4, 3], [1, 1], [4, 2], [0, 1]]",Grid World:\n. # . . .\n. # . . .\n. . . A G\n. . . # .\n. . # # .\n\nActions:\nright
1,Grid World:\n. . . . # . # . . .\n# . . . . . . . . .\n. . . . . # . . . .\n. . . . . . . . . .\n. G # . # . . . . .\n. . . . . # . . # #\n. . . . . . # . . .\n. . . . . . # . # .\n. . . . . . . . # .\n. . . . . # . . # A\n\nActions:\n,up up up left left up left up up left left left left left down,. . . . # . # . . . # . . . . . . . . . . . . . . # . . . . . . . . . . . . . . . G # . # . . . . . . . . . . # . . # # . . . . . . # . . . . . . . . . # . # . . . . . . . . . # . . . . . . # . . # A,10,up up up left left up left up up left left left left left down,"[0, 0, 0, 3, 3, 0, 3, 0, 0, 3, 3, 3, 3, 3, 2]",15,"[9, 9]","[4, 1]","[[1, 0], [2, 5], [4, 4], [8, 8], [4, 2], [0, 4], [9, 8], [6, 6], [9, 8], [5, 8], [5, 5], [9, 5], [0, 6], [7, 6], [1, 0], [7, 8], [5, 9], [7, 6]]",Grid World:\n. . . . # . # . . .\n# . . . . . . . . .\n. . . . . # . . . .\n. . . . . . . . . .\n. G # . # . . . . .\n. . . . . # . . # #\n. . . . . . # . . .\n. . . . . . # . # .\n. . . . . . . . # .\n. . . . . # . . # A\n\nActions:\nup up up left left up left up up left left left left left down
2,Grid World:\n. # . A . . . .\n. . . . . # . .\n. G . # . . . .\n# . . . . . . .\n. . . # . . . .\n. . . . . . # .\n. . . . . # # .\n. . . # . . . .\n\nActions:\n,left down down left,. # . A . . . . . . . . . # . . . G . # . . . . # . . . . . . . . . . # . . . . . . . . . . # . . . . . . # # . . . . # . . . .,8,left down down left,"[3, 2, 2, 3]",4,"[0, 3]","[2, 1]","[[3, 0], [6, 5], [6, 6], [6, 6], [4, 3], [5, 6], [7, 3], [1, 5], [1, 5], [2, 3], [0, 1]]",Grid World:\n. # . A . . . .\n. . . . . # . .\n. G . # . . . .\n# . . . . . . .\n. . . # . . . .\n. . . . . . # .\n. . . . . # # .\n. . . # . . . .\n\nActions:\nleft down down left


## Split Train and Test Dataset

In [4]:
dataset = dataset.train_test_split(test_size=0.2, seed=42)

print(f"Training dataset contains {len(dataset['train'])} examples")
print(f"Test dataset contains {len(dataset['test'])} examples")

Training dataset contains 800 examples
Test dataset contains 200 examples


## Load models and tokenizer with LoraManager

In [5]:
from qurious.llms.lora_manager import LoraManager

lora_manager = LoraManager(config)

# Get the PEFT model
peft_model = lora_manager.get_model("default")
tokenizer = lora_manager.tokenizer

# Make sure the model is in training mode and parameters require gradients
peft_model.train()

# Verify parameters require gradients
trainable_params = 0
all_param = 0
for param in peft_model.parameters():
    all_param += param.numel()
    if param.requires_grad:
        trainable_params += param.numel()

print(
    f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param:.2f}%"
)

using device: mps
Loading base model: gpt2
Creating adapter: default
trainable params: 1179648 || all params: 125619456 || trainable%: 0.94%




## Padding the Training Dataset

In [6]:
# You can use a different max length if your custom dataset has shorter/longer input sequences.
MAX_LENGTH = 160


def tokenize_and_pad_to_fixed_length(sample):
    result = tokenizer(
        sample["text"],
        truncation=True,
        max_length=MAX_LENGTH,
        padding="max_length",
    )
    result["labels"] = result["input_ids"].copy()
    return result


tokenized_train_dataset = dataset["train"].map(
    tokenize_and_pad_to_fixed_length, remove_columns=dataset["train"].column_names
)
tokenized_eval_dataset = dataset["test"].map(
    tokenize_and_pad_to_fixed_length, remove_columns=dataset["test"].column_names
)

assert all(len(x["input_ids"]) == MAX_LENGTH for x in tokenized_train_dataset)
assert all(len(x["input_ids"]) == MAX_LENGTH for x in tokenized_eval_dataset)

# assert that every attention_mask starts with a 0 (no example is cut off)
assert all(x["attention_mask"][0] == 0 for x in tokenized_train_dataset if len(x["attention_mask"]) > 0)
assert all(x["attention_mask"][0] == 0 for x in tokenized_eval_dataset if len(x["attention_mask"]) > 0)

display_pd_table(tokenized_train_dataset.select(range(1)))

print(tokenizer.decode(tokenized_train_dataset[0]["input_ids"], skip_special_tokens=True))

Unnamed: 0,input_ids,attention_mask,labels
0,"[50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, ...]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]","[50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, ...]"


Grid World:
. . . . . .
. . # . # .
. . A # . .
# . # . . .
. . . . . .
. . # G . .

Actions:
left down down right right down


## Train Model

In [7]:
from torch.nn import CrossEntropyLoss
from torch.optim import AdamW
from torch.optim.lr_scheduler import LinearLR
from torch.utils.data import DataLoader

from qurious.llms.trainer import Trainer

# make data loaders for PyTorch format
train_dataloader = DataLoader(tokenized_train_dataset.with_format("torch"), batch_size=8, shuffle=True)
eval_dataloader = DataLoader(tokenized_eval_dataset.with_format("torch"), batch_size=8, shuffle=False)

optimizer = AdamW(peft_model.parameters(), lr=config.training.learning_rate, weight_decay=0.01)
scheduler = LinearLR(optimizer, start_factor=1, end_factor=0.1, total_iters=len(dataset["train"]))
loss_fn = CrossEntropyLoss()

trainer = Trainer(
    model=peft_model,
    config=config,
    optimizer=optimizer,
    scheduler=scheduler,
    loggers=["console", "mlflow"],
    experiment_name="maze-supervised-finetune",
    loss_fn=loss_fn,
)


using device: mps


MlflowException: API request to http://127.0.0.1:5000/api/2.0/mlflow/experiments/get-by-name failed with exception HTTPConnectionPool(host='127.0.0.1', port=5000): Max retries exceeded with url: /api/2.0/mlflow/experiments/get-by-name?experiment_name=maze-supervised-finetune (Caused by ResponseError('too many 500 error responses'))

In [None]:
trainer.train(train_dataloader=train_dataloader, eval_dataloader=eval_dataloader, num_epochs=config.training.epochs)

best_model = trainer.load_checkpoint(
    str(Path(config.paths.checkpoint_dir) / "best_model.pt"), load_optimizer=False, load_scheduler=False
)

## Generate outputs

In [None]:
import mlflow
import pandas as pd
import torch
from tqdm import tqdm

eval_samples = dataset["test"].select(range(10))
# model = lora_manager.get_base_model()
# model = peft_model
model = best_model

# Generate outputs
results = []
for sample in tqdm(eval_samples):
    # Assuming your dataset has 'input' and 'target' fields
    # Adjust the field names as needed for your specific dataset
    input_text = sample["prompt"]
    reference = sample["response"]

    # Tokenize and generate
    inputs = tokenizer(
        input_text,
        return_tensors="pt",
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=20,  # Adjust as needed
            do_sample=True,
            temperature=0.3,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode the generated output
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract only the newly generated part (optional)
    # This is model and tokenizer specific, you may need to adjust
    generated_response = generated_text[len(tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=True)) :]

    results.append({"input": input_text, "generated": generated_response, "reference": reference})


display_pd_table(results, replace_newlines=["input", "generated", "reference"])

df = pd.DataFrame(results)
with mlflow.start_run(trainer.run_id):
    mlflow.log_table(
        df,
        artifact_file="generated_samples.json",
    )