In [1]:
import glob
import jericho

env = jericho.FrotzEnv("./z-machine-games-master/jericho-game-suite/zork1.z5")

initial_obs, info = env.reset()
walkthrough = env.get_walkthrough()

def get_steps(filename: str):
    env = jericho.FrotzEnv(filename)

    steps = []
    
    obs = initial_obs
    for step in walkthrough:
        steps.append((obs, step))
        
        obs, reward, done, info = env.step(step)
        if done:
            break

    env.close()

    return steps


steps = []
for game_file in glob.glob("./z-machine-games-master/jericho-game-suite/*.z5"):
    steps.append(get_steps(game_file))

In [2]:
from datasets import Dataset
from unsloth import standardize_sharegpt

def steps_to_dataset(steps: list[list[tuple[str, str]]], length: int):
    convos = []

    for game in steps:
        convo = []
        n = 0
        
        for step in game:
            convo.append({"from": "human", "value": step[0]})
            convo.append({"from": "gpt", "value": step[1]})
            n += 1
            if length > 0 and n >= length:
                n = 0
                convos.append(convo)
                convo = []

        if len(convo) > 0:
            convos.append(convo)

    return Dataset.from_dict({"conversations": convos})

dataset = steps_to_dataset(steps, 5)
print(dataset[0])
dataset = standardize_sharegpt(dataset)

ðŸ¦¥ Unsloth: Will patch your computer to enable 2x faster free finetuning.
ðŸ¦¥ Unsloth Zoo will now patch everything to make training faster!
{'conversations': [{'from': 'human', 'value': 'Copyright (c) 1981, 1982, 1983 Infocom, Inc. All rights reserved.\nZORK is a registered trademark of Infocom, Inc.\nRevision 88 / Serial number 840726\n\nWest of House\nYou are standing in an open field west of a white house, with a boarded front door.\nThere is a small mailbox here.\n\n'}, {'from': 'gpt', 'value': 'N'}, {'from': 'human', 'value': "You'll have to get out of bed first.\n\nThe phone rings."}, {'from': 'gpt', 'value': 'N'}, {'from': 'human', 'value': "You'll have to get out of bed first.\n\nThe phone rings."}, {'from': 'gpt', 'value': 'U'}, {'from': 'human', 'value': 'You get out of bed.\n\nBedroom\nThis bedroom is extremely spare, with dirty laundry scattered haphazardly all over the floor. Cleaner clothing can be found in the dresser. A bathroom lies to the south, while a door to th

Unsloth: Standardizing formats (num_proc=16):   0%|          | 0/2434 [00:00<?, ? examples/s]

In [3]:
#dataset[0]

In [4]:
# Taken from this article:
# https://docs.unsloth.ai/get-started/fine-tuning-llms-guide/tutorial-how-to-finetune-llama-3-and-use-in-ollama
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048
dtype = None
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

==((====))==  Unsloth 2025.11.4: Fast Llama patching. Transformers: 4.57.2.
   \\   /|    NVIDIA GeForce RTX 5060 Ti. Num GPUs = 1. Max memory: 15.472 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu128. CUDA: 12.0. CUDA Toolkit: 12.8. Triton: 3.5.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [5]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                     "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None
)

Unsloth 2025.11.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [6]:
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama3"
)

In [7]:
# https://docs.unsloth.ai/get-started/fine-tuning-llms-guide/datasets-guide
def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = [
        tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt = False)
        for convo in convos
    ]
    return {'text': texts}

dataset = dataset.map(formatting_prompts_func, batched=True)
#dataset[0]['text']

Map:   0%|          | 0/2434 [00:00<?, ? examples/s]

In [8]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

enable_bf16 = is_bfloat16_supported()

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False,
    formatting_func = formatting_prompts_func,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 60,
        # num_train_epochs = 1,
        learning_rate = 2e-4,
        fp16 = not enable_bf16,
        bf16 = enable_bf16,
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    )
)

Unsloth: Tokenizing ["text"] (num_proc=20):   0%|          | 0/2434 [00:00<?, ? examples/s]

In [9]:
trainer_stats = trainer.train()

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,434 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,5.204
2,3.7526
3,5.1753
4,3.9478
5,4.8956
6,3.6283
7,3.9078
8,3.9392
9,3.5487
10,3.5127


In [12]:
FastLanguageModel.for_inference(model)

messages = [
    {"role": "user", "content": "You are in a room. You see an egg on a table and a chest of drawers."},
]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True,
    return_tensors = "pt",
).to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(input_ids,
    streamer = text_streamer,
    max_new_tokens = 128,
    pad_token_id = tokenizer.eos_token_id,
)

<|begin_of_text|><|start_header_id|>user<|end_header_id|>

You are in a room. You see an egg on a table and a chest of drawers.<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Douse lamp<|reserved_special_token_129|>ÐŽÑ‹ÑŸNÐŽÑ‹ÑŸNuser<|reserved_special_token_6|>

You can't see any such thing.<|reserved_special_token_44|><|reserved_special_token_168|>assistant<|reserved_special_token_13|>

E<|reserved_special_token_167|><|reserved_special_token_40|>user<|reserved_special_token_40|>

You are in a room. You see an egg on a table and a chest of drawers.<|reserved_special_token_39|><|reserved_special_token_105|>assistant<|reserved_special_token_245|>

E<|reserved_special_token_31|><|reserved_special_token_10|>user<|reserved_special_token_137|>

You are in a room. You see an egg on a table and a chest of drawers.<|reserved_special_token_26|><|reserved_special_token_65|>assistant<|reserved_special_token_193|>

E<|reserved_special_token_175|><|reserved_special_token_155|>user<|reserved