In [0]:
import yt.wrapper as yt
import uuid
import os

In [1]:
yt.config["pickling"]["dynamic_libraries"]["enable_auto_collection"] = False
yt.config["pickling"]["ignore_system_modules"] = True
yt.config["pickling"]["safe_stream_mode"] = False

In [2]:
working_dir = f"//tmp/examples/tractorun-tiny-stories-finetune-{uuid.uuid4()}"
yt.create("map_node", working_dir, recursive=True)
print(working_dir)

//tmp/examples/tractorun-tiny-stories-finetune-875dc7db-a990-4a66-93db-38836f8dfa1b


In [3]:
from tractorun.toolbox import Toolbox
from tractorun.run import run
from tractorun.mesh import Mesh
from tractorun.resources import Resources
from tractorun.backend.generic import GenericBackend
from tractorun.backend.tractorch import Tractorch
from tractorun.stderr_reader import StderrMode

In [4]:
from yt import type_info

TALES_PER_JOB = 500

hf_token = os.environ.get("YT_SECURE_VAULT_HF_TOKEN", "")
assert hf_token is not None, "set HF token in kernel's secrets to use llama"

schema = yt.schema.TableSchema(strict=False)
schema.add_column("text", type_info.String)

datasets_path = f"{working_dir}/datasets"
yt.create("map_node", datasets_path)


MAX_TOKENS = 1000


def prepare_dataset(toolbox: Toolbox):
    from vllm import LLM, SamplingParams
    import os
    
    os.environ["HF_TOKEN"] = hf_token
    os.environ["CUDA_VISIBLE_DEVICES"] = str(toolbox.coordinator.get_process_index())

    self_index = toolbox.coordinator.get_self_index()
    table_path = f"{datasets_path}/dataset_{self_index}"

    llm = LLM(model="meta-llama/Llama-3.2-3B-Instruct", seed=self_index,)

    sampling_params = SamplingParams(
        temperature=0.9,
        top_p=0.85,
        max_tokens=MAX_TOKENS,
    )
    conversations = [
        [
            {
                "role": "system",
                "content": "You are a professional storyteller. Write the story in one paragraph, without line breaks. A user will now ask you to tell a fairy tale, and you must create a story featuring Tracto.ai. Tracto.ai the a ai-startup that provides infrastructure for machine learning and big data processing."
            },
            {
                "role": "user",
                "content": f"Write the {index}th fairy tail about some animal please.",
            },
        ] for index in range(TALES_PER_JOB)
    ]

    results = llm.chat(
        messages=conversations,
        sampling_params=sampling_params,
    )

    tales = ({"text": result.outputs[0].text} for result in results)

    toolbox.yt_client.create("table", table_path, attributes={"schema": schema.to_yson_type()}, force=True)
    toolbox.yt_client.write_table(table_path, tales)


run(
    prepare_dataset,
    backend=GenericBackend(),
    proxy_stderr_mode=StderrMode.primary,
    yt_path=f"{working_dir}/tractorun_inference",
    mesh=Mesh(node_count=2, gpu_per_process=1, process_per_node=8, pool="fifo", pool_trees=["gpu_h200"]),
    resources=Resources(
        cpu_limit=64,
        memory_limit=322122547200,
    ),
)

dataset_parts = [f"{datasets_path}/dataset_{i}" for i in range(2 * 8)]
dataset_path = f"{datasets_path}/dataset"

yt.run_merge(
    dataset_parts,
    dataset_path,
)





2025-02-06 18:51:18,007	INFO	Operation started: https://playground.yt.nebius.yt/playground/operations/cd0e06f6-42ee0531-270703e8-1b2ce6c0/details


2025-02-06 18:51:18,031	INFO	( 0 min) operation cd0e06f6-42ee0531-270703e8-1b2ce6c0 starting


2025-02-06 18:51:18,561	INFO	( 0 min) operation cd0e06f6-42ee0531-270703e8-1b2ce6c0 initializing


2025-02-06 18:51:19,629	INFO	( 0 min) Unrecognized spec: {'enable_partitioned_data_balancing': false}


2025-02-06 18:51:19,652	INFO	( 0 min) operation cd0e06f6-42ee0531-270703e8-1b2ce6c0: running=0     completed=0     pending=2     failed=0     aborted=0     lost=0     total=2     blocked=0    


2025-02-06 18:51:21,849	INFO	( 0 min) operation cd0e06f6-42ee0531-270703e8-1b2ce6c0: running=2     completed=0     pending=0     failed=0     aborted=0     lost=0     total=2     blocked=0    


  __tar.extractall(destination)


  __tar.extractall(destination)
Failed to write user statistics
Waiting for all peers to start
All peers started
  __tar.extractall(destination)
Failed to write user statistics
Waiting for all peers to start
All peers started
  __tar.extractall(destination)
Failed to write user statistics
Waiting for all peers to start
All peers started
  __tar.extractall(destination)
Failed to write user statistics
Waiting for all peers to start
All peers started
  __tar.extractall(destination)
Failed to write user statistics
Waiting for all peers to start
All peers started
  __tar.extractall(destination)
Failed to write user statistics
Waiting for all peers to start
All peers started
  __tar.extractall(destination)
Failed to write user statistics
Waiting for all peers to start
All peers started
  __tar.extractall(destination)
Failed to write user statistics
Waiting for all peers to start
All peers started


INFO 02-06 18:51:26 __init__.py:183] Automatically detected platform cuda.
INFO 02-06 18:51:27 __init__.py:183] Automatically detected platform cuda.
INFO 02-06 18:51:27 __init__.py:183] Automatically detected platform cuda.
INFO 02-06 18:51:28 __init__.py:183] Automatically detected platform cuda.
INFO 02-06 18:51:28 __init__.py:183] Automatically detected platform cuda.
INFO 02-06 18:51:28 __init__.py:183] Automatically detected platform cuda.
INFO 02-06 18:51:28 __init__.py:183] Automatically detected platform cuda.
INFO 02-06 18:51:28 __init__.py:183] Automatically detected platform cuda.


INFO 02-06 18:51:42 config.py:526] This model supports multiple tasks: {'classify', 'embed', 'reward', 'score', 'generate'}. Defaulting to 'generate'.
INFO 02-06 18:51:42 config.py:1538] Chunked prefill is enabled with max_num_batched_tokens=2048.
INFO 02-06 18:51:42 llm_engine.py:232] Initializing a V0 LLM engine (v0.7.1) with config: model='meta-llama/Llama-3.2-3B-Instruct', speculative_config=None, tokenizer='meta-llama/Llama-3.2-3B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_fo

INFO 02-06 18:51:47 cuda.py:235] Using Flash Attention backend.
INFO 02-06 18:51:47 cuda.py:235] Using Flash Attention backend.
INFO 02-06 18:51:46 cuda.py:235] Using Flash Attention backend.
INFO 02-06 18:51:47 cuda.py:235] Using Flash Attention backend.
INFO 02-06 18:51:47 cuda.py:235] Using Flash Attention backend.
INFO 02-06 18:51:47 cuda.py:235] Using Flash Attention backend.
INFO 02-06 18:51:47 cuda.py:235] Using Flash Attention backend.
INFO 02-06 18:51:47 cuda.py:235] Using Flash Attention backend.


INFO 02-06 18:51:56 model_runner.py:1111] Starting to load model meta-llama/Llama-3.2-3B-Instruct...
INFO 02-06 18:51:56 weight_utils.py:251] Using model weights format ['*.safetensors']
INFO 02-06 18:51:56 model_runner.py:1111] Starting to load model meta-llama/Llama-3.2-3B-Instruct...
INFO 02-06 18:51:56 weight_utils.py:251] Using model weights format ['*.safetensors']
INFO 02-06 18:51:56 model_runner.py:1111] Starting to load model meta-llama/Llama-3.2-3B-Instruct...
INFO 02-06 18:51:56 weight_utils.py:251] Using model weights format ['*.safetensors']
INFO 02-06 18:51:56 model_runner.py:1111] Starting to load model meta-llama/Llama-3.2-3B-Instruct...
INFO 02-06 18:51:56 weight_utils.py:251] Using model weights format ['*.safetensors']
INFO 02-06 18:51:56 model_runner.py:1111] Starting to load model meta-llama/Llama-3.2-3B-Instruct...
INFO 02-06 18:51:56 weight_utils.py:251] Using model weights format ['*.safetensors']
INFO 02-06 18:51:56 model_runner.py:1111] Starting to load model 


Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]

Loading safetensors checkpoint shards:  50% Completed | 1/2 [00:00<00:00,  4.04it/s]

Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]

Loading safetensors checkpoint shards:  50% Completed | 1/2 [00:00<00:00,  4.06it/s]

Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]



Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:01<00:00,  1.68it/s]

Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:01<00:00,  1.84it/s]


Capturing CUDA graph shapes:   0%|          | 0/35 [00:00<?, ?it/s]
Capturing CUDA graph shapes:   3%|▎         | 1/35 [00:00<00:14,  2.30it/s]
Capturing CUDA graph shapes:   6%|▌         | 2/35 [00:00<00:14,  2.28it/s]
Capturing CUDA graph shapes:   9%|▊         | 3/35 [00:01<00:12,  2.47it/s]

Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:01<00:00,  1.63it/s]

Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:01<00:00,  1.79it/s]


Capturing CUDA graph shapes:   0%|          | 0/35 [00:00<?, ?it/s]
Capturing CUDA graph shapes:   3%|▎         | 1/35 [00:00<00:14,  2.38it/s]

Loading safetensors checkpoint shards:  50% Completed | 1/2 [00:00<00:00,  3.81it/s]

Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:01<00:00,  1.58it/s]

Loading safetensors checkpoint shards: 


Capturing CUDA graph shapes:  11%|█▏        | 4/35 [00:01<00:12,  2.43it/s]
Capturing CUDA graph shapes:  14%|█▍        | 5/35 [00:02<00:12,  2.46it/s]
Capturing CUDA graph shapes:  17%|█▋        | 6/35 [00:02<00:12,  2.39it/s]
Capturing CUDA graph shapes:  20%|██        | 7/35 [00:02<00:11,  2.47it/s]
Capturing CUDA graph shapes:  23%|██▎       | 8/35 [00:03<00:10,  2.46it/s]
Capturing CUDA graph shapes:  26%|██▌       | 9/35 [00:03<00:10,  2.50it/s]
Capturing CUDA graph shapes:  29%|██▊       | 10/35 [00:04<00:09,  2.57it/s]
Capturing CUDA graph shapes:  31%|███▏      | 11/35 [00:04<00:09,  2.55it/s]
Capturing CUDA graph shapes:  34%|███▍      | 12/35 [00:04<00:08,  2.57it/s]
Capturing CUDA graph shapes:  37%|███▋      | 13/35 [00:05<00:08,  2.63it/s]
Capturing CUDA graph shapes:  40%|████      | 14/35 [00:05<00:08,  2.60it/s]
Capturing CUDA graph shapes:  43%|████▎     | 15/35 [00:05<00:07,  2.59it/s]

Capturing CUDA graph shapes:   6%|▌         | 2/35 [00:00<00:12,  2.64it/s]
Capt


Capturing CUDA graph shapes:  46%|████▌     | 16/35 [00:06<00:07,  2.64it/s]
Capturing CUDA graph shapes:  49%|████▊     | 17/35 [00:06<00:06,  2.68it/s]
Capturing CUDA graph shapes:  51%|█████▏    | 18/35 [00:07<00:06,  2.65it/s]
Capturing CUDA graph shapes:  54%|█████▍    | 19/35 [00:07<00:06,  2.61it/s]
Capturing CUDA graph shapes:  57%|█████▋    | 20/35 [00:07<00:05,  2.61it/s]
Capturing CUDA graph shapes:  60%|██████    | 21/35 [00:08<00:05,  2.63it/s]
Capturing CUDA graph shapes:  63%|██████▎   | 22/35 [00:08<00:04,  2.64it/s]
Capturing CUDA graph shapes:  66%|██████▌   | 23/35 [00:08<00:04,  2.63it/s]
Capturing CUDA graph shapes:  69%|██████▊   | 24/35 [00:09<00:04,  2.62it/s]
Capturing CUDA graph shapes:  71%|███████▏  | 25/35 [00:09<00:03,  2.63it/s]
Capturing CUDA graph shapes:  74%|███████▍  | 26/35 [00:10<00:03,  2.67it/s]
Capturing CUDA graph shapes:  77%|███████▋  | 27/35 [00:10<00:03,  2.65it/s]
Capturing CUDA graph shapes:  80%|████████  | 28/35 [00:10<00:02,  2.61it/s


Capturing CUDA graph shapes:  83%|████████▎ | 29/35 [00:11<00:02,  2.54it/s]
Capturing CUDA graph shapes:  86%|████████▌ | 30/35 [00:11<00:01,  2.56it/s]
Capturing CUDA graph shapes:  89%|████████▊ | 31/35 [00:12<00:01,  2.57it/s]
Capturing CUDA graph shapes:  91%|█████████▏| 32/35 [00:12<00:01,  2.62it/s]
Capturing CUDA graph shapes:  94%|█████████▍| 33/35 [00:12<00:00,  2.66it/s]
Capturing CUDA graph shapes:  97%|█████████▋| 34/35 [00:13<00:00,  2.72it/s]
Capturing CUDA graph shapes: 100%|██████████| 35/35 [00:13<00:00,  2.71it/s]
Capturing CUDA graph shapes: 100%|██████████| 35/35 [00:13<00:00,  2.59it/s]

Processed prompts:   0%|          | 0/500 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Capturing CUDA graph shapes:  89%|████████▊ | 31/35 [00:11<00:01,  2.73it/s]
Capturing CUDA graph shapes:  91%|█████████▏| 32/35 [00:11<00:01,  2.75it/s]
Capturing CUDA graph shapes:  94%|█████████▍| 33/35 [00:11<00:00,  2.76it/s]
Capturing CUDA graph shapes:  97%|█████


Processed prompts:   0%|          | 1/500 [00:04<36:41,  4.41s/it, est. speed input: 24.93 toks/s, output: 45.78 toks/s]
Processed prompts:   1%|          | 3/500 [00:04<10:02,  1.21s/it, est. speed input: 71.79 toks/s, output: 136.62 toks/s]
Processed prompts:   1%|▏         | 7/500 [00:04<03:24,  2.41it/s, est. speed input: 162.28 toks/s, output: 308.33 toks/s]
Processed prompts:   3%|▎         | 13/500 [00:05<01:44,  4.67it/s, est. speed input: 274.70 toks/s, output: 533.26 toks/s]
Processed prompts:   4%|▍         | 20/500 [00:05<00:55,  8.65it/s, est. speed input: 413.75 toks/s, output: 814.72 toks/s]
Processed prompts:   5%|▌         | 26/500 [00:05<00:37, 12.66it/s, est. speed input: 526.75 toks/s, output: 1050.37 toks/s]
Processed prompts:   6%|▌         | 31/500 [00:05<00:29, 16.14it/s, est. speed input: 614.43 toks/s, output: 1232.28 toks/s]
Processed prompts:   8%|▊         | 41/500 [00:05<00:17, 26.25it/s, est. speed input: 796.55 toks/s, output: 1626.30 toks/s]
Processed 


Processed prompts:  27%|██▋       | 137/500 [00:06<00:03, 92.87it/s, est. speed input: 2250.81 toks/s, output: 5062.73 toks/s]
Processed prompts:  29%|██▉       | 147/500 [00:06<00:04, 84.43it/s, est. speed input: 2363.33 toks/s, output: 5357.47 toks/s]
Processed prompts:  31%|███       | 156/500 [00:06<00:04, 76.68it/s, est. speed input: 2455.02 toks/s, output: 5607.47 toks/s]
Processed prompts:  33%|███▎      | 167/500 [00:07<00:04, 81.17it/s, est. speed input: 2584.43 toks/s, output: 5952.62 toks/s]
Processed prompts:  35%|███▌      | 176/500 [00:07<00:05, 64.28it/s, est. speed input: 2642.16 toks/s, output: 6129.49 toks/s]
Processed prompts:  37%|███▋      | 184/500 [00:07<00:04, 64.94it/s, est. speed input: 2717.92 toks/s, output: 6350.45 toks/s]
Processed prompts:  38%|███▊      | 191/500 [00:07<00:05, 57.63it/s, est. speed input: 2760.35 toks/s, output: 6491.22 toks/s]
Processed prompts:  40%|████      | 201/500 [00:07<00:04, 64.83it/s, est. speed input: 2861.61 toks/s, output:


Processed prompts:  59%|█████▉    | 297/500 [00:11<00:04, 47.30it/s, est. speed input: 2773.55 toks/s, output: 6857.97 toks/s]
Processed prompts:  62%|██████▏   | 309/500 [00:11<00:03, 60.69it/s, est. speed input: 2857.67 toks/s, output: 7047.83 toks/s]
Processed prompts:  63%|██████▎   | 317/500 [00:12<00:03, 60.93it/s, est. speed input: 2899.99 toks/s, output: 7142.01 toks/s]
Processed prompts:  65%|██████▍   | 324/500 [00:12<00:03, 57.22it/s, est. speed input: 2928.99 toks/s, output: 7201.24 toks/s]
Processed prompts:  67%|██████▋   | 337/500 [00:12<00:02, 63.14it/s, est. speed input: 3003.39 toks/s, output: 7373.64 toks/s]
Processed prompts:  69%|██████▉   | 344/500 [00:12<00:02, 64.56it/s, est. speed input: 3040.99 toks/s, output: 7456.19 toks/s]
Processed prompts:  72%|███████▏  | 362/500 [00:12<00:02, 50.80it/s, est. speed input: 3090.66 toks/s, output: 7578.47 toks/s]
Processed prompts:  75%|███████▌  | 377/500 [00:12<00:01, 65.68it/s, est. speed input: 3191.97 toks/s, output:


Processed prompts: 100%|██████████| 500/500 [00:14<00:00, 33.35it/s, est. speed input: 3668.92 toks/s, output: 9387.70 toks/s]


2025-02-06 18:54:38,981	INFO	( 3 min) operation cd0e06f6-42ee0531-270703e8-1b2ce6c0 completed


2025-02-06 18:54:44,635	INFO	Operation started: https://playground.yt.nebius.yt/playground/operations/f598913-b2269a5e-270703e8-c888c333/details


2025-02-06 18:54:44,646	INFO	( 0 min) operation f598913-b2269a5e-270703e8-c888c333 starting


2025-02-06 18:54:45,177	INFO	( 0 min) operation f598913-b2269a5e-270703e8-c888c333 initializing


2025-02-06 18:54:45,733	INFO	( 0 min) Unrecognized spec: {'enable_partitioned_data_balancing': false}


2025-02-06 18:54:45,734	INFO	( 0 min) operation f598913-b2269a5e-270703e8-c888c333 completing


2025-02-06 18:54:46,262	INFO	( 0 min) operation f598913-b2269a5e-270703e8-c888c333 completed


<yt.wrapper.operation_commands.Operation at 0x7f2ba468f710>

In [5]:
from tractorun.backend.tractorch import YtDataset
from tractorun.backend.tractorch.serializer import TensorSerializer

from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig


class YTTransform:
    def __init__(self, tokenizer: AutoTokenizer):
        self._tokenizer = tokenizer

    def __call__(self, columns: list[str], row: dict) -> tuple:
        assert columns == ["text"]
        input_ids = self._tokenizer(yt.yson.get_bytes(row["text"]).decode(), padding="max_length", max_length=MAX_TOKENS)["input_ids"]
        return {
            "input_ids": input_ids,
        }


def get_dataset(
    path: str,
    tokenizer: AutoTokenizer,
    yt_client: yt.YtClient,
) -> tuple[YtDataset, YtDataset]:
    start = 0
    end = yt_client.get(path + "/@row_count")

    train_end = int(end * 0.8)
    eval_start = train_end + 1

    train_dataset = YtDataset(path=path, yt_client=yt_client, transform=YTTransform(tokenizer), start=start, end=train_end, columns=["text"])
    eval_dataset = YtDataset(path=path, yt_client=yt_client, transform=YTTransform(tokenizer), start=eval_start, end=end, columns=["text"])
    return train_dataset, eval_dataset

In [6]:
from transformers import (
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
    AutoModelForCausalLM,
    GenerationConfig,
)
from transformers.trainer_pt_utils import AcceleratorConfig


def training(toolbox: Toolbox):
    model = AutoModelForCausalLM.from_pretrained(
        "roneneldan/TinyStories-3M",
        trust_remote_code=True,
        use_cache = False,
    )
    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-125M")
    tokenizer.pad_token = tokenizer.eos_token
    data_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=2, mlm=False)
    train_dataset, eval_dataset = get_dataset(
        path=dataset_path,
        tokenizer=tokenizer,
        yt_client=toolbox.yt_client,
    )
    args = TrainingArguments(
        output_dir="/tmp/results",
        per_device_train_batch_size=2,
        gradient_accumulation_steps=1,
        eval_on_start=True,
        eval_strategy="epoch",
        num_train_epochs=8,
        weight_decay=0.1,
        lr_scheduler_type="constant",
        learning_rate=5e-5,
        save_steps=0.0,  # don't save checkpoints
        logging_dir=None,
        logging_strategy="epoch",
        fp16=True,
        push_to_hub=False,
        batch_eval_metrics=False,
        accelerator_config=AcceleratorConfig(
            split_batches=True,
            dispatch_batches=True,
        ),
    )
    args = args.set_dataloader(train_batch_size=16, drop_last=True)
    trainer = Trainer(
        model=model,
        processing_class=tokenizer,
        args=args,
        data_collator=data_collator,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
    )
    trainer.train()
    if toolbox.coordinator.is_primary():
        toolbox.save_model(TensorSerializer().serialize(trainer.model))


run(
    training,
    backend=Tractorch(),
    yt_path=f"{working_dir}/tractorun_training",
    mesh=Mesh(node_count=1, gpu_per_process=1, process_per_node=8, pool="fifo", pool_trees=["gpu_h200"]),
    resources=Resources(
        cpu_limit=64,
        memory_limit=322122547200,
    ),
)





2025-02-06 18:54:53,428	INFO	Operation started: https://playground.yt.nebius.yt/playground/operations/eb6a1d17-8cc5bc72-270703e8-62e5f601/details


2025-02-06 18:54:53,451	INFO	( 0 min) operation eb6a1d17-8cc5bc72-270703e8-62e5f601 starting


2025-02-06 18:54:53,978	INFO	( 0 min) operation eb6a1d17-8cc5bc72-270703e8-62e5f601 initializing


2025-02-06 18:54:55,586	INFO	( 0 min) Unrecognized spec: {'enable_partitioned_data_balancing': false}


2025-02-06 18:54:55,610	INFO	( 0 min) operation eb6a1d17-8cc5bc72-270703e8-62e5f601: running=0     completed=0     pending=1     failed=0     aborted=0     lost=0     total=1     blocked=0    


2025-02-06 18:54:57,806	INFO	( 0 min) operation eb6a1d17-8cc5bc72-270703e8-62e5f601: running=1     completed=0     pending=0     failed=0     aborted=0     lost=0     total=1     blocked=0    


2025-02-06 18:58:30,075	INFO	( 3 min) operation eb6a1d17-8cc5bc72-270703e8-62e5f601 completing


2025-02-06 18:58:30,603	INFO	( 3 min) operation eb6a1d17-8cc5bc72-270703e8-62e5f601 completed


RunInfo(operation_spec={'description': {'notebook_path': '//home/chiffa/tractorun_example_1/notebook'}, 'started_by': {'hostname': 'end-a100-0.exec-nodes-a100.tundra.svc.testy.k8s.nebius.yt', 'pid': 3421, 'command': ['/slot/sandbox/jlab/site-packages/ipykernel_launcher.py', '-f', '/slot/sandbox/.local/share/jupyter/runtime/kernel-017cbf55-741c-494d-bf7a-4483604e8ce7.json'], 'wrapper_version': '0.13.22', 'python_version': '3.12.8', 'user': 'root', 'platform': 'Debian GNU/Linux 12 (bookworm)'}, 'fail_on_job_restart': True, 'is_gang': True, 'annotations': {'is_tractorun': True}, 'tasks': {'task': {'command': 'python3 _py_runner.py wrapped.pickle config_dump _modules_info _main_module.py _main_module PY_SOURCE', 'job_count': 1, 'gpu_limit': 8, 'port_count': 8, 'cpu_limit': 64, 'memory_limit': 322133234892, 'docker_image': 'cr.eu-north1.nebius.cloud/e00faee7vas5hpsh3s/chiffa/example:v1', 'file_paths': [{'value': '//tmp/yt_wrapper/file_storage/new_cache/dd/673cbb3cb215bad8d8d26e236296b8dd', 

In [7]:
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
import torch
import io

incarnation = sorted(yt.list(f"{working_dir}/tractorun_training/models"), key=lambda x: int(x), reverse=True)[0]

raw_model = io.BytesIO(yt.read_file(f"{working_dir}/tractorun_training/models/{incarnation}").read())

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = torch.load(raw_model).to(device)

tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-125M")
tokenizer.pad_token = tokenizer.eos_token

  model = torch.load(raw_model).to(device)


In [8]:
prompt = f"Long time ago"
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
output = model.generate(input_ids, max_length = 200, num_beams=1, temperature=0.7, do_sample=True)
output_text = tokenizer.decode(output[0], skip_special_tokens=True)

print("\n\n", output_text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.




 Long time ago a cunning fox named Kaito roamed with a thirst for adventure. One day, while exploring the forest, Kaito stumbled upon a hidden clearing where Tracto.ai, a mystical startup, had set up its headquarters. The CEO, a wise and kind fox named Nova, welcomed Kaito with open arms. The fox, with a curious mind and a mind full of wonder, sprang into action, with the help of Tracto.ai's magical algorithms. As they embarked on a quest to optimize the forest's systems, conjured a new system that illuminated the forest's growth. With the help of Tracto.ai's powerful infrastructure, the AI's agility, Kaito and the fox defeated the sly fox of bias and the treacherous landscape of data, and the fox's paws witherred, helping the team optimize their innovative solutions. And from that day on, Kaito and the Tracto.ai team worked tirelessly to develop a new era of
