In [1]:
import os
import sys

# The notebook is launched from a sub-directory of the project, so the
# project root is the parent of the current working directory.
project_root = os.path.dirname(os.getcwd())

# Make the project's own sources and the vendored packages importable.
sys.path.extend(f"{project_root}/{subdir}" for subdir in ("src", "third_party"))

# Ray logging knobs; they are set before Ray is imported in a later cell.
os.environ.update(
    {
        "RAY_DEDUP_LOGS": "0",
        "RAY_COLOR_PREFIX": "0",
    }
)

from config import gpt2_cfg as cfg


In [2]:
import ray

# Tear down any Ray instance left over from a previous run of this cell so
# the runtime_env below is applied to a fresh instance.
if ray.is_initialized():
    ray.shutdown()

ray.init(
    runtime_env={
        "env_vars": {
            # Ray documents the ${VAR} form for referencing a variable that
            # already exists on the cluster; an unset variable becomes "".
            "PYTHONPATH": "${PYTHONPATH}:" + cfg.project_root + "/src",
            "RAY_DATA_VERBOSE_PROGRESS": "1",
        },
        # Ship the project directory to the workers, minus build artifacts,
        # VCS metadata, caches, and generated outputs.
        "working_dir": cfg.project_root,
        "excludes": [
            "/bazel-*",
            ".git",
            "*.pyc",
            "/__pycache__",
            "/outputs",
            "/model",
        ],
    },
    ignore_reinit_error=True,
)

# Convenience for debugging. Both options are set on the *current* context
# instance: assigning to the DataContext class would not affect an instance
# that has already been created.
data_context = ray.data.DataContext.get_current()
data_context.execution_options.verbose_progress = False
data_context.log_internal_stack_trace_to_stdout = True

2024-08-06 02:10:05,360	INFO worker.py:1772 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m
2024-08-06 02:10:05,369	INFO packaging.py:530 -- Creating a file package for local directory '/workspaces/CaiZi'.
2024-08-06 02:10:05,377	INFO packaging.py:358 -- Pushing file package 'gcs://_ray_pkg_f2a5e622f932d600.zip' (1.41MiB) to Ray cluster...
2024-08-06 02:10:05,385	INFO packaging.py:371 -- Successfully pushed file package 'gcs://_ray_pkg_f2a5e622f932d600.zip'.




In [3]:
from pathlib import Path

# One filesystem path per entry of the "dataset" section of the config.
data_sources = [Path(entry["path"]) for entry in cfg["dataset"]]

# Wrap the paths in a Ray Dataset so the later processing steps can map over them.
text_document_paths = ray.data.from_items(data_sources)

In [4]:
from document_processor import TextDocumentProcessor

# One processor per section; both are applied to the same set of document paths.
train_text_document_processor = TextDocumentProcessor(section="train")
validate_text_document_processor = TextDocumentProcessor(section="validate")

train_texts = text_document_paths.map(train_text_document_processor)
validate_texts = text_document_paths.map(validate_text_document_processor)


In [5]:
from token_processor import TikTokenizer

# The same tokenizer instance is mapped over both the train and validate texts.
tokenizer = TikTokenizer()
train_tokens, validate_tokens = (
    texts.map(tokenizer) for texts in (train_texts, validate_texts)
)

In [6]:
from chunk_processor import ChunkProcessor

# Chunking parameters are taken from the "124M" model section of the config.
chunk_processor = ChunkProcessor(
    max_length=cfg["124M"]["block_size"],
    stride=cfg["124M"]["stride"],
)

# flat_map: each tokenized document may yield several chunks.
train_chunked_tokens, validate_chunked_tokens = (
    tokens.flat_map(chunk_processor) for tokens in (train_tokens, validate_tokens)
)

In [7]:
import torch
from torchmetrics.text import Perplexity

import ray
import ray.train

from model.GPT import GPT
from utility import save_checkpoint, resume_checkpoint
from text_generator import TextGenerator


def train_loop_per_worker(config):
    """Train and periodically validate the GPT model on one Ray Train worker.

    Each epoch iterates over this worker's "train" dataset shard. Every
    ``check_frequency`` epochs the "validate" shard is evaluated, the
    perplexity is compared with the best value seen so far, and the metrics
    are reported to Ray Train. When the perplexity improves, world rank 0
    writes a checkpoint to ``best_checkpoint_dir``. After every epoch a short
    text sample is generated from ``start_context`` and printed.

    Args:
        config: Dict with the keys
            ``vocab_size``, ``dimension_embedding``, ``block_size``,
            ``num_layers``, ``num_headers``, ``drop_rate``, ``qkv_bias``
            (forwarded positionally to ``GPT``),
            ``check_frequency`` (validate every N epochs),
            ``batch_size_per_worker``, ``num_epoch_per_worker``,
            ``resume_training`` (try to restore from ``best_checkpoint_dir``),
            ``best_checkpoint_dir`` and ``start_context`` (generation prompt).
            Optional keys ``learning_rate`` (default 0.0004) and
            ``weight_decay`` (default 0.1) configure AdamW.
    """
    # Imported here so the submodule is guaranteed to be loaded in the worker
    # process that executes this function.
    import ray.train.torch

    vocab_size = config["vocab_size"]
    dimension_embedding = config["dimension_embedding"]
    block_size = config["block_size"]
    num_layers = config["num_layers"]
    num_headers = config["num_headers"]
    drop_rate = config["drop_rate"]
    qkv_bias = config["qkv_bias"]
    check_frequency = config["check_frequency"]
    batch_size_per_worker = config["batch_size_per_worker"]
    num_epoch_per_worker = config["num_epoch_per_worker"]
    resume_training = config["resume_training"]
    best_checkpoint_dir = config["best_checkpoint_dir"]
    start_context = config["start_context"]
    # Optional overrides; the defaults are the previously hard-coded values.
    learning_rate = config.get("learning_rate", 0.0004)
    weight_decay = config.get("weight_decay", 0.1)

    # GPT model
    model = GPT(
        vocab_size,
        dimension_embedding,
        block_size,
        num_layers,
        num_headers,
        drop_rate,
        qkv_bias,
    )
    model = ray.train.torch.prepare_model(model)

    # optimizer
    optimizer = torch.optim.AdamW(
        model.parameters(), lr=learning_rate, weight_decay=weight_decay
    )

    # ====== Resume training state from the checkpoint. ======
    epoch_start = 0
    best_perplexity = 1000000.0
    best_epoch = 0

    if resume_training:
        if os.path.exists(best_checkpoint_dir):
            checkpoint = ray.train.Checkpoint.from_directory(best_checkpoint_dir)
        else:
            checkpoint = None
        if checkpoint:
            best_epoch, best_perplexity = resume_checkpoint(model, optimizer, checkpoint)
            # Training continues from the epoch after the best checkpoint.
            epoch_start = best_epoch
            print(
                f"Resumed training from best_epoch {best_epoch},best_perplexity {best_perplexity}"
            )
        else:
            print("Checkpoint not found, starting from epoch 0")

    # loss function
    loss_function = torch.nn.CrossEntropyLoss()

    is_rank_zero = ray.train.get_context().get_world_rank() == 0
    # Ask Ray Train for the device assigned to this worker instead of deriving
    # it from the world rank, which is not a valid local GPU index on
    # multi-node clusters.
    device = ray.train.torch.get_device()

    # metrics
    metric = Perplexity().to(device)

    # data
    train_data_shard = ray.train.get_dataset_shard("train")
    validate_data_shard = ray.train.get_dataset_shard("validate")

    report_metrics = {
        "epoch": 0,
        "train_loss": 0.0,
        "validate_loss": 0.0,
        "perplexity": 0.0,
        "best_epoch": best_epoch,
        "best_perplexity": best_perplexity,
    }

    text_generator = TextGenerator(model, device=device)
    tokenizer = TikTokenizer()
    for epoch in range(epoch_start + 1, num_epoch_per_worker + 1):
        model.train()

        report_metrics["epoch"] = epoch

        train_loss = 0
        train_batch_count = 0
        for batch in train_data_shard.iter_torch_batches(
            batch_size=batch_size_per_worker,
            drop_last=True,
            local_shuffle_buffer_size=1000,
        ):
            train_batch_count += 1
            input_ids = batch["input_ids"]
            logits = model(input_ids)
            target_ids = batch["target_ids"]
            loss = loss_function(logits.flatten(0, 1), target_ids.flatten())
            train_loss += loss.item()  # only for reporting
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # drop_last=True can leave a small shard with no batch at all; avoid
        # dividing by zero in that case.
        train_loss = train_loss / max(train_batch_count, 1)

        report_metrics["train_loss"] = train_loss

        validate_loss = 0
        perplexity = 0

        if epoch % check_frequency == 0:

            model.eval()

            validate_batch_count = 0
            with torch.no_grad():
                for batch in validate_data_shard.iter_torch_batches(
                    batch_size=1,
                    drop_last=False,
                ):
                    validate_batch_count += 1
                    input_ids = batch["input_ids"]
                    logits = model(input_ids)
                    target_ids = batch["target_ids"]
                    loss = loss_function(logits.flatten(0, 1), target_ids.flatten())
                    validate_loss += loss.item()  # only for reporting
                    metric.update(logits, target_ids)

            validate_loss = validate_loss / max(validate_batch_count, 1)
            perplexity = metric.compute().item()
            metric.reset()

            report_metrics["validate_loss"] = validate_loss
            report_metrics["perplexity"] = perplexity

            if perplexity < best_perplexity:
                best_perplexity = perplexity
                best_epoch = epoch

                report_metrics["best_epoch"] = best_epoch
                report_metrics["best_perplexity"] = best_perplexity

                # In standard DDP training, where the model is the same across
                # all ranks, only the global rank 0 worker needs to save the
                # checkpoint.
                if is_rank_zero:
                    # exist_ok avoids the check-then-create race.
                    os.makedirs(best_checkpoint_dir, exist_ok=True)

                    save_checkpoint(
                        model,
                        optimizer,
                        epoch,
                        perplexity,
                        best_checkpoint_dir,
                    )

            # Every worker must call report(): Ray Train expects the same
            # number of report calls from all workers, so reporting from rank
            # 0 only can stall a multi-worker run.
            ray.train.report(metrics=report_metrics)

        decoded = text_generator(tokenizer.encode(start_context), max_new_tokens=50, context_size=block_size)
        print(f"\n epoch{epoch}:{decoded}")

2024-08-06 02:10:08.031308: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-06 02:10:08.039493: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-06 02:10:08.048390: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-06 02:10:08.051096: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-06 02:10:08.058914: I tensorflow/core/platform/cpu_feature_guar

In [8]:
from ray.train.torch import TorchTrainer
from ray.train import Result
from ray import train

# Per-worker training configuration: model hyper-parameters are read from the
# "124M" section of the config, training-loop settings from "ray_train".
train_loop_config = {
    **{
        key: cfg["124M"][key]
        for key in (
            "vocab_size",
            "dimension_embedding",
            "block_size",
            "num_layers",
            "num_headers",
            "drop_rate",
            "qkv_bias",
        )
    },
    **{
        key: cfg["ray_train"][key]
        for key in (
            "check_frequency",
            "batch_size_per_worker",
            "num_epoch_per_worker",
            "resume_training",
            "best_checkpoint_dir",
            "start_context",
        )
    },
}

# Worker count and per-worker CPU/GPU resources.
scaling_config = ray.train.ScalingConfig(
    num_workers=cfg["ray_train"]["num_workers"],
    use_gpu=cfg["ray_train"]["use_gpu"],
    resources_per_worker={
        "CPU": cfg["ray_train"]["num_cpus_per_worker"],
        "GPU": cfg["ray_train"]["num_gpus_per_worker"],
    },
)

# Storage location and name of the run.
run_config = train.RunConfig(
    storage_path=cfg["ray_train"]["storage_path"],
    name=cfg["ray_train"]["name"],
)

trainer = TorchTrainer(
    train_loop_per_worker=train_loop_per_worker,
    train_loop_config=train_loop_config,
    datasets={
        "train": train_chunked_tokens,
        "validate": validate_chunked_tokens,
    },
    scaling_config=scaling_config,
    run_config=run_config,
)

# Run the training job and show the last reported metrics.
result: Result = trainer.fit()
print(result.metrics)

2024-08-06 02:10:09,249	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


== Status ==
Current time: 2024-08-06 02:10:09 (running for 00:00:00.11)
Using FIFO scheduling algorithm.
Logical resource usage: 0/32 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 PENDING)


== Status ==
Current time: 2024-08-06 02:10:14 (running for 00:00:05.17)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(TrainTrainable pid=170994) 2024-08-06 02:10:11.792528: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
(TrainTrainable pid=170994) 2024-08-06 02:10:11.800061: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
(TrainTrainable pid=170994) 2024-08-06 02:10:11.808508: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
(TrainTrainable pid=170994) 2024-08-06 02:10:11.811122: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuB

(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

== Status ==
Current time: 2024-08-06 02:10:19 (running for 00:00:10.18)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:10:24 (running for 00:00:15.19)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

== Status ==
Current time: 2024-08-06 02:10:29 (running for 00:00:20.20)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:10:34 (running for 00:00:25.20)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch1:Every effort moves you,
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWor

(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


== Status ==
Current time: 2024-08-06 02:10:39 (running for 00:00:30.21)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

== Status ==
Current time: 2024-08-06 02:10:44 (running for 00:00:35.22)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:10:49 (running for 00:00:40.24)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch2:Every effort moves you,
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWor

(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


== Status ==
Current time: 2024-08-06 02:10:54 (running for 00:00:45.24)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:10:59 (running for 00:00:50.25)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

== Status ==
Current time: 2024-08-06 02:11:04 (running for 00:00:55.27)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch3:Every effort moves you,
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWor

(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


== Status ==
Current time: 2024-08-06 02:11:09 (running for 00:01:00.29)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:11:14 (running for 00:01:05.30)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

== Status ==
Current time: 2024-08-06 02:11:19 (running for 00:01:10.32)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch4:Every effort moves you,
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWor

(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


== Status ==
Current time: 2024-08-06 02:11:24 (running for 00:01:15.35)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:11:29 (running for 00:01:20.37)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

== Status ==
Current time: 2024-08-06 02:11:34 (running for 00:01:25.37)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch5:Every effort moves you,
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) And,
(RayTrainWorker pid=171354) And I'll not,
(RayTrainWorker pid=171354) And,
(RayTrainWorker pid=171354) And I,
(RayTrainWorker pid=171354) And,
(RayTrainWorker pid=171354) And,
(RayTrainWorker pid=171354) And,
(RayTrainWorker pid=171354) And,
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) And,
(RayTrainWorker pid=171354) And,
(RayTrainWorker pid=171354) And,
(RayTrainWorker pid=171354) 


(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

== Status ==
Current time: 2024-08-06 02:11:39 (running for 00:01:30.46)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:11:44 (running for 00:01:35.47)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

== Status ==
Current time: 2024-08-06 02:11:49 (running for 00:01:40.48)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch6:Every effort moves you have heard
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(R

(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


== Status ==
Current time: 2024-08-06 02:11:54 (running for 00:01:45.48)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:11:59 (running for 00:01:50.49)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch7:Every effort moves you have heard
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(R

(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


== Status ==
Current time: 2024-08-06 02:12:04 (running for 00:01:55.52)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:12:09 (running for 00:02:00.53)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch8:Every effort moves you that,
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) And so much
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) And that the or your queen,
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) And so your brother,
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTra

(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


== Status ==
Current time: 2024-08-06 02:12:14 (running for 00:02:05.58)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:12:19 (running for 00:02:10.59)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch9:Every effort moves you
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWork

(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


== Status ==
Current time: 2024-08-06 02:12:24 (running for 00:02:15.65)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:12:29 (running for 00:02:20.66)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch10:Every effort moves you,
(RayTrainWorker pid=171354) The people,
(RayTrainWorker pid=171354) Which you will,
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) And this in your my state I stay'd the king's face.
(RayTrainWorker pid=171354) And yet I must die than to be so young,
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) The blood:
(RayTrainWorker pid=171354) And every mess of death


(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


== Status ==
Current time: 2024-08-06 02:12:34 (running for 00:02:25.73)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:12:40 (running for 00:02:30.74)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch11:Every effort moves you play it is meet so much more.
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) What my lord! why,
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=17

(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


== Status ==
Current time: 2024-08-06 02:12:45 (running for 00:02:35.80)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:12:50 (running for 00:02:40.81)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch12:Every effort moves you will keep her father:
(RayTrainWorker pid=171354) Which you love me.
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) I am too old to say that I have worn a constant temper.
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) Do as they lie asleep;
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) Both disobedience and ingratitude
(RayTrainWorker pid=171354) To


(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


== Status ==
Current time: 2024-08-06 02:12:55 (running for 00:02:45.89)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:13:00 (running for 00:02:50.90)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch13:Every effort moves you, we should have none
(RayTrainWorker pid=171354) shortly, for one would kill the other. Thou! why,
(RayTrainWorker pid=171354) thou wilt quarrel with a man that hath a hair more,
(RayTrainWorker pid=171354) or a hair less, in his beard, than thou hast: thou


(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


== Status ==
Current time: 2024-08-06 02:13:05 (running for 00:02:55.95)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:13:10 (running for 00:03:00.96)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch14:Every effort moves you,
(RayTrainWorker pid=171354) The people
(RayTrainWorker pid=171354) Like one
(RayTrainWorker pid=171354) Thou shalt not here big for the lightning,
(RayTrainWorker pid=171354) Than dangerous to me;
(RayTrainWorker pid=171354) You are to Plashy too;
(RayTrainWorker pid=171354) I thank thee and my name
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) I now go toward him;
(RayTrainWorker pid=171354) 


(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


== Status ==
Current time: 2024-08-06 02:13:15 (running for 00:03:06.04)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:13:20 (running for 00:03:11.05)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch15:Every effort moves you live to your bands:
(RayTrainWorker pid=171354) Which you love me.
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) And your men,
(RayTrainWorker pid=171354) QUEEN ELIZABETH:
(RayTrainWorker pid=171354) No less! rather see thee here
(RayTrainWorker pid=171354) And then came I can yield my young cousin,
(RayTrainWorker pid=171354) I amiss of


(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


== Status ==
Current time: 2024-08-06 02:13:25 (running for 00:03:16.06)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:13:30 (running for 00:03:21.07)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch16:Every effort moves you gave leave to my unwilling tongue
(RayTrainWorker pid=171354) Against my will to do myself this wrong.
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) KING RICHARD II:
(RayTrainWorker pid=171354) Cousin, farewell; and, uncle, bid him so:
(RayTrainWorker pid=171354) Six years we banish him, and he shall


(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


== Status ==
Current time: 2024-08-06 02:13:35 (running for 00:03:26.08)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:13:40 (running for 00:03:31.09)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch17:Every effort moves you gave leave to my unwilling tongue
(RayTrainWorker pid=171354) Against my will to do myself this wrong.
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) KING RICHARD II:
(RayTrainWorker pid=171354) Cousin, farewell; and, uncle, bid him so:
(RayTrainWorker pid=171354) Six years we banish him, and he shall


(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


== Status ==
Current time: 2024-08-06 02:13:45 (running for 00:03:36.18)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:13:50 (running for 00:03:41.19)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch18:Every effort moves you gave leave to my unwilling tongue
(RayTrainWorker pid=171354) Against my will to do myself this wrong.
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) KING RICHARD II:
(RayTrainWorker pid=171354) Cousin, farewell; and, uncle, bid him so:
(RayTrainWorker pid=171354) Six years we banish him, and he shall


(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


== Status ==
Current time: 2024-08-06 02:13:55 (running for 00:03:46.20)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:14:00 (running for 00:03:51.21)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch19:Every effort moves you fear?
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) Against the lords you talk of are beheaded.
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) LORD STANUS:
(RayTrainWorker pid=171354) I brought my tent, and I,
(RayTrainWorker pid=171354) And,
(RayTrainWorker pid=171354) I mean the cause,
(RayTrainWorker pid=171354) But, for you:
(RayTrainWorker pid=171354) But come


(pid=171436) Running 0: 0 bundle [00:00, ? bundle/s]

(SplitCoordinator pid=171436) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171436) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]
(SplitCoordinator pid=171437) Starting execution of Dataset. Full logs are in /tmp/ray/session_2024-08-06_02-10-04_641366_168577/logs/ray-data
(SplitCoordinator pid=171437) Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(TextDocumentProcessor)->Map(TikTokenizer)->FlatMap(ChunkProcessor)] -> OutputSplitter[split(1, equal=True)]


== Status ==
Current time: 2024-08-06 02:14:05 (running for 00:03:56.21)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-08-06 02:14:10 (running for 00:04:01.22)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




(pid=171437) Running 0: 0 bundle [00:00, ? bundle/s]

(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354)  epoch20:Every effort moves you fear?
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) Against the lords you talk of are beheaded.
(RayTrainWorker pid=171354) 
(RayTrainWorker pid=171354) LORD STANUS:
(RayTrainWorker pid=171354) I hope he, sir, might better wear their heads
(RayTrainWorker pid=171354) Were I have been in any liquid thing you will,
(RayTrainWorker pid=171354) But come


== Status ==
Current time: 2024-08-06 02:14:15 (running for 00:04:06.24)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




2024-08-06 02:14:16,483	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/workspaces/CaiZi/outputs/gpt2/124M' in 0.0019s.
2024-08-06 02:14:16,484	INFO tune.py:1041 -- Total run time: 247.24 seconds (247.22 seconds for the tuning loop).


== Status ==
Current time: 2024-08-06 02:14:16 (running for 00:04:07.23)
Using FIFO scheduling algorithm.
Logical resource usage: 2.0/32 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /tmp/ray/session_2024-08-06_02-10-04_641366_168577/artifacts/2024-08-06_02-10-09/124M/driver_artifacts
Number of trials: 1/1 (1 TERMINATED)


{'epoch': 20, 'train_loss': 0.15964282916425027, 'validate_loss': 7.7357090405055455, 'perplexity': 2288.631103515625, 'best_epoch': 5, 'best_perplexity': 149.89683532714844, 'timestamp': 1722910455, 'checkpoint_dir_name': None, 'done': True, 'training_iteration': 20, 'trial_id': '01873_00000', 'date': '2024-08-06_02-14-15', 'time_this_iter_s': 10.420353412628174, 'time_total_s': 242.34613299369812, 'pid': 170994, 'hostname': '32452cd6fad6', 'node_ip': '172.17.0.2', 'config': {'train_loop_config': {'vocab_size': 50257, 'dimension_embedding': 768, 'block_size': 1024, 'num_layers': 12, 'num_headers': 12, 'drop_rate': 0.1, 'qkv_bias': False, 'check_freque