In [1]:
### JAX

# UPDATE/TODO XXX: We can now move to jax24.04-py3 (https://docs.nvidia.com/deeplearning/frameworks/jax-release-notes/rel-24-04.html)
# TODO: this is slightly faster even with the warning -> invewstigate (current jax version is 0.4.26, where the image has 0.4.17)
#! pip install -U "jax[cuda12_pip]" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
#2024-05-02 08:16:04.763248: W external/xla/xla/service/gpu/nvptx_compiler.cc:718] 
#The NVIDIA driver's CUDA version is 12.2 which is older than the ptxas CUDA version (12.4.131). 
#Because the driver is older than the ptxas version, XLA is disabling parallel compilation, which may slow down compilation. 
#You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward compatibility packages.

# TODO: It looks like I am suffering from fragmentation on GPU, thus enabling prelocation
# Disable JAX memory preallocation
#import os
#os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"]="false"
#os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"]=".90"
#%env XLA_PYTHON_CLIENT_PREALLOCATE=false
%env XLA_PYTHON_CLIENT_MEM_FRACTION=0.95

#!LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH
import jax
jax.devices()

env: XLA_PYTHON_CLIENT_MEM_FRACTION=0.95


[cuda(id=0)]

In [2]:
### DATASETs
import datasets
from tokenized_dataset import load_tokenized_dataset_gpt2, load_tokenized_dataset_hellaswag, unpack_hellaswag_x, unpack_hellaswag_batched_x, concatenate_hellaswag_y_and_choice, get_batched_examples, get_batched_examples_packed 
ds, (tokenize, detokenize, tokenizer_vocab_size) = load_tokenized_dataset_gpt2("train[:10%]") #:1% or :1000
ds = ds.train_test_split(test_size=0.01, seed=42) # TODO: put seed in better place? does it mess up with resume_from_checkpoint logic?
ds = datasets.DatasetDict({
    'train': ds['train'],
    'validation': ds['test'] #rename
})
print(ds)

# Some stats on HellaSwag. Given the tokenicer: 
# Max len of concatenated y+longest choice is 149
# Max sum of choices tokens lens is 263 (Important for flattening choices in x + seq_len param for data collactor)
hellaswag_ds = load_tokenized_dataset_hellaswag(tokenize)
print(hellaswag_ds)

# Tests:
# item = next(x for x in hellaswag_ds)
# print(item)
# print(detokenize((item['y'],)))
# item_x = item['x']
# choices, label = unpack_hellaswag_x(item['x'])
# print(detokenize(choices)) # TODO XXX: one of chocies has ", while others have '. Is it anything serious?
# print(label)

Loading FineWeb-Edu dataset


README.md:   0%|          | 0.00/23.3k [00:00<?, ?B/s]

Resolving data files:   0%|          | 0/1630 [00:00<?, ?it/s]

Loading tokenizer bpe_tokenizer_fineweb-edu_sample-10BT_100k_ds_merges_30k.pickle
HotFix: Filter out items containing out-of-vocabulary words
Tokenizing dataset
DatasetDict({
    train: Dataset({
        features: ['x', 'y'],
        num_rows: 952096
    })
    validation: Dataset({
        features: ['x', 'y'],
        num_rows: 9618
    })
})
Loading HellaSwag dataset


README.md:   0%|          | 0.00/6.84k [00:00<?, ?B/s]

hellaswag.py:   0%|          | 0.00/4.36k [00:00<?, ?B/s]

Tokenizing dataset
Dataset({
    features: ['x', 'y'],
    num_rows: 10042
})


In [3]:
### Model
from model_jax import *
import jax.numpy as jnp
from jax import random

LAYERS = 12
model_vocab_size = tokenizer_vocab_size + 3 # add padding token (0) + start of sequence token + end of sequence token 
START_TOK = tokenizer_vocab_size + 1
END_TOK = tokenizer_vocab_size + 2 # TODO: in default LLM convention, it should be 1. Also, it could be part of tokenizer_vocab_size
EMB_DIM=768
FFN_DIM=3072
NUM_HEADS = 12
seq_len= 512 # TODO XXX: 1024 is orginal paper
params = init_transformer_gpt2(model_vocab_size, EMB_DIM, LAYERS, NUM_HEADS, FFN_DIM, seq_len, random.PRNGKey(0))

print(f'Vocabulary size: {model_vocab_size:_}')
print(f'Number of params: {count_num_params(params):_}')

### Loss + Grads + Optimizers
from loss_and_optimizer_jax import loss_train, loss_eval, log_probs, grad_loss, predict, acc_grad_loss, init_adam_w, adam_w_in_place, grads_l2norm, grads_grps_l2norms

# Figure out non bias/gain params, as we only want to apply weight decay to those in AdamW
# Only 1D weights, which are initialized to 0s are bias/gain params (including bias of LayerNorm)
weight_decay_mask = tuple([ tuple([not (item.ndim==1 and all(item==0)) for item in grp]) for grp in params])
print(weight_decay_mask)

Vocabulary size: 35_374
Number of params: 112_614_958
((True, False), (True,), (True, False, True, True, True, False, True, False, True, False), (True, False, True, True, True, False, True, False, True, False), (True, False, True, True, True, False, True, False, True, False), (True, False, True, True, True, False, True, False, True, False), (True, False, True, True, True, False, True, False, True, False), (True, False, True, True, True, False, True, False, True, False), (True, False, True, True, True, False, True, False, True, False), (True, False, True, True, True, False, True, False, True, False), (True, False, True, True, True, False, True, False, True, False), (True, False, True, True, True, False, True, False, True, False), (True, False, True, True, True, False, True, False, True, False), (True, False, True, True, True, False, True, False, True, False), (True, False))


In [4]:
### Infra utils
def print_mem_stats():
    mem_stats = jax.devices()[0].memory_stats()
    conv = lambda k: mem_stats[k] / pow(1000,3)
    print(f'GB in use: {conv("bytes_in_use")}. GB limit: {conv("bytes_limit")}')

import wandb

# start a new wandb run to track this script
if True:
    wandb.init(
        # set the wandb project where this run will be logged
        project="t",
    
        # track hyperparameters and run metadata
        #config={
        #"learning_rate": 0.02,
        #"architecture": "CNN",
        #"dataset": "CIFAR-100",
        #"epochs": 10,
        #}
        sync_tensorboard=True
    )

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
2024-12-03 13:10:40.530267: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1733231440.545626     373 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1733231440.550366     373 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[34m[1mwandb[0m: Currently logged in as: [33mmkukla[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
## Training loop
import datetime
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
import itertools
import pickle
import evaluate
import numpy as np # should we get rid of it?
import math

# Infra training params
run_name = datetime.datetime.now().strftime("%h%d_%H-%M-%S")
log_every_steps_multidevice = 10
eval_every_steps_multidevice = 500
eval_n_examples = 4
writer = SummaryWriter(f'/lego/storage/output/runs/{run_name}')
#checkpoint_every_steps = None #500 * 8 machines
checkpoint_every_steps = 4000 #20000 # TODO: move to use _multidevice too
resume_from_checkpoint = None
#resume_from_checkpoint = 'runs/Jun07_10-12-10/checkpoint_4000.pkl' # TODO: Confirm runs from checkpoints are still fully reproducible


# ML training params
key_training = random.PRNGKey(0) 
batch_size= 16 
gradient_accumulations_steps = 16 # TODO XXX: This means effective batch_size=256 instead of 512 used in the paper
num_steps_multidevice = 50000 #30000 #10000 #100000 # TODO XXX: think what it should be for GPT2
max_lr = 0.00025
warmup_steps_multidevice= 2000
betas = (0.9, 0.98) 
epsilon = 10e-9
grads, moments = init_adam_w(params)

# TODO XXX: remove below one
_, _, _, y_eval_mask, _, _, y_eval_indices  = next(get_batched_examples(ds, eval_n_examples, seq_len, START_TOK, END_TOK, "validation")) 
    
i = 0 
ds_train_rows_read = 0
if resume_from_checkpoint is not None:
    with open(resume_from_checkpoint,'rb') as f:
        i, ds_train_rows_read, params, moments, key_training = pickle.load(f)   
        print(f'Resuming training from the checkpoint: i {i} ds_train_rows_read {ds_train_rows_read}')

print(f'Number of params: {count_num_params(params):_}')

num_steps = num_steps_multidevice * gradient_accumulations_steps
while True:
    #for _, batch in tqdm(enumerate(itertools.islice(get_batched_examples(ds, batch_size, seq_len, START_TOK, END_TOK, skip_n_rows = ds_train_rows_read), num_steps)), initial=i, total=num_steps, smoothing=0):
    for _, batch in tqdm(enumerate(itertools.islice(get_batched_examples_packed(ds, batch_size, seq_len, START_TOK, END_TOK, pack_frac=0.75, skip_n_rows = ds_train_rows_read), num_steps)), initial=i, total=num_steps, smoothing=0):
        _, y, _, y_mask, _, _, y_indices = batch
        # Training step
        # TODO: introduce update func, which does grad_loss and adam, and then call/jit that function instead of calling/jitting two separate ones
        key_training, key_iter = random.split(key_training, 2)
        grads, (loss_val, acc, _) = acc_grad_loss(grads, params, jnp.array(y), jnp.array(y_mask), jnp.array(y_indices), key_iter)
        #grads, (loss_val, acc) = grad_loss(params, jnp.array(x), jnp.array(y), key_iter)

        # LR Scheduler
        #lr = max_lr # for SGD

        i_multidevice = i // gradient_accumulations_steps
        is_i_device_zero = i % gradient_accumulations_steps == 0

        # AIAYN:
        #lr = pow(EMB_DIM, -0.5) * min(pow((i_multidevice+1), -0.5), (i_multidevice+1) * pow(warmup_steps, -1.5))

        # GPT1:
        if i_multidevice < warmup_steps_multidevice:
            lr = (i_multidevice+1)/warmup_steps_multidevice * max_lr
        else:
            t_step = i_multidevice - warmup_steps_multidevice
            t_max = num_steps_multidevice - warmup_steps_multidevice
            lr = max_lr * (1 + math.cos(math.pi * t_step/t_max))/2

        #params = sgd(params, grads, lr)
        if i > 0 and i % gradient_accumulations_steps == 0:
            for grp_i in range(len(grads)):
                for p_i in range(len(grads[grp_i])):
                    grads[grp_i][p_i] =  grads[grp_i][p_i].at[:].divide(gradient_accumulations_steps)
            
            #params, moments = adam_w(params, grads, lr, betas, epsilon, moments, i)
            params, moments = adam_w_in_place(params, grads, lr, betas, epsilon, moments, i, weight_decay=0.01, weight_decay_mask=weight_decay_mask)
    
        # Logging:
        if i_multidevice%log_every_steps_multidevice==0 and is_i_device_zero:
            loss_val = loss_val.item()
            acc = acc.item()
            
            grad_norm = grads_l2norm(grads)
            grps_grad_norms = grads_grps_l2norms(grads)

            
            #print(f'iter #{i} loss {loss_val} acc {acc} lr {lr} grad_norm {grad_norm}')
            #print_mem_stats() # TODO: monitor it in tensorboard?
            writer.add_scalar('train/loss', loss_val, i_multidevice)
            writer.add_scalar('train/acc', acc, i_multidevice)
            writer.add_scalar('train/lr', lr, i_multidevice)
            writer.add_scalar('train/grad_norm', grad_norm, i_multidevice)
            for grp_i, grp_grad_norm in enumerate(grps_grad_norms):
                writer.add_scalar(f'train_details/grad_norm_grp_{grp_i}', grp_grad_norm, i_multidevice)

            # TODO: some metrics computed on x, other on y. Make it consistent
            #pad_tokens_prop = sum([y_row.count(0) for y_row in y]) / sum([len(y_row) for y_row in y])
            pad_tokens_prop = np.count_nonzero(y==0) / y.size
            writer.add_scalar('train_data/pad_tokens_prop', pad_tokens_prop, i_multidevice)
            writer.add_scalar('train_data/batch_size', len(y), i_multidevice)
            writer.add_scalar('train_data/batch_seq_len', len(y[0]), i_multidevice)
            writer.add_scalar('train_data/batch_total_tokens', len(y) * len(y[0]), i_multidevice)

        # Zeroed accumulated grads: we have to do it after computing grad norms
        if i > 0 and i % gradient_accumulations_steps == 0: 
            for grp_i in range(len(grads)):
                for p_i in range(len(grads[grp_i])):
                    grads[grp_i][p_i] =  grads[grp_i][p_i].at[:].set(0)
            
        # Evaluation
        if i_multidevice>0 and i_multidevice%eval_every_steps_multidevice==0 and is_i_device_zero:
            val_losses = []
            val_accs = []
            val_toks_props = []
            for eval_step, batch in enumerate(get_batched_examples(ds, batch_size, seq_len, START_TOK, END_TOK, split="validation")): 
                _, y, _, y_mask, _, _, y_indices = batch
                _, (loss_val, acc, toks_prop) = loss_eval(params, jnp.array(y), jnp.array(y_mask), jnp.array(y_indices))
                val_losses.append(loss_val)
                val_accs.append(acc)
                val_toks_props.append(toks_prop)
            writer.add_scalar('eval/loss', jnp.average(jnp.hstack(val_losses), weights = jnp.hstack(val_toks_props)).item(), i_multidevice)
            writer.add_scalar('eval/acc', jnp.average(jnp.hstack(val_accs), weights = jnp.hstack(val_toks_props)).item(), i_multidevice)
            
            # Few predictions TODO XXX: vary temperature -> diff samples
            y_sample = predict(params, jnp.array(y_eval_mask), jnp.array(y_eval_indices), seq_len, START_TOK, END_TOK)
            y_sample = tuple([item.tolist() for item in y_sample])
            def detokenize_y_in(y):
                y_out = y[:, 1:]
                y_out[y_out == END_TOK] = 0
                return detokenize(y_out)
            for detokenized_y_sample in detokenize(y_sample):
                print(f'PREDS: {detokenized_y_sample}\n')

            # Compute HellaSwag score
            print(f'Compute HellaSwag score')
            hellaswag_accs = [] # TODO XXX: enable seq_len be different for x vs y; 
            num_hellaswag_batches = 100 #TODO XXX:; run for the whole dataset
            for _, batch in tqdm(enumerate(itertools.islice(get_batched_examples(hellaswag_ds, batch_size, seq_len, START_TOK, END_TOK, split=None), num_hellaswag_batches))):
                choices_vals = []
                x, y, _, y_mask, _, _, y_indices = batch
                choices, labels = unpack_hellaswag_batched_x(x) 
                
                for choice in choices:
                    y, y_mask = concatenate_hellaswag_y_and_choice(y, choice, END_TOK) # no need to return new y_indices for now.
                    choice_log_probs = log_probs(params, jnp.array(y), jnp.array(y_mask), jnp.array(y_indices))
                    choices_vals.append(choice_log_probs)
                choices_vals = np.array(choices_vals).transpose() # we want choice per column
                hellaswag_accs.extend(np.argmax(choices_vals, axis=1)==labels)
                   
            hellaswag_acc = sum(hellaswag_accs)/len(hellaswag_accs)
            print(f'HellaSwag score:', hellaswag_acc)
            writer.add_scalar('eval/hellaswag', hellaswag_acc, i_multidevice)
                
        i = i + 1
        ds_train_rows_read = ds_train_rows_read + len(y)

        # Checkpointing (i, ds_train_rows_read, params, moments).
        # TODO XXX: I haven't used it for a while, and likely it's not working.. probably we can delete 
        if checkpoint_every_steps is not None and (i>0 and i%checkpoint_every_steps==0):
            import os
            training_state = (i, ds_train_rows_read, params, moments, key_training)
            filename = f'runs/{run_name}/checkpoint_{i}.pkl'
            os.makedirs(os.path.dirname(filename), exist_ok=True)
            with open(filename, 'wb') as f:
                pickle.dump(training_state, f)
                
    ds_train_rows_read=0 # After each epoch, reset dataset pointer

writer.close()



Number of params: 112_614_958


  1%|          | 7999/800000 [56:20<92:57:51,  2.37it/s]

PREDS: the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the f


0it [00:00, ?it/s][A
1it [00:02,  2.96s/it][A
2it [00:03,  1.56s/it][A
3it [00:04,  1.12s/it][A
4it [00:04,  1.10it/s][A
5it [00:05,  1.26it/s][A
6it [00:05,  1.38it/s][A
7it [00:06,  1.46it/s][A
8it [00:07,  1.53it/s][A
9it [00:07,  1.58it/s][A
10it [00:08,  1.61it/s][A
11it [00:08,  1.63it/s][A
12it [00:09,  1.65it/s][A
13it [00:10,  1.66it/s][A
14it [00:10,  1.67it/s][A
15it [00:11,  1.68it/s][A
16it [00:11,  1.69it/s][A
17it [00:12,  1.69it/s][A
18it [00:13,  1.69it/s][A
19it [00:13,  1.69it/s][A
20it [00:14,  1.69it/s][A
21it [00:14,  1.69it/s][A
22it [00:15,  1.69it/s][A
23it [00:15,  1.69it/s][A
24it [00:16,  1.69it/s][A
25it [00:17,  1.69it/s][A
26it [00:17,  1.70it/s][A
27it [00:18,  1.70it/s][A
28it [00:18,  1.69it/s][A
29it [00:19,  1.70it/s][A
30it [00:20,  1.70it/s][A
31it [00:20,  1.70it/s][A
32it [00:21,  1.69it/s][A
33it [00:21,  1.69it/s][A
34it [00:22,  1.69it/s][A
35it [00:23,  1.69it/s][A
36it [00:23,  1.69it/s][A
37it [00:24,  

HellaSwag score: 0.259375


  2%|▏         | 15999/800000 [1:54:50<93:47:20,  2.32it/s]

PREDS: the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time of the time 


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.70it/s][A
3it [00:01,  1.70it/s][A
4it [00:02,  1.71it/s][A
5it [00:02,  1.70it/s][A
6it [00:03,  1.71it/s][A
7it [00:04,  1.70it/s][A
8it [00:04,  1.70it/s][A
9it [00:05,  1.70it/s][A
10it [00:05,  1.70it/s][A
11it [00:06,  1.70it/s][A
12it [00:07,  1.71it/s][A
13it [00:07,  1.71it/s][A
14it [00:08,  1.70it/s][A
15it [00:08,  1.70it/s][A
16it [00:09,  1.70it/s][A
17it [00:09,  1.70it/s][A
18it [00:10,  1.70it/s][A
19it [00:11,  1.70it/s][A
20it [00:11,  1.71it/s][A
21it [00:12,  1.71it/s][A
22it [00:12,  1.71it/s][A
23it [00:13,  1.71it/s][A
24it [00:14,  1.71it/s][A
25it [00:14,  1.71it/s][A
26it [00:15,  1.71it/s][A
27it [00:15,  1.70it/s][A
28it [00:16,  1.70it/s][A
29it [00:17,  1.70it/s][A
30it [00:17,  1.70it/s][A
31it [00:18,  1.70it/s][A
32it [00:18,  1.70it/s][A
33it [00:19,  1.70it/s][A
34it [00:19,  1.70it/s][A
35it [00:20,  1.70it/s][A
36it [00:21,  1.70it/s][A
37it [00:21,  

HellaSwag score: 0.259375


  3%|▎         | 23999/800000 [2:53:20<93:24:49,  2.31it/s]

PREDS: the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of the first time of time of time of time the time of time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time time ti


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.70it/s][A
3it [00:01,  1.66it/s][A
4it [00:02,  1.68it/s][A
5it [00:02,  1.69it/s][A
6it [00:03,  1.70it/s][A
7it [00:04,  1.70it/s][A
8it [00:04,  1.70it/s][A
9it [00:05,  1.70it/s][A
10it [00:05,  1.71it/s][A
11it [00:06,  1.70it/s][A
12it [00:07,  1.71it/s][A
13it [00:07,  1.71it/s][A
14it [00:08,  1.71it/s][A
15it [00:08,  1.71it/s][A
16it [00:09,  1.71it/s][A
17it [00:09,  1.71it/s][A
18it [00:10,  1.71it/s][A
19it [00:11,  1.71it/s][A
20it [00:11,  1.71it/s][A
21it [00:12,  1.71it/s][A
22it [00:12,  1.71it/s][A
23it [00:13,  1.71it/s][A
24it [00:14,  1.71it/s][A
25it [00:14,  1.71it/s][A
26it [00:15,  1.71it/s][A
27it [00:15,  1.71it/s][A
28it [00:16,  1.71it/s][A
29it [00:17,  1.71it/s][A
30it [00:17,  1.71it/s][A
31it [00:18,  1.71it/s][A
32it [00:18,  1.71it/s][A
33it [00:19,  1.70it/s][A
34it [00:19,  1.71it/s][A
35it [00:20,  1.71it/s][A
36it [00:21,  1.71it/s][A
37it [00:21,  

HellaSwag score: 0.259375


  4%|▍         | 31999/800000 [3:51:44<92:41:50,  2.30it/s]

PREDS: the first time that the first time of the year of the year of the year of the year is to be the first time of the year of the year of the year of the year of the year of the year of the year of the year of the year of the year of the year of the year of the year of the year of year of the year of year of the year of year of year and the year of year of year the year of year the year of year is the year of year of year of year and year of year the year of year the year of year the year of year the year of year the year of year is the year of year of year of year and year of year of year of year and year of year of year the year of year year the year of year year and year of year year the year of year year is the year of year of year of year of year and year of year year of year year and year of year year of year year year and year of year year year of year year year and year of year year year year of year year year and year of year year year year year of year year year year and y


0it [00:00, ?it/s][A
1it [00:00,  1.72it/s][A
2it [00:01,  1.71it/s][A
3it [00:01,  1.64it/s][A
4it [00:02,  1.67it/s][A
5it [00:02,  1.68it/s][A
6it [00:03,  1.69it/s][A
7it [00:04,  1.69it/s][A
8it [00:04,  1.70it/s][A
9it [00:05,  1.70it/s][A
10it [00:05,  1.71it/s][A
11it [00:06,  1.70it/s][A
12it [00:07,  1.70it/s][A
13it [00:07,  1.70it/s][A
14it [00:08,  1.70it/s][A
15it [00:08,  1.70it/s][A
16it [00:09,  1.70it/s][A
17it [00:10,  1.70it/s][A
18it [00:10,  1.71it/s][A
19it [00:11,  1.70it/s][A
20it [00:11,  1.70it/s][A
21it [00:12,  1.70it/s][A
22it [00:12,  1.70it/s][A
23it [00:13,  1.70it/s][A
24it [00:14,  1.71it/s][A
25it [00:14,  1.71it/s][A
26it [00:15,  1.71it/s][A
27it [00:15,  1.71it/s][A
28it [00:16,  1.71it/s][A
29it [00:17,  1.71it/s][A
30it [00:17,  1.71it/s][A
31it [00:18,  1.70it/s][A
32it [00:18,  1.71it/s][A
33it [00:19,  1.71it/s][A
34it [00:19,  1.71it/s][A
35it [00:20,  1.71it/s][A
36it [00:21,  1.71it/s][A
37it [00:21,  

HellaSwag score: 0.259375


  5%|▍         | 39999/800000 [4:50:10<91:53:26,  2.30it/s]

PREDS: the following is a brief history of the history of the united states and the united states of america in the united states of america in the united states of america. the united states was the first united states to be the first united states to be the first united states to be the first united states to be the first united states to be the first united states to be the first united states to be the first united states to be the first united states to be the first united states to be the first united states to be the first united states to be the united states to be the first united states to be the first united states to be the united states to be the first united states to be the united states to be the first united states to be the united states to be the first united states to be the united states to be the first united states to be the united states to be the first united states to be the united states to be the first united states to be the united states to be the united s


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.71it/s][A
3it [00:01,  1.71it/s][A
4it [00:02,  1.71it/s][A
5it [00:02,  1.71it/s][A
6it [00:03,  1.71it/s][A
7it [00:04,  1.71it/s][A
8it [00:04,  1.71it/s][A
9it [00:05,  1.71it/s][A
10it [00:05,  1.71it/s][A
11it [00:06,  1.71it/s][A
12it [00:07,  1.71it/s][A
13it [00:07,  1.71it/s][A
14it [00:08,  1.71it/s][A
15it [00:08,  1.71it/s][A
16it [00:09,  1.71it/s][A
17it [00:09,  1.71it/s][A
18it [00:10,  1.71it/s][A
19it [00:11,  1.71it/s][A
20it [00:11,  1.71it/s][A
21it [00:12,  1.71it/s][A
22it [00:12,  1.71it/s][A
23it [00:13,  1.71it/s][A
24it [00:14,  1.71it/s][A
25it [00:14,  1.71it/s][A
26it [00:15,  1.71it/s][A
27it [00:15,  1.71it/s][A
28it [00:16,  1.71it/s][A
29it [00:16,  1.71it/s][A
30it [00:17,  1.71it/s][A
31it [00:18,  1.71it/s][A
32it [00:18,  1.71it/s][A
33it [00:19,  1.71it/s][A
34it [00:19,  1.71it/s][A
35it [00:20,  1.71it/s][A
36it [00:21,  1.71it/s][A
37it [00:21,  

HellaSwag score: 0.259375


  6%|▌         | 44626/800000 [5:24:54<91:39:45,  2.29it/s]
  6%|▌         | 47999/800000 [23:39<87:54:13,  2.38it/s]

PREDS: the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the first of the f


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.71it/s][A
3it [00:01,  1.68it/s][A
4it [00:02,  1.69it/s][A
5it [00:02,  1.69it/s][A
6it [00:03,  1.70it/s][A
7it [00:04,  1.70it/s][A
8it [00:04,  1.71it/s][A
9it [00:05,  1.71it/s][A
10it [00:05,  1.71it/s][A
11it [00:06,  1.71it/s][A
12it [00:07,  1.71it/s][A
13it [00:07,  1.71it/s][A
14it [00:08,  1.71it/s][A
15it [00:08,  1.71it/s][A
16it [00:09,  1.71it/s][A
17it [00:09,  1.71it/s][A
18it [00:10,  1.71it/s][A
19it [00:11,  1.71it/s][A
20it [00:11,  1.71it/s][A
21it [00:12,  1.71it/s][A
22it [00:12,  1.71it/s][A
23it [00:13,  1.71it/s][A
24it [00:14,  1.71it/s][A
25it [00:14,  1.71it/s][A
26it [00:15,  1.71it/s][A
27it [00:15,  1.70it/s][A
28it [00:16,  1.71it/s][A
29it [00:16,  1.70it/s][A
30it [00:17,  1.71it/s][A
31it [00:18,  1.71it/s][A
32it [00:18,  1.71it/s][A
33it [00:19,  1.71it/s][A
34it [00:19,  1.71it/s][A
35it [00:20,  1.71it/s][A
36it [00:21,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


  7%|▋         | 53149/800000 [1:02:03<90:38:33,  2.29it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

 21%|██        | 167999/800000 [4:08:40<76:46:08,  2.29it/s]

PREDS: the first step in the process of developing a new product is to develop a new product that is more efficient and efficient. the first step in the process of developing a new product is to develop a new product that is more efficient and efficient. the first step in the process of developing a new product is to develop a new product that is more efficient and efficient. the first step in the process of developing a new product is to develop a new product that is more efficient and efficient. the first step in the process of developing a new product is to develop a new product that is more efficient and efficient. the first step in the process of developing a new product is to develop a new product that is more efficient and efficient. the second step in the process of developing a new product is to develop a new product that is more efficient and efficient. the first step in the process of developing a new product is to develop a new product that is more efficient and efficient. 


0it [00:00, ?it/s][A
1it [00:00,  1.72it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 22%|██▏       | 175999/800000 [5:07:10<75:50:44,  2.29it/s]

PREDS: the first step in the process of creating a new document is to create a new document that is easy to create. this is done by creating a new document that is easy to create. this is done by creating a new document that is easy to create. this is done by creating a new document that is easy to create. this is done by creating a new document that is easy to create. this is done by creating a new document that is easy to create. this is done by creating a new document that is easy to create. this is done by creating a new document that is easy to create. this is done by creating a new document that is easy to create. this is done by creating a new document that is easy to create. this is done by creating a new document that is easy to create. this is done by creating a new document that is easy to create. this is done by creating a new document that is easy to create. this is done by creating a new document that is easy to create. this is done by creating a new document that is easy


0it [00:00, ?it/s][A
1it [00:00,  1.72it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.65it/s][A
34it [00:19,  1.67it/s][A
35it [00:20,  1.69it/s][A
36it [00:20,  1.70it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 22%|██▏       | 178504/800000 [5:26:59<75:53:54,  2.27it/s]
 23%|██▎       | 183999/800000 [38:32<72:00:09,  2.38it/s]

PREDS: the first of the three major types of the “great gatsby” are the great gatsby, the great gatsby, the great gatsby, and the great gatsby. the great gatsby is a great gatsby that is built on the foundation of the great gatsby and is built on the foundation of the great gatsby and is built on the foundation of the great gatsby. the great gatsby is built on the foundation of the great gatsby and is built on the foundation of the great gatsby and is built on the foundation of the great gatsby. the great gatsby is built on the foundation of the great gatsby and is built on the foundation of the great gatsby and is built on the foundation of the great gatsby. the great gatsby is built on the foundation of the great gatsby and is built on the foundation of the great gatsby and is built on the foundation of the great gatsby. the great gatsby is built on the foundation of the great gatsby and is built on the foundation of the great gatsby and is built on the foundation of the great gatsby


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 24%|██▍       | 191999/800000 [1:37:01<72:51:27,  2.32it/s]

PREDS: the first step in the process of creating a new product is to create a new product that is compatible with the existing product. the product is then tested and tested to ensure that it meets the requirements of the new product and meets the requirements of the new product. the product is then tested to ensure that it meets the requirements of the new product and meets the requirements of the new product and meets the requirements of the new product and meets the requirements of the new product and meets the requirements of the new product and meets the requirements of the new product and meets the requirements of the new product and meets the requirements of the new product and meets the requirements of the new product and meets the requirements of the new product and meets the requirements of the new product and meets the requirements of the new product and meets the requirements of the new product and meets the requirements of the new product and meets the requirements of the 


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 25%|██▍       | 199999/800000 [2:35:25<72:18:29,  2.30it/s]

PREDS: the first of the three major types of cancer is the most common type of cancer in the united states. the most common type of cancer is the lung, breast, and colon cancer. the most common type of cancer is the lung, breast, and colon cancer. the most common type of cancer is the lung, breast, and colon cancer. the most common type of cancer is the lung, breast, and colon cancer. the most common type of cancer is the lung, breast, and colon cancer. the most common type of cancer is the lung, breast, and colon cancer. the most common type of cancer is the lung, breast, and colon cancer. the most common type of cancer is the lung, breast, and colon cancer. the most common type of cancer is the lung, breast, and colon cancer. the most common type of cancer is the lung, breast, and colon cancer. the most common type of cancer is the lung, breast, and colon cancer. the most common type of cancer is the lung, breast, and colon cancer. the most common type of cancer is the lung, breast, 


0it [00:00, ?it/s][A
1it [00:00,  1.72it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 26%|██▌       | 207999/800000 [3:33:55<71:33:47,  2.30it/s]

PREDS: the first thing that comes to mind when you think of the first thing that comes to mind when you think of the first thing that comes to mind when you think of the first thing that comes to mind when you think of the first thing that comes to mind when you think of the first thing that comes to mind when you think of the first thing that comes to mind when you think of the first thing that comes to mind when you think of the first thing that comes to mind when you think of the first thing that comes to mind when you think of the first thing that comes to mind when you think of the first thing that comes to mind when you think of the first thing that comes to mind when you think of the first thing that comes to mind when you think of the first thing that comes to mind when you think of the first thing that comes to mind when you think of the first thing that comes to mind when you think of the first thing that comes to mind when you think of the first thing that comes to mind when


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.71it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 27%|██▋       | 215999/800000 [4:32:21<70:42:12,  2.29it/s]

PREDS: the first step in the process of creating a new project is to create a new project that is easy to navigate. this is a great way to get started and get started on your project. the first step is to create a new project that is easy to navigate. this is a great way to get started on your project. the first step is to create a new project that is easy to navigate. this is a great way to get started on your project. the second step is to create a new project that is easy to navigate. this is a great way to get started on your project. the first step is to create a new project that is easy to navigate. this is a great way to get started on your project. the first step is to create a new project that is easy to navigate. this is a great way to get started on your project. this is a great way to get started on your project. the second step is to create a new project that is easy to navigate. this is a great way to get started on your project. this is a great way to get started on your


0it [00:00, ?it/s][A
1it [00:00,  1.68it/s][A
2it [00:01,  1.70it/s][A
3it [00:01,  1.71it/s][A
4it [00:02,  1.71it/s][A
5it [00:02,  1.71it/s][A
6it [00:03,  1.71it/s][A
7it [00:04,  1.71it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.71it/s][A
11it [00:06,  1.71it/s][A
12it [00:07,  1.71it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.71it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 28%|██▊       | 223130/800000 [5:24:33<69:55:24,  2.29it/s]
 28%|██▊       | 223999/800000 [06:18<69:45:58,  2.29it/s]

PREDS: the first thing you need to do is to get a good night’s sleep. the best way to get a good night’s sleep is to get a good night’s sleep. the best way to get a good night’s sleep is to get a good night’s sleep. the best way to get a good night’s sleep is to get a good night’s sleep. the best way to get a good night’s sleep is to get a good night’s sleep. the best way to get a good night’s sleep is to get a good night’s sleep. the best way to get a good night’s sleep is to get a good night’s sleep. the best way to get a good night’s sleep is to get a good night’s sleep. the best way to get a good night’s sleep is to get a good night’s sleep. the best way to get a good night’s sleep is to get a good night’s sleep. the best way to get a good night’s sleep is to get a good night’s sleep. the best way to get a good night’s sleep is to get a good night’s sleep. the best way to get a good night’s sleep is to get a good night’s sleep. the best way to get a good night’s sleep is to get a g


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.71it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.71it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.71it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.71it/s][A
11it [00:06,  1.71it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.71it/s][A
16it [00:09,  1.71it/s][A
17it [00:09,  1.71it/s][A
18it [00:10,  1.71it/s][A
19it [00:11,  1.71it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 29%|██▉       | 231999/800000 [1:04:42<69:04:31,  2.28it/s]

PREDS: the first thing you need to know about the history of the united states is that it was a country that was founded by the united states of america. the first american president was john adams, who was a member of the continental congress and was the first president to be elected to the united states of america. the first president was john adams, who was a member of the continental congress and was the first president to be elected to the united states. the first president was john adams, who was a member of the continental congress and was the first president to be elected to the united states. the first president was john adams, who was a member of the continental congress and was the first president to be elected to the united states. the first president was john adams, who was a member of the continental congress and was the first president to be elected to the united states. the first president was john adams, who was a member of the continental congress and was the first pr


0it [00:00, ?it/s][A
1it [00:00,  1.70it/s][A
2it [00:01,  1.70it/s][A
3it [00:01,  1.71it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 30%|██▉       | 239999/800000 [2:03:09<68:08:17,  2.28it/s]

PREDS: the first step in the process of creating a new project is to create a new project that is unique and unique. the first step in creating a new project is to create a new project. this is a process that is unique and unique. the first step in creating a new project is to create a new project. this is a process that is unique and unique. the first step in creating a new project is to create a new project. this is a process that is unique and unique. the first step in creating a new project is to create a new project. this is a process that is unique and unique. the first step in creating a new project is to create a new project. this is a process that is unique and unique. the second step in creating a new project is to create a new project. this is a process that is unique and unique. the third step in creating a new project is to create a new project. this is a process that is unique and unique. the third step in creating a new project is to create a new project. this is a proce


0it [00:00, ?it/s][A
1it [00:00,  1.70it/s][A
2it [00:01,  1.71it/s][A
3it [00:01,  1.71it/s][A
4it [00:02,  1.71it/s][A
5it [00:02,  1.71it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 31%|███       | 247999/800000 [3:01:33<67:09:46,  2.28it/s]

PREDS: the first step in the process of creating a new product is to create a new product that is unique and unique. the first step is to create a new product that is unique and unique. the first step is to create a new product that is unique and unique. the first step is to create a new product that is unique and unique. the first step is to create a new product that is unique and unique. the first step is to create a new product that is unique and unique. the first step is to create a new product that is unique and unique. the second step is to create a new product that is unique and unique. the third step is to create a new product that is unique and unique. the third step is to create a new product that is unique and unique. the third step is to create a new product that is unique and unique. the third step is to create a new product that is unique and unique. the third step is to create a new product that is unique and unique. the third step is to create a new product that is uniq


0it [00:00, ?it/s][A
1it [00:00,  1.72it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.71it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 32%|███▏      | 255999/800000 [3:59:59<66:12:07,  2.28it/s]

PREDS: the first step in the process of creating a new product is to create a new product that is easy to use. this is done by creating a new product that is easy to use and easy to use. the first step in the process of creating a new product is to create a new product that is easy to use. this is done by creating a new product that is easy to use. this is done by creating a new product that is easy to use. this is done by creating a new product that is easy to use. this is done by creating a new product that is easy to use. this is done by creating a new product that is easy to use. this is done by creating a new product that is easy to use. this is done by creating a new product that is easy to use. this is done by creating a new product that is easy to use. this is done by creating a new product that is easy to use. this is done by creating a new product that is easy to use. this is done by creating a new product that is easy to use. this is done by creating a new product that is ea


0it [00:00, ?it/s][A
1it [00:00,  1.72it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.71it/s][A
4it [00:02,  1.71it/s][A
5it [00:02,  1.71it/s][A
6it [00:03,  1.71it/s][A
7it [00:04,  1.68it/s][A
8it [00:04,  1.69it/s][A
9it [00:05,  1.70it/s][A
10it [00:05,  1.70it/s][A
11it [00:06,  1.70it/s][A
12it [00:07,  1.71it/s][A
13it [00:07,  1.71it/s][A
14it [00:08,  1.71it/s][A
15it [00:08,  1.69it/s][A
16it [00:09,  1.70it/s][A
17it [00:09,  1.71it/s][A
18it [00:10,  1.71it/s][A
19it [00:11,  1.71it/s][A
20it [00:11,  1.71it/s][A
21it [00:12,  1.71it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.70it/s][A
24it [00:14,  1.70it/s][A
25it [00:14,  1.71it/s][A
26it [00:15,  1.71it/s][A
27it [00:15,  1.71it/s][A
28it [00:16,  1.71it/s][A
29it [00:16,  1.71it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.71it/s][A
32it [00:18,  1.71it/s][A
33it [00:19,  1.71it/s][A
34it [00:19,  1.71it/s][A
35it [00:20,  1.71it/s][A
36it [00:21,  1.71it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 32%|███▏      | 259635/800000 [4:27:47<66:04:00,  2.27it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

 46%|████▌     | 367999/800000 [1:19:28<52:03:40,  2.30it/s]

PREDS: the following is a list of the most common causes of a broken tooth that can be caused by a broken tooth. the most common causes of a broken tooth are a broken tooth that is not properly cleaned or replaced by a root canal or root canal treatment. the most common causes of a broken tooth are a broken tooth that is not properly cleaned or replaced by a root canal treatment. the most common causes of a broken tooth are a broken tooth that is not properly cleaned or replaced by a root canal treatment. the most common causes of a broken tooth are a broken tooth that is not properly cleaned or replaced by a root canal treatment. the most common causes of a broken tooth are a broken tooth that is not properly cleaned or replaced by a root canal treatment. the most common causes of a broken tooth are a broken tooth that is not properly cleaned or replaced by a root canal treatment. the most common causes of a broken tooth are a broken tooth that is not properly cleaned or replaced by a


0it [00:00, ?it/s][A
1it [00:00,  1.72it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 47%|████▋     | 375999/800000 [2:17:49<51:17:00,  2.30it/s]

PREDS: the first step in the process of creating a new product is to identify the target market. this can be done by looking at the market and the market for the product. the market for the product is the market for the product and the market for the product is the market for the product. the market for the product is the market for the product and the market for the product is the market for the product. the market for the product is the market for the product and the market for the product is the market for the product. the market for the product is the market for the product and the market for the product is the market for the product. the market for the product is the market for the product and the market for the product is the market for the product. the market for the product is the market for the product and the market for the product is the market for the product. the market for the product is the market for the product and the market for the product is the market for the produ


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 48%|████▊     | 383999/800000 [3:16:18<50:25:37,  2.29it/s]

PREDS: the first step in the process of creating a new project is to create a new project. this is the process of creating a new project and then creating a new project. this process is called the “project plan” and is the process of creating a new project. this is the process of creating a new project and then creating a new project. this is the process of creating a new project and then creating a new project. this is the process of creating a new project. this is the process of creating a new project. this is the process of creating a new project. this is the process of creating a new project. this is the process of creating a new project. this is the process of creating a new project. this is the process of creating a new project. this is the process of creating a new project. this is the process of creating a new project. this is the process of creating a new project. this is the process of creating a new project. this is the process of creating a new project. this is the process 


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.71it/s][A
3it [00:01,  1.71it/s][A
4it [00:02,  1.71it/s][A
5it [00:02,  1.71it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.71it/s][A
35it [00:20,  1.71it/s][A
36it [00:20,  1.71it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 49%|████▉     | 391999/800000 [4:14:42<49:29:57,  2.29it/s]

PREDS: the first step in the process of creating a new project is to create a new project that will be completed in a few months. this is the process of creating a new project that will be completed in a few months. this is the process of creating a new project that will be completed in a few months. this is the process of creating a new project that will be completed in a few months. this is the process of creating a new project that will be completed in a few months. this is the process of creating a new project that will be completed in a few months. this is the process of creating a new project that will be completed in a few months. this is the process of creating a new project that will be completed in a few months. this is the process of creating a new project that will be completed in a few months. this is the process of creating a new project that will be completed in a few months. this is the process of creating a new project that will be completed in a few months. this is th


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.71it/s][A
4it [00:02,  1.71it/s][A
5it [00:02,  1.71it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.71it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 50%|████▉     | 399999/800000 [5:13:08<48:33:36,  2.29it/s]

PREDS: the following is a list of the most common causes of skin cancer. the most common causes of skin cancer are basal cell carcinoma, squamous cell carcinoma, and melanoma. the most common types of skin cancer are basal cell carcinoma, squamous cell carcinoma, and melanoma. the most common types of skin cancer are basal cell carcinoma, squamous cell carcinoma, and melanoma. the most common types of skin cancer are basal cell carcinoma, squamous cell carcinoma, and melanoma. the most common types of skin cancer are basal cell carcinoma, squamous cell carcinoma, and melanoma. the most common types of skin cancer are basal cell carcinoma, squamous cell carcinoma, and melanoma. the most common types of skin cancer are basal cell carcinoma, squamous cell carcinoma, and melanoma. the most common types of skin cancer are basal cell carcinoma, squamous cell carcinoma, and melanoma. the most common types of skin cancer are basal cell carcinoma, squamous cell carcinoma, and melanoma. the most


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.71it/s][A
3it [00:01,  1.71it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.71it/s][A
22it [00:12,  1.71it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 50%|█████     | 401634/800000 [5:26:56<48:38:32,  2.27it/s]
 51%|█████     | 407999/800000 [44:36<45:46:55,  2.38it/s]

PREDS: the first step in the process of creating a new project is to create a new project. this is done by creating a new project and creating a new project. the first step is to create a new project. this is done by creating a new project. this is done by creating a new project. this is done by creating a new project. this is done by creating a new project. this is done by creating a new project. this is done by creating a new project. this is done by creating a new project. this is done by creating a new project. this is done by creating a new project. this is done by creating a new project. this is done by creating a new project. this is done by creating a new project. this is done by creating a new project. this is done by creating a new project. this is done by creating a new project. this is done by creating a new project. this is done by creating a new project. this is done by creating a new project. this is done by creating a new project. this is done by creating a new project.


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.71it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.71it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.71it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.71it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.71it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.71it/s][A
25it [00:14,  1.71it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.71it/s][A
28it [00:16,  1.71it/s][A
29it [00:16,  1.71it/s][A
30it [00:17,  1.71it/s][A
31it [00:18,  1.71it/s][A
32it [00:18,  1.71it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.71it/s][A
36it [00:20,  1.71it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 52%|█████▏    | 415999/800000 [1:43:02<45:54:24,  2.32it/s]

PREDS: the first step in the process of creating a new project is to create a new project that will be able to be completed in less time. this is done by creating a new project that will be able to be completed in less time. this is done by creating a new project that will be able to be completed in less time. this is done by creating a new project that will be able to be completed in less time. this is done by creating a new project that will be able to be completed in less time. this is done by creating a new project that will be able to be completed in less time. this is done by creating a new project that will be able to be completed in less time. this is done by creating a new project that will be able to be completed in less time. this is done by creating a new project that will be able to be completed in less time. this is done by creating a new project that will be able to be completed in less time. this is done by creating a new project that will be able to be completed in les


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.71it/s][A
16it [00:09,  1.71it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.71it/s][A
32it [00:18,  1.71it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.71it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 53%|█████▎    | 423999/800000 [2:41:26<45:14:06,  2.31it/s]

PREDS: the first step in the process of creating a new project is to create a new project that will be able to be used in the future. this is done by creating a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. this is done by creating a new project that will be able to be used in the future. this is done by creating a new project that will be able to be used in the future. this is done by creating a new project that will be able to be used in the future. this is done by creating a new project that will be able to be used in the future. this is done by creating a new project that will be able to be used in the future. this is done by creating a new project that will be able to be used in the future. this is done by creating a new project that will be able to be used in the future. this is done by creating a new project that will be able to be used in the future. this is


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.71it/s][A
3it [00:01,  1.71it/s][A
4it [00:02,  1.71it/s][A
5it [00:02,  1.71it/s][A
6it [00:03,  1.71it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 54%|█████▍    | 431999/800000 [3:39:52<44:24:43,  2.30it/s]
49it [00:28,  1.72it/s][A
50it [00:29,  1.72it/s][A
51it [00:29,  1.72it/s][A
52it [00:30,  1.72it/s][A
53it [00:30,  1.72it/s][A
54it [00:31,  1.72it/s][A
55it [00:32,  1.72it/s][A
56it [00:32,  1.72it/s][A
57it [00:33,  1.72it/s][A
58it [00:33,  1.72it/s][A
59it [00:34,  1.72it/s][A
60it [00:34,  1.72it/s][A
61it [00:35,  1.72it/s][A
62it [00:36,  1.72it/s][A
63it [00:36,  1.72it/s][A
64it [00:37,  1.72it/s][A
65it [00:37,  1.72it/s][A
66it [00:38,  1.72it/s][A
67it [00:39,  1.72it/s][A
68it [00:39,  1.72it/s][A
69it [00:40,  1.72it/s][A
70it [00:40,  1.72it/s][A
71it [00:41,  1.72it/s][A
72it [00:41,  1.72it/s][A
73it [00:42,  1.72it/s][A
74it [00:43,  1.72it/s][A
75it [00:43,  1.72it/s][A
76it [00:44,  1.72it/s][A
77it [00:44,  1.72it/s][A
78it [00:45,  1.72it/s][A
79it [00:45,  1.72it/s][A
80it [00:46,  1.72it/s][A
81it [00:47,  1.72it/s][A
82it [00:47,  1.72it/s][A
83it [00:48,  1.72it/

HellaSwag score: 0.259375


 55%|█████▍    | 437683/800000 [4:22:00<43:53:22,  2.29it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

 68%|██████▊   | 543999/800000 [1:02:02<31:11:39,  2.28it/s]
58it [00:33,  1.72it/s][A
59it [00:34,  1.72it/s][A
60it [00:34,  1.72it/s][A
61it [00:35,  1.72it/s][A
62it [00:36,  1.72it/s][A
63it [00:36,  1.72it/s][A
64it [00:37,  1.72it/s][A
65it [00:37,  1.72it/s][A
66it [00:38,  1.72it/s][A
67it [00:38,  1.72it/s][A
68it [00:39,  1.72it/s][A
69it [00:40,  1.72it/s][A
70it [00:40,  1.72it/s][A
71it [00:41,  1.72it/s][A
72it [00:41,  1.72it/s][A
73it [00:42,  1.72it/s][A
74it [00:43,  1.72it/s][A
75it [00:43,  1.72it/s][A
76it [00:44,  1.72it/s][A
77it [00:44,  1.72it/s][A
78it [00:45,  1.72it/

HellaSwag score: 0.259375


 69%|██████▊   | 549737/800000 [1:44:28<30:38:09,  2.27it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

 71%|███████   | 567999/800000 [3:57:16<28:14:31,  2.28it/s]

PREDS: the first step in the process of creating a new project is to create a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and th


0it [00:00, ?it/s][A
1it [00:00,  1.72it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.71it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.71it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 72%|███████▏  | 575999/800000 [4:55:44<27:16:11,  2.28it/s]

PREDS: the first step in the process of creating a new project is to create a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. the second step in creating a new project is to create a new project that will be able to be used in the future. the third step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will


0it [00:00, ?it/s][A
1it [00:00,  1.68it/s][A
2it [00:01,  1.70it/s][A
3it [00:01,  1.71it/s][A
4it [00:02,  1.71it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 73%|███████▎  | 580138/800000 [5:27:05<26:51:32,  2.27it/s]
 73%|███████▎  | 583999/800000 [27:07<25:17:31,  2.37it/s]

PREDS: the first step in the process of creating a new project is to create a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and th


0it [00:00, ?it/s][A
1it [00:00,  1.72it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 74%|███████▍  | 591999/800000 [1:25:31<24:59:46,  2.31it/s]

PREDS: the first step in the process of creating a new project is to create a new project. this is done by creating a new project and then creating a new project. the first step in creating a new project is to create a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then 


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 75%|███████▍  | 599999/800000 [2:23:57<24:09:43,  2.30it/s]

PREDS: the first step in the process of creating a new project is to create a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and then creating a new project. this is done by creating a new project and th


0it [00:00, ?it/s][A
1it [00:00,  1.72it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.71it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.71it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 76%|███████▌  | 607999/800000 [3:22:27<23:15:15,  2.29it/s]

PREDS: the first step in the process of creating a new project is to create a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. the second step in creating a new project is to create a new project that will be able to be used in the future. the third step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.71it/s][A
3it [00:01,  1.71it/s][A
4it [00:02,  1.71it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.71it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.71it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 77%|███████▋  | 615999/800000 [4:20:51<22:18:27,  2.29it/s]

PREDS: the first step in the process of creating a new project is to create a new project that will be submitted to the project team. this is a very important step in the process of creating a new project. the project team will need to create a new project that will be submitted to the project team. this is a very important step in the process of creating a new project. the project team will need to create a new project that will be submitted to the project team. this is a very important step in the process of creating a new project. the project team will need to create a new project that will be submitted to the project team. this is a very important step in the process of creating a new project. the project team will need to create a new project that will be submitted to the project team. this is a very important step in the process of creating a new project. the project team will need to create a new project that will be submitted to the project team. this is a very important step i


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.71it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 78%|███████▊  | 623999/800000 [5:19:21<21:21:30,  2.29it/s]

PREDS: the first step in the process of creating a new project is to create a new project. this is done by creating a new project and creating a new project. this is done by creating a new project and creating a new project. this is done by creating a new project and creating a new project. this is done by creating a new project and creating a new project. this is done by creating a new project and creating a new project. this is done by creating a new project and creating a new project. this is done by creating a new project and creating a new project. this is done by creating a new project and creating a new project. this is done by creating a new project and creating a new project. this is done by creating a new project and creating a new project. this is done by creating a new project and creating a new project. this is done by creating a new project and creating a new project. this is done by creating a new project and creating a new project. this is done by creating a new project


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.71it/s][A
3it [00:01,  1.71it/s][A
4it [00:02,  1.71it/s][A
5it [00:02,  1.71it/s][A
6it [00:03,  1.71it/s][A
7it [00:04,  1.71it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.71it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.71it/s][A
30it [00:17,  1.71it/s][A
31it [00:18,  1.71it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 78%|███████▊  | 624764/800000 [5:27:08<21:24:35,  2.27it/s]
 79%|███████▉  | 631999/800000 [50:39<19:36:29,  2.38it/s]

PREDS: the first step in the process of creating a new project is to create a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. the second step in creating a new project is to create a new project that will be able to be used in the future. the third step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will 


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.71it/s][A
5it [00:02,  1.71it/s][A
6it [00:03,  1.71it/s][A
7it [00:04,  1.71it/s][A
8it [00:04,  1.71it/s][A
9it [00:05,  1.71it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.71it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.71it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.71it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 80%|███████▉  | 639999/800000 [1:49:10<19:06:29,  2.33it/s]

PREDS: the first step in the process of creating a new project is to create a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. the second step in creating a new project is to create a new project that will be able to be used in the future. the third step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will 


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.71it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.71it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 81%|████████  | 647999/800000 [2:47:40<18:16:52,  2.31it/s]

PREDS: the first step in the process of creating a new project is to create a new project that will be able to be used in the future. this is done by creating a new project that will be able to be used in the future. the first step in the process of creating a new project is to create a new project that will be able to be used in the future. this is done by creating a new project that will be able to be used in the future. the second step in the process of creating a new project is to create a new project that will be able to be used in the future. this is done by creating a new project that will be able to be used in the future. this is done by creating a new project that will be able to be used in the future. this is done by creating a new project that will be able to be used in the future. this is done by creating a new project that will be able to be used in the future. this is done by creating a new project that will be able to be used in the future. this is done by creating a new


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.71it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.69it/s][A
8it [00:04,  1.70it/s][A
9it [00:05,  1.71it/s][A
10it [00:05,  1.71it/s][A
11it [00:06,  1.71it/s][A
12it [00:07,  1.71it/s][A
13it [00:07,  1.71it/s][A
14it [00:08,  1.71it/s][A
15it [00:08,  1.71it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:14,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 81%|████████▏ | 650781/800000 [3:09:26<18:06:30,  2.29it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

 82%|████████▏ | 655999/800000 [3:46:03<17:22:12,  2.30it/s]

PREDS: the first step in the process of creating a new project is to create a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. the second step in creating a new project is to create a new project that will be able to be used in the future. the third step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.71it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 83%|████████▎ | 663999/800000 [4:44:30<16:26:11,  2.30it/s]

PREDS: the first step in the process of creating a new project is to create a new project that will be able to be used in the future. the project will be created by the project manager and the project team will be able to create a new project that will be able to be used in the future. the project manager will be responsible for the creation of the project and will be responsible for the creation of the project budget. the project manager will be responsible for the creation of the project budget and will be responsible for the creation of the project budget. the project manager will be responsible for the creation of the project budget and will be responsible for the creation of the project budget. the project manager will be responsible for the creation of the project budget and will be responsible for the creation of the project budget. the project manager will be responsible for the creation of the project budget and will be responsible for the creation of the project budget. the p


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.71it/s][A
3it [00:01,  1.71it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


 84%|████████▎ | 669390/800000 [5:24:37<15:50:05,  2.29it/s]
 84%|████████▍ | 670741/800000 [09:25<15:01:44,  2.39it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

100%|█████████▉| 799999/800000 [5:01:43<00:00,  2.28it/s]

PREDS: the first step in the process of creating a new project is to create a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. the second step in creating a new project is to create a new project that will be able to be used in the future. the third step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will 


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.71it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.71it/s][A
6it [00:03,  1.71it/s][A
7it [00:04,  1.71it/s][A
8it [00:04,  1.71it/s][A
9it [00:05,  1.71it/s][A
10it [00:05,  1.71it/s][A
11it [00:06,  1.71it/s][A
12it [00:07,  1.71it/s][A
13it [00:07,  1.71it/s][A
14it [00:08,  1.71it/s][A
15it [00:08,  1.71it/s][A
16it [00:09,  1.71it/s][A
17it [00:09,  1.71it/s][A
18it [00:10,  1.71it/s][A
19it [00:11,  1.71it/s][A
20it [00:11,  1.71it/s][A
21it [00:12,  1.71it/s][A
22it [00:12,  1.71it/s][A
23it [00:13,  1.71it/s][A
24it [00:14,  1.71it/s][A
25it [00:14,  1.71it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.71it/s][A
28it [00:16,  1.71it/s][A
29it [00:16,  1.71it/s][A
30it [00:17,  1.71it/s][A
31it [00:18,  1.71it/s][A
32it [00:18,  1.71it/s][A
33it [00:19,  1.71it/s][A
34it [00:19,  1.71it/s][A
35it [00:20,  1.71it/s][A
36it [00:21,  1.71it/s][A
37it [00:21,  

HellaSwag score: 0.259375


800409it [5:07:01,  2.27it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

919999it [3:19:35,  2.29it/s]

PREDS: the first step in the process of creating a new project is to create a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. the first step in creating a new project is to create a new project that will be able to be used in the future. the second step in creating a new project is to create a new project that will be able to be used in the future. the third step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will be able to be used in the future. the fourth step in creating a new project is to create a new project that will 


0it [00:00, ?it/s][A
1it [00:00,  1.71it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:17,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.73it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


921087it [3:29:35,  2.27it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

931693it [4:46:07,  2.28it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

1054447it [3:23:28,  2.30it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec

PREDS: the first step in the process of creating a new project is to create a new project that will be able to be used in the future. the project will be created by the project team and will be able to work on the project in a variety of ways. the project team will be able to work on the project in a variety of ways and will be able to work on it in a variety of ways. the project team will be able to work on the project in a variety of ways and will be able to work on it in a variety of ways. the project team will be able to work on the project in a variety of ways and will be able to work on it in a variety of ways. the project team will be able to work on the project in a variety of ways and will be able to work on it in a variety of ways. the project team will be able to work on the project in a variety of ways and will be able to work on it in a variety of ways. the project team will be able to work on the project in a variety of ways and will be able to work on it in a variety of 


0it [00:00, ?it/s][A
1it [00:00,  1.72it/s][A
2it [00:01,  1.72it/s][A
3it [00:01,  1.72it/s][A
4it [00:02,  1.72it/s][A
5it [00:02,  1.72it/s][A
6it [00:03,  1.72it/s][A
7it [00:04,  1.72it/s][A
8it [00:04,  1.72it/s][A
9it [00:05,  1.72it/s][A
10it [00:05,  1.72it/s][A
11it [00:06,  1.72it/s][A
12it [00:06,  1.72it/s][A
13it [00:07,  1.72it/s][A
14it [00:08,  1.72it/s][A
15it [00:08,  1.72it/s][A
16it [00:09,  1.72it/s][A
17it [00:09,  1.72it/s][A
18it [00:10,  1.72it/s][A
19it [00:11,  1.72it/s][A
20it [00:11,  1.72it/s][A
21it [00:12,  1.72it/s][A
22it [00:12,  1.72it/s][A
23it [00:13,  1.72it/s][A
24it [00:13,  1.72it/s][A
25it [00:14,  1.72it/s][A
26it [00:15,  1.72it/s][A
27it [00:15,  1.72it/s][A
28it [00:16,  1.72it/s][A
29it [00:16,  1.72it/s][A
30it [00:17,  1.72it/s][A
31it [00:18,  1.72it/s][A
32it [00:18,  1.72it/s][A
33it [00:19,  1.72it/s][A
34it [00:19,  1.72it/s][A
35it [00:20,  1.72it/s][A
36it [00:20,  1.72it/s][A
37it [00:21,  

HellaSwag score: 0.259375


1064541it [4:39:04,  2.28it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

1154543it [4:44:11,  2.28it/s]

In [None]:
# FOR TESTING

# Compute HellaSwag score
import numpy as np

hellaswag_accs = []
from tqdm import tqdm
import itertools
for _, batch in tqdm(enumerate(itertools.islice(get_batched_examples(hellaswag_ds, 2, seq_len, START_TOK, END_TOK, split=None), 4))): 
#for _, batch in tqdm(enumerate(get_batched_examples(hellaswag_ds, 1, 400, START_TOK, END_TOK, split=None))):
    choices_vals = []
    x, y, _, y_mask, _, _, y_indices = batch
    choices, labels = unpack_hellaswag_batched_x(x)
    
    for choice in choices:
        y, y_mask = concatenate_hellaswag_y_and_choice(y, choice, END_TOK) # no need to return new y_indices for now.
        choice_log_probs = log_probs(params, jnp.array(y), jnp.array(y_mask), jnp.array(y_indices))
        choices_vals.append(choice_log_probs)
    choices_vals = np.array(choices_vals).transpose()
    hellaswag_accs.extend(np.argmax(choices_vals, axis=1)==labels)

#print("hellaswag_accs", hellaswag_accs)
hellaswag_acc = sum(hellaswag_accs)/len(hellaswag_accs)
print(hellaswag_acc)


In [None]:
### Final test predictions + BLEU computation
x_tokens_per_batch = 15000 #For variable batch len, we don't use it as we can fit less data (paper does 25k)

print(f'Few predictions for validation dataset')
y_sample = predict(params, jnp.array(x_eval), seq_len, START_TOK, END_TOK)
y_sample = tuple([item.tolist() for item in y_sample])
for detekonized_x_eval, detokenized_y_eval, detokenized_y_sample in zip(detokenize(x_eval), detokenize(y_eval), detokenize(y_sample)):
    print(f'X:{detekonized_x_eval}\tY: {detokenized_y_eval} \tPREDS: {detokenized_y_sample}\n')
    references.append(detokenized_y_eval)
    predictions.append(detokenized_y_sample)

print(f'Computing BLEU for validation dataset')
import evaluate
references = [] 
predictions = []
for _, (x, y) in tqdm(enumerate(get_batched_examples_per_length(ds, x_tokens_per_batch, split="validation"))):
    y_sample = predict(params, jnp.array(x), seq_len, START_TOK, END_TOK)
    y_sample = tuple([item.tolist() for item in y_sample])
    for detekonized_x_eval, detokenized_y_eval, detokenized_y_sample in zip(detokenize(x), detokenize(y), detokenize(y_sample)):
        references.append(detokenized_y_eval)
        predictions.append(detokenized_y_sample)

bleu = evaluate.load("bleu")
results = bleu.compute(predictions=predictions, references=references)
print(results)