In [9]:
from vllm import LLM, SamplingParams
from dotenv import load_dotenv
from lark import Lark, exceptions
from lark.indenter import Indenter
from random import randint
import time
import sympy as sp


import json
import os

  from .autonotebook import tqdm as notebook_tqdm
2024-10-22 19:11:49,764	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [1]:
EXPERIMENT_TYPE = "gbd+fewshots"
SAMPLES = 10
MAX_TOKENS = 100
MODEL = "study-hjt/Meta-Llama-3-70B-Instruct-GPTQ-Int8"

In [7]:
# Auxiliar functions
class Generation:
    def __init__(self, seed, elapsed_time, gen):
        """
        arrival_time: The time when the request arrived. 
         first_scheduled_time: The time when the request was first scheduled. 
         first_token_time: The time when the first token was generated. 
         time_in_queue: The time the request spent in the queue. 
         finished_time: The time when the request was finished. 
        """
        # Elapsed time its based on vLLM values finished_time - arrival_time
        self.seed = seed
        self.elapsed_time = elapsed_time
        self.gen = gen

class Result:
    def __init__(self, seeds: list[int], elapsed_time_gen: list[int], syntax_validation: list[int], semantic_validation: list[int]):
        self.model = MODEL
        self.experiment_type = EXPERIMENT_TYPE
        self.samples = SAMPLES
        self.max_tokens= MAX_TOKENS
        self.seeds = [seed for seed in seeds[:SAMPLES]]
        self.elapsed_time_gen = [elapsed for elapsed in elapsed_time_gen[:SAMPLES]]
        self.syntax_validation = syntax_validation
        self.semantic_validation = semantic_validation

    def save(self):
        with open(f"./results/{self.experiment_type}/{self.samples}e_{self.max_tokens}t.jsonl", 'w') as file:
            json_line = json.dumps(self.__dict__)
            file.write(json_line + "\n")
        
    
    def load():
        with open(f"./{EXPERIMENT_TYPE}/{SAMPLES}e_{MAX_TOKENS}t.jsonl", "r") as file:
            for line in file:
                results = json.loads(line, object_hook=result_encoder)
            return results
    
def result_encoder(r):
    return Result(model=r['model'], experiment_type=r['experiment_type'], samples=r['samples'], max_tokens=r['max_tokens'],
                    seeds=r['seeds'], syntax_validation= r['syntax_validation'], semantic_validation=r['semantic_validation'])

def gen_encoder(g):
    return Generation(seed=g['seed'], elapsed_time=g['elapsed_time'], gen=g['gen'])

def fixed_seeds():
    fixed_seeds = []
    with open(f"./samples/study-hjt::Meta-Llama-3-70B-Instruct-GPTQ-Int8/gbd/100e_100t.jsonl", "r") as file:
        for line in file:
            gen = json.loads(line, object_hook=gen_encoder)
            fixed_seeds.append(gen.seed)
    return fixed_seeds

def semantic_test(generation: str):
    sp.sympify(generation)

def syntax_text(generation: str, parser):
    parser.parse(generation).pretty()

def gen_preproc(generation:str):
    eot_id_comparison = None
    match EXPERIMENT_TYPE:
        case "nogbd":
            # Treatment for returning generation until <|eot_id|>
            eot_id_gen = generation.split("<|eot_id|>")[0]
            # Treatment for replace =(comparision) with == (used in no gbd generation), As using simpify (Python code simulation)
            eot_id_comparison = eot_id_gen.replace("=", "==")

        case "gbd":
            # Treatment for returning generation until <|eot_id|>
            eot_id_comparison = generation.split("<|eot_id|>")[0]

        case "gbd+fewshots":
            # Treatment for returning generation until <|eot_id|>
            eot_id_comparison = generation.split("<|eot_id|>")[0]

    return eot_id_comparison

In [4]:

arithmetic_grammar = """
?start: comparison

?comparison: expression ("==" expression)* "<|eot_id|>"?

?expression: term (("+" | "-") term)*

?term: factor (("*" | "/") factor)*

?factor: NUMBER
       | "-" factor
       | "(" comparison ")"

%import common.NUMBER
%ignore " "  // Ignore spaces

// Define <|eot_id|> as a terminal
EOT_ID: "<|eot_id|>"
"""

arithmetic_prompt = None

match EXPERIMENT_TYPE:
    case "gbd":
        arithmetic_prompt=f"""
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful AI assistant for creating gramatically and sintactically arithmetic expression<|eot_id|>
<|start_header_id|>user<|end_header_id|>
Rewrite 9 * 15 as others equivalents expressions:
Follow this example:
(5*5)=(5+5+5+5+5)=(25*1)=(5*3)+(5*2). 
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
        
    case "nogbd":
        arithmetic_prompt=f"""
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful AI assistant for creating gramatically and sintactically arithmetic expression<|eot_id|>
<|start_header_id|>user<|end_header_id|>
Rewrite 9 * 15 as other equivalent expression, for the response, do not use text.
Just only characters available in this grammar: {arithmetic_grammar}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
"""
        
    case "gbd+fewshots":
        arithmetic_prompt="""
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful AI assistant for creating gramatically, equivalent and correct arithmetical expression<|eot_id|>
<|start_header_id|>user<|end_header_id|>
Given the following examples:\n
(5*5)=(5+5+5+5+5)=(25*1)=(5*3)+(5*2).\n
(3*3)=(3+3+3)=(3+6)=(9*1).\n
(3*4*5)=3*(2+2)*5=15*4=15*(2+2)=(12*5)=(20*3).\n
Rewrite 9 * 15 as others equivalents expressions:
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

    case "grammar_in_prompt":
        arithmetic_prompt=f"""
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful AI assistant for creating gramatically and sintactically expression given this specific grammar: {arithmetic_grammar}<|eot_id|>
<|start_header_id|>user<|end_header_id|>
Rewrite 9 * 15 as others equivalents expressions:
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

grammar = arithmetic_grammar

print(arithmetic_prompt)


<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful AI assistant for creating gramatically, equivalent and correct arithmetical expression<|eot_id|>
<|start_header_id|>user<|end_header_id|>
Given the following examples:

(5*5)=(5+5+5+5+5)=(25*1)=(5*3)+(5*2).

(3*3)=(3+3+3)=(3+6)=(9*1).

(3*4*5)=3*(2+2)*5=15*4=15*(2+2)=(12*5)=(20*3).

Rewrite 9 * 15 as others equivalents expressions:
<|eot_id|><|start_header_id|>assistant<|end_header_id|>



In [5]:
# Model loading
load_dotenv()

# llama-3-70 quantized
llm = LLM(MODEL, gpu_memory_utilization=0.9, tensor_parallel_size=8, enforce_eager=False, quantization="gptq")
#llm = LLM('meta-llama/Llama-3.2-1B-Instruct', gpu_memory_utilization=0.9, tensor_parallel_size=8, enforce_eager=False, dtype="half")

INFO 10-22 14:52:47 config.py:813] Defaulting to use mp for distributed inference
INFO 10-22 14:52:47 llm_engine.py:184] Initializing an LLM engine (v0.5.5) with config: model='study-hjt/Meta-Llama-3-70B-Instruct-GPTQ-Int8', speculative_config=None, tokenizer='study-hjt/Meta-Llama-3-70B-Instruct-GPTQ-Int8', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=8, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=gptq, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=study-hjt/Meta-Llama-3-70B-Ins

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message.


INFO 10-22 14:52:49 custom_cache_manager.py:17] Setting Triton cache manager to: vllm.triton_utils.custom_cache_manager:CustomCacheManager
[1;36m(VllmWorkerProcess pid=2027116)[0;0m INFO 10-22 14:52:49 selector.py:217] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
[1;36m(VllmWorkerProcess pid=2027116)[0;0m [1;36m(VllmWorkerProcess pid=2027117)[0;0m INFO 10-22 14:52:49 selector.py:116] Using XFormers backend.
INFO 10-22 14:52:49 selector.py:217] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
[1;36m(VllmWorkerProcess pid=2027117)[0;0m INFO 10-22 14:52:49 selector.py:116] Using XFormers backend.
[1;36m(VllmWorkerProcess pid=2027118)[0;0m INFO 10-22 14:52:49 selector.py:217] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
[1;36m(VllmWorkerProcess pid=2027118)[0;0m INFO 10-22 14:52:49 selector.py:116] Using XFormers backend.
INFO 10-22 14:52:50 selector.py:217] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
INFO 10-

[1;36m(VllmWorkerProcess pid=2027116)[0;0m [1;36m(VllmWorkerProcess pid=2027117)[0;0m   @torch.library.impl_abstract("xformers_flash::flash_fwd")
  @torch.library.impl_abstract("xformers_flash::flash_fwd")


[1;36m(VllmWorkerProcess pid=2027119)[0;0m INFO 10-22 14:52:50 selector.py:217] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
[1;36m(VllmWorkerProcess pid=2027119)[0;0m [1;36m(VllmWorkerProcess pid=2027121)[0;0m INFO 10-22 14:52:50 selector.py:116] Using XFormers backend.
INFO 10-22 14:52:50 selector.py:217] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
[1;36m(VllmWorkerProcess pid=2027121)[0;0m INFO 10-22 14:52:50 selector.py:116] Using XFormers backend.
[1;36m(VllmWorkerProcess pid=2027120)[0;0m INFO 10-22 14:52:50 selector.py:217] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
[1;36m(VllmWorkerProcess pid=2027120)[0;0m INFO 10-22 14:52:50 selector.py:116] Using XFormers backend.


[1;36m(VllmWorkerProcess pid=2027124)[0;0m   @torch.library.impl_abstract("xformers_flash::flash_fwd")
[1;36m(VllmWorkerProcess pid=2027117)[0;0m [1;36m(VllmWorkerProcess pid=2027116)[0;0m   @torch.library.impl_abstract("xformers_flash::flash_bwd")
  @torch.library.impl_abstract("xformers_flash::flash_bwd")
[1;36m(VllmWorkerProcess pid=2027118)[0;0m   @torch.library.impl_abstract("xformers_flash::flash_fwd")
[1;36m(VllmWorkerProcess pid=2027121)[0;0m   @torch.library.impl_abstract("xformers_flash::flash_fwd")
[1;36m(VllmWorkerProcess pid=2027119)[0;0m   @torch.library.impl_abstract("xformers_flash::flash_fwd")
[1;36m(VllmWorkerProcess pid=2027120)[0;0m   @torch.library.impl_abstract("xformers_flash::flash_fwd")
[1;36m(VllmWorkerProcess pid=2027124)[0;0m   @torch.library.impl_abstract("xformers_flash::flash_bwd")
  @torch.library.impl_abstract("xformers_flash::flash_fwd")
  @torch.library.impl_abstract("xformers_flash::flash_bwd")
  @torch.library.impl_abstract("xformers

[1;36m(VllmWorkerProcess pid=2027124)[0;0m INFO 10-22 14:52:51 multiproc_worker_utils.py:215] Worker ready; awaiting tasks
[1;36m(VllmWorkerProcess pid=2027117)[0;0m INFO 10-22 14:52:51 multiproc_worker_utils.py:215] Worker ready; awaiting tasks
[1;36m(VllmWorkerProcess pid=2027116)[0;0m INFO 10-22 14:52:51 multiproc_worker_utils.py:215] Worker ready; awaiting tasks
[1;36m(VllmWorkerProcess pid=2027121)[0;0m INFO 10-22 14:52:51 multiproc_worker_utils.py:215] Worker ready; awaiting tasks
[1;36m(VllmWorkerProcess pid=2027120)[0;0m INFO 10-22 14:52:51 multiproc_worker_utils.py:215] Worker ready; awaiting tasks
[1;36m(VllmWorkerProcess pid=2027119)[0;0m INFO 10-22 14:52:51 multiproc_worker_utils.py:215] Worker ready; awaiting tasks
[1;36m(VllmWorkerProcess pid=2027118)[0;0m INFO 10-22 14:52:51 multiproc_worker_utils.py:215] Worker ready; awaiting tasks
INFO 10-22 14:52:52 utils.py:975] Found nccl from library libnccl.so.2
[1;36m(VllmWorkerProcess pid=2027118)[0;0m INFO 10-2

Loading safetensors checkpoint shards:   0% Completed | 0/8 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:  12% Completed | 1/8 [00:00<00:05,  1.35it/s]
Loading safetensors checkpoint shards:  25% Completed | 2/8 [00:01<00:03,  1.78it/s]
Loading safetensors checkpoint shards:  38% Completed | 3/8 [00:02<00:03,  1.28it/s]
Loading safetensors checkpoint shards:  50% Completed | 4/8 [00:03<00:03,  1.08it/s]
Loading safetensors checkpoint shards:  62% Completed | 5/8 [00:04<00:02,  1.01it/s]
Loading safetensors checkpoint shards:  75% Completed | 6/8 [00:05<00:02,  1.01s/it]
Loading safetensors checkpoint shards:  88% Completed | 7/8 [00:06<00:01,  1.02s/it]
Loading safetensors checkpoint shards: 100% Completed | 8/8 [00:07<00:00,  1.01s/it]
Loading safetensors checkpoint shards: 100% Completed | 8/8 [00:07<00:00,  1.06it/s]



[1;36m(VllmWorkerProcess pid=2027121)[0;0m INFO 10-22 14:53:02 model_runner.py:890] Loading model weights took 8.6492 GB
INFO 10-22 14:53:02 model_runner.py:890] Loading model weights took 8.6492 GB
[1;36m(VllmWorkerProcess pid=2027120)[0;0m INFO 10-22 14:53:02 model_runner.py:890] Loading model weights took 8.6492 GB
[1;36m(VllmWorkerProcess pid=2027117)[0;0m INFO 10-22 14:53:04 model_runner.py:890] Loading model weights took 8.6492 GB
[1;36m(VllmWorkerProcess pid=2027116)[0;0m INFO 10-22 14:53:04 model_runner.py:890] Loading model weights took 8.6492 GB
[1;36m(VllmWorkerProcess pid=2027119)[0;0m [1;36m(VllmWorkerProcess pid=2027118)[0;0m INFO 10-22 14:53:04 model_runner.py:890] Loading model weights took 8.6492 GB
INFO 10-22 14:53:04 model_runner.py:890] Loading model weights took 8.6492 GB
[1;36m(VllmWorkerProcess pid=2027124)[0;0m INFO 10-22 14:53:05 model_runner.py:890] Loading model weights took 8.6492 GB
INFO 10-22 14:53:08 distributed_gpu_executor.py:56] # GPU blo

In [6]:
# Experiment variables init

#seeds = [randint(1,SAMPLES*10e9) for i in range(SAMPLES)]
seeds = fixed_seeds()
only_generations = []
elapsed_time_gens = []

# Iterate experiments to generate completions
for i in range(SAMPLES):
    seed = seeds[i]

    sampling_params = SamplingParams(
    max_tokens=MAX_TOKENS,
    temperature=1,
    top_p=0.95,
    seed= seed
    )

    start_time = time.perf_counter()

    outputs = None

    match EXPERIMENT_TYPE:
        case "gbd":
            outputs = llm.generate(
                prompts=arithmetic_prompt,
                sampling_params=sampling_params,
                guided_options_request=dict(guided_grammar=grammar))
            
        case "nogbd":
            outputs = llm.generate(
                prompts=arithmetic_prompt,
                sampling_params=sampling_params,
                )
            
        case "gbd+fewshots":
            outputs = llm.generate(
                prompts=arithmetic_prompt,
                sampling_params=sampling_params,
                guided_options_request=dict(guided_grammar=grammar))

    elapsed_time = time.perf_counter() - start_time
    print(f'Elapsed time for generation nº{i}: {elapsed_time} seconds')

    elapsed_time = None
    gen_text = None
    try:
        elapsed_time = outputs[0].metrics.finished_time - outputs[0].metrics.arrival_time
    except:
        elapsed_time = outputs.metrics.finished_time - outputs.metrics.arrival_time
    
    gen_text = None
    try:
        gen_text = outputs[0].outputs[0].text
    except:
        gen_text = outputs.outputs[0].text
                    
    elapsed_time_gens.append(elapsed_time)
    only_generations.append(Generation(seed=seed, elapsed_time=elapsed_time, gen=gen_text))


rog0d: First step -> Exclusive guided decodings
rog0d: Second step -> validate and add request
rog0d: third step-> Decoding backend
rog0d: fourth step-> outlines backend
rog0d: fifth step-> outlines operation
rog0d: sixth step-> check outlines guiding options
rog0d: seventh step-> CFGLogitsProcessor class: CFGuide to BaseLogitsProcessor
rog0d: ninth step-> _init_ CFGGuide
rog0d: ninth step-> _init_ CFGGuide


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

rog0d: eighth step-> _call_ BaseLogitsProcessor


Compiling FSM index for all state transitions: 100%|██████████| 8/8 [00:00<00:00, 13.40it/s]


rog0d: eighth step-> _call_ BaseLogitsProcessor


Compiling FSM index for all state transitions: 100%|██████████| 12/12 [00:00<00:00, 20.21it/s]


rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsPr

Compiling FSM index for all state transitions: 100%|██████████| 2/2 [00:00<00:00,  7.17it/s]
Processed prompts: 100%|██████████| 1/1 [00:18<00:00, 18.94s/it, est. speed input: 7.02 toks/s, output: 2.90 toks/s]


Elapsed time for generation nº0: 20.389550366904587 seconds
rog0d: First step -> Exclusive guided decodings
rog0d: Second step -> validate and add request
rog0d: third step-> Decoding backend
rog0d: fourth step-> outlines backend
rog0d: fifth step-> outlines operation
rog0d: sixth step-> check outlines guiding options
rog0d: seventh step-> CFGLogitsProcessor class: CFGuide to BaseLogitsProcessor
rog0d: ninth step-> _init_ CFGGuide


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

rog0d: eighth step-> _call_ BaseLogitsProcessor


Compiling FSM index for all state transitions: 100%|██████████| 8/8 [00:00<00:00, 14.23it/s]


rog0d: eighth step-> _call_ BaseLogitsProcessor


Compiling FSM index for all state transitions: 100%|██████████| 12/12 [00:00<00:00, 20.22it/s]


rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsPr

Compiling FSM index for all state transitions: 100%|██████████| 2/2 [00:00<00:00,  7.00it/s]
Processed prompts: 100%|██████████| 1/1 [00:18<00:00, 18.00s/it, est. speed input: 7.39 toks/s, output: 3.22 toks/s]


Elapsed time for generation nº1: 18.57254038657993 seconds
rog0d: First step -> Exclusive guided decodings
rog0d: Second step -> validate and add request
rog0d: third step-> Decoding backend
rog0d: fourth step-> outlines backend
rog0d: fifth step-> outlines operation
rog0d: sixth step-> check outlines guiding options
rog0d: seventh step-> CFGLogitsProcessor class: CFGuide to BaseLogitsProcessor
rog0d: ninth step-> _init_ CFGGuide
rog0d: ninth step-> _init_ CFGGuide


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

rog0d: eighth step-> _call_ BaseLogitsProcessor


Compiling FSM index for all state transitions: 100%|██████████| 8/8 [00:00<00:00, 14.20it/s]


rog0d: eighth step-> _call_ BaseLogitsProcessor


Compiling FSM index for all state transitions: 100%|██████████| 12/12 [00:00<00:00, 20.10it/s]


rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsPr

Compiling FSM index for all state transitions: 100%|██████████| 2/2 [00:00<00:00,  7.08it/s]
Processed prompts: 100%|██████████| 1/1 [00:12<00:00, 12.81s/it, est. speed input: 10.38 toks/s, output: 4.84 toks/s]


Elapsed time for generation nº2: 13.526398505084217 seconds
rog0d: First step -> Exclusive guided decodings
rog0d: Second step -> validate and add request
rog0d: third step-> Decoding backend
rog0d: fourth step-> outlines backend
rog0d: fifth step-> outlines operation
rog0d: sixth step-> check outlines guiding options
rog0d: seventh step-> CFGLogitsProcessor class: CFGuide to BaseLogitsProcessor
rog0d: ninth step-> _init_ CFGGuide


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

rog0d: eighth step-> _call_ BaseLogitsProcessor


Compiling FSM index for all state transitions: 100%|██████████| 8/8 [00:00<00:00, 14.22it/s]


rog0d: eighth step-> _call_ BaseLogitsProcessor


Compiling FSM index for all state transitions: 100%|██████████| 12/12 [00:00<00:00, 20.27it/s]


rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsPr

Compiling FSM index for all state transitions: 100%|██████████| 2/2 [00:00<00:00,  6.80it/s]


rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsPr

Processed prompts: 100%|██████████| 1/1 [00:26<00:00, 26.59s/it, est. speed input: 5.00 toks/s, output: 3.76 toks/s]

rog0d: eighth step-> _call_ BaseLogitsProcessor
Elapsed time for generation nº3: 27.146677513141185 seconds
rog0d: First step -> Exclusive guided decodings
rog0d: Second step -> validate and add request
rog0d: third step-> Decoding backend
rog0d: fourth step-> outlines backend
rog0d: fifth step-> outlines operation
rog0d: sixth step-> check outlines guiding options
rog0d: seventh step-> CFGLogitsProcessor class: CFGuide to BaseLogitsProcessor





rog0d: ninth step-> _init_ CFGGuide


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsPr

Processed prompts: 100%|██████████| 1/1 [00:09<00:00,  9.97s/it, est. speed input: 13.35 toks/s, output: 5.92 toks/s]

rog0d: eighth step-> _call_ BaseLogitsProcessor
Elapsed time for generation nº4: 10.508775134105235 seconds
rog0d: First step -> Exclusive guided decodings
rog0d: Second step -> validate and add request
rog0d: third step-> Decoding backend
rog0d: fourth step-> outlines backend
rog0d: fifth step-> outlines operation
rog0d: sixth step-> check outlines guiding options
rog0d: seventh step-> CFGLogitsProcessor class: CFGuide to BaseLogitsProcessor





rog0d: ninth step-> _init_ CFGGuide


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsPr

Processed prompts: 100%|██████████| 1/1 [00:12<00:00, 12.10s/it, est. speed input: 11.00 toks/s, output: 5.62 toks/s]


Elapsed time for generation nº5: 12.643198764882982 seconds
rog0d: First step -> Exclusive guided decodings
rog0d: Second step -> validate and add request
rog0d: third step-> Decoding backend
rog0d: fourth step-> outlines backend
rog0d: fifth step-> outlines operation
rog0d: sixth step-> check outlines guiding options
rog0d: seventh step-> CFGLogitsProcessor class: CFGuide to BaseLogitsProcessor
rog0d: ninth step-> _init_ CFGGuide


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsPr

Processed prompts: 100%|██████████| 1/1 [00:11<00:00, 11.79s/it, est. speed input: 11.28 toks/s, output: 5.85 toks/s]

rog0d: eighth step-> _call_ BaseLogitsProcessor
Elapsed time for generation nº6: 12.339034235104918 seconds
rog0d: First step -> Exclusive guided decodings
rog0d: Second step -> validate and add request
rog0d: third step-> Decoding backend
rog0d: fourth step-> outlines backend
rog0d: fifth step-> outlines operation
rog0d: sixth step-> check outlines guiding options
rog0d: seventh step-> CFGLogitsProcessor class: CFGuide to BaseLogitsProcessor





rog0d: ninth step-> _init_ CFGGuide


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsPr

Processed prompts: 100%|██████████| 1/1 [00:10<00:00, 10.88s/it, est. speed input: 12.22 toks/s, output: 5.88 toks/s]

rog0d: eighth step-> _call_ BaseLogitsProcessor
Elapsed time for generation nº7: 11.422729100100696 seconds
rog0d: First step -> Exclusive guided decodings
rog0d: Second step -> validate and add request
rog0d: third step-> Decoding backend
rog0d: fourth step-> outlines backend
rog0d: fifth step-> outlines operation
rog0d: sixth step-> check outlines guiding options
rog0d: seventh step-> CFGLogitsProcessor class: CFGuide to BaseLogitsProcessor





rog0d: ninth step-> _init_ CFGGuide


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsPr

Processed prompts: 100%|██████████| 1/1 [00:13<00:00, 13.76s/it, est. speed input: 9.67 toks/s, output: 6.03 toks/s]


Elapsed time for generation nº8: 14.29607446398586 seconds
rog0d: First step -> Exclusive guided decodings
rog0d: Second step -> validate and add request
rog0d: third step-> Decoding backend
rog0d: fourth step-> outlines backend
rog0d: fifth step-> outlines operation
rog0d: sixth step-> check outlines guiding options
rog0d: seventh step-> CFGLogitsProcessor class: CFGuide to BaseLogitsProcessor
rog0d: ninth step-> _init_ CFGGuide


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsProcessor
rog0d: eighth step-> _call_ BaseLogitsPr

Processed prompts: 100%|██████████| 1/1 [00:16<00:00, 16.51s/it, est. speed input: 8.06 toks/s, output: 6.00 toks/s]

Elapsed time for generation nº9: 17.051023880951107 seconds





In [7]:
# testing request and eot_id
print(outputs[0])
print(outputs[0].metrics)
print(outputs[0].outputs[0].text)
print(outputs[0].outputs[0].text.split("<|eot_id|>")[0])


RequestOutput(request_id=9, prompt='\n<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful AI assistant for creating gramatically, equivalent and correct arithmetical expression<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\nGiven the following examples:\n\n(5*5)=(5+5+5+5+5)=(25*1)=(5*3)+(5*2).\n\n(3*3)=(3+3+3)=(3+6)=(9*1).\n\n(3*4*5)=3*(2+2)*5=15*4=15*(2+2)=(12*5)=(20*3).\n\nRewrite 9 * 15 as others equivalents expressions:\n<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n', prompt_token_ids=[128000, 198, 128000, 128006, 9125, 128007, 271, 2675, 527, 264, 11190, 15592, 18328, 369, 6968, 23882, 7167, 11, 13890, 323, 4495, 802, 411, 4150, 950, 7645, 128009, 198, 128006, 882, 128007, 198, 22818, 279, 2768, 10507, 1473, 7, 20, 9, 20, 74273, 20, 10, 20, 10, 20, 10, 20, 10, 20, 74273, 914, 9, 16, 74273, 20, 9, 18, 42894, 20, 9, 17, 3677, 7, 18, 9, 18, 74273, 18, 10, 18, 10, 18, 74273, 18, 10, 21, 74273, 24, 9, 16, 3677, 7, 18, 9, 19, 9, 20, 11992, 

In [8]:
for gen in only_generations:
    print(gen.__dict__)

assert(len(only_generations) == SAMPLES)

{'seed': 424533559246, 'elapsed_time': 18.944291353225708, 'gen': '9*15==9*3*5==3*3*3*5==3*3*15==3*45==9*5*3==9*10+5==90+45==135<|eot_id|>'}
{'seed': 978212965549, 'elapsed_time': 18.003395080566406, 'gen': '9*15==9*3*5==9*10+45==45+45==90*1==10*9==3*3*15==3*45==5*27==27*3+12<|eot_id|>'}
{'seed': 44756014166, 'elapsed_time': 12.814391613006592, 'gen': '9*15==9*3*5==3*3*3*5==3*3*15==3*45==45*3==5*27==27*5==3*5*3*5==15*9<|eot_id|>'}
{'seed': 534771852899, 'elapsed_time': 26.59004831314087, 'gen': '9*15==9*3*5==9*5*3==3*3*3*5==3*3*15==45*3==3*45==15*9<|eot_id|>                                                   '}
{'seed': 913899456058, 'elapsed_time': 9.965386867523193, 'gen': '9*15==9*3*5==3*3*3*5==3*3*15==3*45==9*5*3==9*10+5==5*9*3==135*1<|eot_id|>'}
{'seed': 392888992261, 'elapsed_time': 12.098276615142822, 'gen': '9*15==9+9+9+9+9+9+9+9+9+9+9+9+9+9+9==3*3*15==3*45==9*3*5==9*10+5==135<|eot_id|>   '}
{'seed': 981758150272, 'elapsed_time': 11.794308185577393, 'gen': '9*15==9*3*5==3*3*3*5=

In [9]:
# Save and load results

# Write the jsonl and serialize the gens
with open(f"./samples/{MODEL}/{EXPERIMENT_TYPE}/{SAMPLES}e_{MAX_TOKENS}t.jsonl", 'w') as file:
    for gen in only_generations:
        json_line = json.dumps(gen.__dict__)
        file.write(json_line + "\n")

# Read the jsonl and deserialize back
generation_from_file = []
with open(f"./samples/{MODEL}/{EXPERIMENT_TYPE}/{SAMPLES}e_{MAX_TOKENS}t.jsonl", "r") as file:
    for line in file:
        gen = json.loads(line, object_hook=gen_encoder)
        generation_from_file.append(gen)


In [10]:
# Checking syntax
syntactic_results=[]
parser = Lark(grammar, parser='lalr')

for gen in generation_from_file:

    try:
        # Parse a generation
        gen_preprocesed = gen_preproc(gen.gen)
        syntax_text(generation=gen_preprocesed, parser=parser)
        syntactic_results.append(1)

    except:
        syntactic_results.append(0)


In [11]:
print(f"total syntactically valid: {syntactic_results.count(1)}" )
print(f"total syntactically invalid: {syntactic_results.count(0)}" )

print(f"Percentaje syntactically valid: {syntactic_results.count(1)/SAMPLES}%" )


total syntactically valid: 10
total syntactically invalid: 0
Percentaje syntactically valid: 1.0%


In [12]:
# Checking semantic

semantic_results=[]

for gen in generation_from_file:

    try:
        # Parse a generation
        gen_preprocesed = gen_preproc(gen.gen)
        semantic_test(generation=gen_preprocesed)
        semantic_results.append(1)

    except:
        semantic_results.append(0)


In [13]:
print(f"total semantically valid: {semantic_results.count(1)}" )
print(f"total semantically invalid: {semantic_results.count(0)}" )

print(f"Percentaje semantically valid: {semantic_results.count(1)/SAMPLES}%" )

total semantically valid: 10
total semantically invalid: 0
Percentaje semantically valid: 1.0%


In [14]:
res = Result(seeds=seeds, elapsed_time_gen= elapsed_time_gens, syntax_validation= syntactic_results, semantic_validation=semantic_results)
res.save()

In [34]:
seeds = [randint(1,100*10e9) for i in range(100)]


In [39]:
# dirty script to extend the 100 original seeds to 200 and 50
seeds_100 = fixed_seeds()

with open(f"./seeds/100.jsonl", "w") as file:
    json_line = json.dumps(seeds_100)
    file.write(json_line + "\n")
    
load_100 = []
with open(f"./seeds/100.jsonl", "r") as file:
    for line in file:
        load_100 = json.loads(line)

print(load_100)

load_100.extend(seeds)
unique_200_seed = set(load_100)
print(len(unique_200_seed))

with open(f"./seeds/200.jsonl", "w") as file:
    json_line = json.dumps(load_100)
    file.write(json_line + "\n")

with open(f"./seeds/200.jsonl", "r") as file:
    for line in file:
        load_200 = json.loads(line)

print(len(load_200))

load_50 = load_100[:50]
with open(f"./seeds/50.jsonl", "w") as file:
    json_line = json.dumps(load_50)
    file.write(json_line + "\n")

with open(f"./seeds/50.jsonl", "r") as file:
    for line in file:
        load_50 = json.loads(line)

print(len(load_50))

[424533559246, 978212965549, 44756014166, 534771852899, 913899456058, 392888992261, 981758150272, 240123516436, 152491468919, 151534339700, 106325369466, 878829203005, 588022882667, 777315144758, 664904820127, 162788563212, 108706256090, 80444014844, 935868384285, 363714941483, 616208160856, 386979565045, 345462137507, 702703408826, 227259652029, 609741931458, 487380045831, 574947128223, 65543315396, 98844556457, 926509076717, 779102014642, 862536141389, 690064463490, 670019806534, 908358000256, 368798545707, 799911506283, 774490855371, 72458122217, 242955608656, 881493143288, 158774376403, 600450083710, 100708262469, 963447298339, 540877402764, 331180881682, 320195254712, 136178644350, 367423460971, 225658799810, 880312983411, 603886083987, 316056408696, 100695461126, 878734360942, 349545488806, 264465407347, 203110418637, 902756561703, 35161735606, 286288172152, 75061129085, 743415134881, 144987804857, 166974457324, 42620596937, 89517685033, 772656770167, 914497675644, 749646168506, 